-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathfuzzyMatcher.ts
More file actions
295 lines (257 loc) · 8.49 KB
/
fuzzyMatcher.ts
File metadata and controls
295 lines (257 loc) · 8.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
// src/utils/fuzzyMatcher.ts
// 模糊匹配算法 - 用于匹配 SDK 库名称
// 采用 7 层级匹配策略,处理各种复杂命名情况
import { Library } from '../types';
/**
 * Fuzzy-matches a native library file name against the rule table using a
 * tiered strategy; the first tier that produces a rule wins.
 *
 * Tiers, in order:
 *  1. exact key lookup;
 *  2-5. lookup of every variant produced by `generateNormalizedNames`;
 *  6. substring containment via `findSubstringMatch`;
 *  7. names recognised by `isHashName` are reported with a warning and left
 *     unmatched.
 *
 * @param libraryName - Library file name to match (e.g. "libacra-5.9.7.so").
 * @param rulesMap - Rule table keyed by library name (e.g. "libacra.so").
 * @returns The matching rule, or `null` when no tier matches.
 */
export function fuzzyMatchLibrary(
  libraryName: string,
  rulesMap: Record<string, Omit<Library, 'count' | 'locations' | 'architectures' | 'hasMetadata' | 'expanded'>>
): Omit<Library, 'count' | 'locations' | 'architectures' | 'hasMetadata' | 'expanded'> | null {
  // Only own keys are rules. A plain `rulesMap[key]` also finds inherited
  // members such as `toString` or `constructor`, so a file named
  // "libtoString.so" would otherwise "match" a function.
  const ownRule = (key: string) =>
    Object.prototype.hasOwnProperty.call(rulesMap, key) ? rulesMap[key] ?? null : null;

  // Tier 1: exact match.
  const exact = ownRule(libraryName);
  if (exact) {
    return exact;
  }

  // Tiers 2-5: normalized variants, tried in the order they were generated.
  for (const candidate of generateNormalizedNames(libraryName)) {
    const rule = ownRule(candidate);
    if (rule) {
      return rule;
    }
  }

  // Tier 6: substring containment in either direction.
  const substringMatch = findSubstringMatch(libraryName, rulesMap);
  if (substringMatch) {
    return substringMatch;
  }

  // Tier 7: a hash-like (obfuscated) name cannot be resolved; just report it.
  if (isHashName(libraryName)) {
    console.warn(`检测到混淆名称(Hash): ${libraryName}`);
  }

  return null;
}
/**
 * Builds the candidate rule keys tried by match tiers 2-5.
 *
 * Every normalized stem is emitted in three spellings: the bare stem,
 * `<stem>.so` and `lib<stem>.so`. Rule keys are documented as looking like
 * "libacra.so", so the bare stem alone would rarely hit a key.
 *
 * Candidates are returned in priority order (insertion order of the set) and
 * contain no duplicates and no empty stems.
 *
 * @param libraryName - Original library file name.
 * @returns Candidate keys, most specific normalization first.
 */
function generateNormalizedNames(libraryName: string): string[] {
  const variants = new Set<string>();

  // Registers a stem in all three spellings. Empty stems are skipped so the
  // meaningless keys ".so" and "lib.so" are never generated.
  const addStem = (stem: string): void => {
    if (stem.length === 0) {
      return;
    }
    variants.add(stem);
    variants.add(stem + '.so');
    variants.add('lib' + stem + '.so');
  };

  const current = removeExtension(libraryName);
  const withoutLib = removeLibPrefix(current);
  const lowercase = current.toLowerCase();

  // Tier 2: basic normalization (drop "lib", drop ".so", lower-case).
  addStem(withoutLib);
  addStem(lowercase);
  addStem(removeLibPrefix(lowercase));

  // Tier 3: drop version numbers.
  addStem(removeVersionSuffix(current));

  // Tier 4: drop build suffixes.
  addStem(removeCommonSuffix(current));

  // Tier 5: drop a package-name prefix. This must run on the stem without
  // "lib", otherwise "libcom_example_app_native" never starts with "com_".
  const withoutPackage = removePackagePrefix(withoutLib);
  if (withoutPackage !== withoutLib) {
    addStem(withoutPackage);
  }

  // Combined: drop "lib", then version, then build suffix, then package prefix.
  addStem(
    removePackagePrefix(
      removeCommonSuffix(
        removeVersionSuffix(withoutLib)
      )
    )
  );

  return Array.from(variants);
}
/**
 * Strips one trailing ".so" extension (matched case-insensitively).
 * Names that do not end in ".so" are returned unchanged.
 */
function removeExtension(name: string): string {
  const endsWithSo = name.slice(-3).toLowerCase() === '.so';
  return endsWithSo ? name.slice(0, -3) : name;
}
/**
 * Strips a leading "lib" (matched case-insensitively).
 * Names without that prefix are returned unchanged.
 */
function removeLibPrefix(name: string): string {
  return name.replace(/^lib/i, '');
}
/**
 * Strips version markers from a library name.
 *
 * Supported shapes:
 * - libacra-5.9.7.so          -> libacra.so
 * - libacra_v2.0.so           -> libacra.so
 * - libacra.5.9.so            -> libacra.so
 * - libacra-release-1.2.3.so  -> libacra.so
 * - libacra_123               -> libacra   (trailing "_<digits>")
 *
 * @param name - Library name, with or without the ".so" extension.
 * @returns The name with version markers removed.
 */
function removeVersionSuffix(name: string): string {
  return name
    // "-release-<version>" has to go first. If the generic version pattern
    // ran first it would eat "-1.2.3" and leave a dangling "-release".
    .replace(/-release-\d+(\.\d+)*/g, '')
    // Dotted versions with an optional separator and "v": -5.9.7, _v2.0, .5.9
    .replace(/[-_.]?v?\d+(\.\d+)+/g, '')
    // Trailing build number such as "_123".
    .replace(/_\d+$/, '');
}
/**
 * Strips common build/ABI suffixes (debug, release, arm64, x86, ...) from the
 * end of a library name, case-insensitively.
 *
 * Stacked suffixes are removed together, in any order, so
 * "libfoo-release_alijtca_plus" becomes "libfoo" rather than "libfoo-release".
 *
 * @param name - Library name without the ".so" extension.
 * @returns The name with every trailing build suffix removed.
 */
function removeCommonSuffix(name: string): string {
  const suffixes = [
    '-debug', '-release', '-prod', '-dev',
    '_debug', '_release', '_prod', '_dev',
    '_arm64', '_armeabi', '_x86', '_x86_64',
    '-arm64-v8a', '-armeabi-v7a', '-x86', '-x86_64',
    '_alijtca_plus', // vendor-specific suffix (Alibaba)
  ];

  // Escape regex metacharacters so every entry is matched literally even if
  // the list later gains an entry containing "." or "+".
  const alternatives = suffixes.map(suffix =>
    suffix.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  );

  // One anchored pattern with "+" removes a whole run of trailing suffixes,
  // which a single ordered pass over the list cannot do.
  const trailingSuffixes = new RegExp('(?:' + alternatives.join('|') + ')+$', 'i');

  return name.replace(trailingSuffixes, '');
}
/**
 * Strips a Java-package-style prefix (com_example_app_, cn_company_, ...) and
 * keeps only the last underscore-separated segment.
 *
 * A leading "lib" is ignored when detecting the package prefix, so both
 * spellings work:
 * - libcom_example_app_native.so -> native.so
 * - cn_company_sdk_core          -> core
 *
 * The name is returned unchanged when it has no recognised package prefix,
 * has fewer than three segments, or ends in "_" (which would otherwise yield
 * an empty library name).
 *
 * @param name - Library name, with or without "lib" / ".so".
 * @returns The last segment, or the original name when nothing applies.
 */
function removePackagePrefix(name: string): string {
  const packagePrefixes = ['com_', 'cn_', 'org_', 'io_', 'net_', 'android_'];

  // Detect the package on the name without "lib"; with it, nothing would ever
  // start with "com_".
  const candidate = name.replace(/^lib/i, '');
  const lowered = candidate.toLowerCase();
  if (!packagePrefixes.some(prefix => lowered.startsWith(prefix))) {
    return name;
  }

  // At least three segments (e.g. com_example_app) are required before the
  // name is treated as package-qualified.
  const parts = candidate.split('_');
  const lastSegment = parts[parts.length - 1];
  if (parts.length < 3 || !lastSegment) {
    return name;
  }

  return lastSegment;
}
/**
 * Tier-6 fallback: case-insensitive substring containment between the library
 * name and the rule keys, in either direction.
 *
 * A rule matches when its key (without ".so") occurs inside the library name,
 * or when the library name (without ".so") occurs inside the key. Rules are
 * tested in `Object.entries` order and the first hit is returned.
 *
 * Example: "libalicomphonenumberauthsdk-release_alijtca_plus.so" contains the
 * stem of the key "libalicomphonenumberauthsdk.so".
 *
 * Empty stems never match: `"x".includes("")` is always true, so an empty
 * library name or a key such as ".so" would otherwise match everything.
 *
 * @param libraryName - Library file name to match.
 * @param rulesMap - Rule table keyed by library name.
 * @returns The first rule related by containment, or `null`.
 */
function findSubstringMatch(
  libraryName: string,
  rulesMap: Record<string, Omit<Library, 'count' | 'locations' | 'architectures' | 'hasMetadata' | 'expanded'>>
): Omit<Library, 'count' | 'locations' | 'architectures' | 'hasMetadata' | 'expanded'> | null {
  // Both forms of the library name are loop-invariant; compute them once.
  const loweredName = libraryName.toLowerCase();
  const nameStem = loweredName.replace(/\.so$/i, '');
  if (nameStem.length === 0) {
    return null;
  }

  for (const [ruleKey, rule] of Object.entries(rulesMap)) {
    const loweredKey = ruleKey.toLowerCase();
    const keyStem = loweredKey.replace(/\.so$/i, '');
    if (keyStem.length === 0) {
      continue;
    }

    // Key stem inside the library name, or library stem inside the key.
    if (loweredName.includes(keyStem) || loweredKey.includes(nameStem)) {
      return rule;
    }
  }

  return null;
}
/**
 * Tells whether a library name looks like an obfuscated hash: after dropping
 * a leading "lib" and a trailing ".so", what remains is 8 to 16 hexadecimal
 * digits and nothing else.
 *
 * Examples:
 * - lib39285EFA.so -> true
 * - libA3AEECD8.so -> true
 * - libacra.so     -> false
 *
 * @param libraryName - Library file name.
 * @returns `true` when the core name is a pure hex string of length 8-16.
 */
function isHashName(libraryName: string): boolean {
  const withoutPrefix = libraryName.replace(/^lib/i, '');
  const stem = withoutPrefix.replace(/\.so$/i, '');

  // Length gate first, then a pure-hex check over the whole stem.
  if (stem.length < 8 || stem.length > 16) {
    return false;
  }
  return /^[0-9A-Fa-f]+$/.test(stem);
}
/**
 * Runs `fuzzyMatchLibrary` over a list of library names.
 *
 * Names that match no rule are dropped, so every returned entry carries a
 * non-null `matched` rule. Input order is preserved.
 *
 * @param libraryNames - Library file names to match.
 * @param rulesMap - Rule table keyed by library name.
 * @returns One `{ original, matched }` pair per name that matched.
 */
export function fuzzyMatchBatch(
  libraryNames: string[],
  rulesMap: Record<string, Omit<Library, 'count' | 'locations' | 'architectures' | 'hasMetadata' | 'expanded'>>
): Array<{
  original: string;
  matched: Omit<Library, 'count' | 'locations' | 'architectures' | 'hasMetadata' | 'expanded'> | null;
}> {
  const hits: ReturnType<typeof fuzzyMatchBatch> = [];

  libraryNames.forEach(name => {
    const matched = fuzzyMatchLibrary(name, rulesMap);
    if (matched !== null) {
      hits.push({ original: name, matched });
    }
  });

  return hits;
}
/**
 * Match-result caches, one per rule table.
 *
 * Results are keyed by the `rulesMap` object first and the library name
 * second, so a lookup against a different rule table can never return a
 * result computed for another one. The outer `WeakMap` lets a cache be
 * collected together with its rule table.
 */
let matchCache = new WeakMap<
  object,
  Map<string, Omit<Library, 'count' | 'locations' | 'architectures' | 'hasMetadata' | 'expanded'> | null>
>();

/**
 * Cached variant of `fuzzyMatchLibrary`.
 *
 * Both hits and misses (`null`) are cached. The cache does not notice
 * in-place mutation of `rulesMap`; call `clearMatchCache` after editing a
 * rule table that has already been used here.
 *
 * @param libraryName - Library file name to match.
 * @param rulesMap - Rule table keyed by library name.
 * @returns The matching rule, or `null` when nothing matches.
 */
export function fuzzyMatchLibraryWithCache(
  libraryName: string,
  rulesMap: Record<string, Omit<Library, 'count' | 'locations' | 'architectures' | 'hasMetadata' | 'expanded'>>
): Omit<Library, 'count' | 'locations' | 'architectures' | 'hasMetadata' | 'expanded'> | null {
  let resultsForRules = matchCache.get(rulesMap);
  if (!resultsForRules) {
    resultsForRules = new Map();
    matchCache.set(rulesMap, resultsForRules);
  }

  // Stored values are a rule or `null`, never `undefined`, so `undefined`
  // unambiguously means "not cached yet".
  const cached = resultsForRules.get(libraryName);
  if (cached !== undefined) {
    return cached;
  }

  const result = fuzzyMatchLibrary(libraryName, rulesMap);
  resultsForRules.set(libraryName, result);
  return result;
}

/**
 * Drops every cached match result, for all rule tables.
 */
export function clearMatchCache(): void {
  // WeakMap has no clear(); replacing the instance discards all entries.
  matchCache = new WeakMap();
}