Source code for apkutils.wildcard

from difflib import Differ, SequenceMatcher

def get_max_len(wildcards):
    '''
    Return the length of the longest literal segment in a wildcard string.

    The string is split on '*' and the length of the longest piece is
    returned. An empty string, or a string made only of '*', yields 0.
    '''
    # str.split always returns at least one piece, so max() never sees an
    # empty sequence.
    return max(len(segment) for segment in wildcards.split('*'))
# FIXME: generate *.*.*.* directly, or reverse the order of the compare? Either way the problem still cannot be avoided.
def get_wildcards(str1, str2, min_length=0):
    '''
    Build a wildcard pattern that covers both strings.

    The two strings are diffed character by character; every run of
    differing characters becomes a single '*', shared characters are kept.

    min_length is the minimum length of a literal segment between two '*'.
    When it is greater than 0, a segment whose length is not greater than
    min_length is dropped (only a '.' is kept if the segment contained
    one), e.g. with min_length=1, '*a*' becomes '*'.

    Returns None when either input is empty, otherwise the pattern string.
    '''
    if not str1 or not str2:
        return None

    # Only the str2 -> str1 diff is used. Each Differ line starts with a
    # two-character code ('  ' shared, '- ' / '+ ' differing) followed by the
    # element itself, so the code is inspected instead of searching the whole
    # line: a shared literal '-' or '+' must not be mistaken for a difference,
    # and shared whitespace must not be stripped away.
    pieces = []
    for line in Differ().compare(str2, str1):
        code = line[0]
        if code == ' ':
            pieces.append(line[2:])
        elif code in '-+':
            # Collapse consecutive differences into a single wildcard.
            if not pieces or pieces[-1] != '*':
                pieces.append('*')
    wildcards = ''.join(pieces)

    if min_length <= 0:
        return wildcards

    result = '*' if wildcards.startswith('*') else ''
    for index, segment in enumerate(wildcards.split('*')):
        if index == 0:
            # A too-short leading segment is replaced by a wildcard.
            if len(segment) < min_length and not result.endswith('*'):
                result += '*'
        elif not result.endswith('*'):
            # Re-insert the separator that split('*') consumed.
            result += '*'

        if len(segment) > min_length:
            result += segment
        elif '.' in segment:
            # Keep the dot so the dotted structure of the name survives.
            result += '.'
    return result
def get_wildcards_in_list(str_list, min_length=0):
    '''
    Build one wildcard pattern that covers every string in str_list.

    The first string is the starting pattern; it is then merged with each
    remaining string in turn through get_wildcards, passing min_length on.

    Returns None for an empty list. The input list is not modified.
    '''
    if not str_list:
        return None

    wildcards = str_list[0]
    # Slice instead of removing the first element so the caller's list is
    # left untouched.
    for item in str_list[1:]:
        wildcards = get_wildcards(wildcards, item, min_length)
    return wildcards
def get_best_wildcard_from_list(str1, str_list, min_length=0):
    '''
    Pick the entry of str_list most similar to str1 and return the
    wildcard pattern built from str1 and that entry.

    Similarity is scored with get_ratio; the first entry with the highest
    score above 0 wins. If no entry scores above 0, the pattern is built
    against an empty string.
    '''
    top_score = 0.0
    closest = ''
    for candidate in str_list:
        score = get_ratio(str1, candidate)
        if score > top_score:
            top_score, closest = score, candidate
    return get_wildcards(str1, closest, min_length)
def get_ratio(str1, str2, weight=3):
    '''
    Return a similarity score for two strings.

    The score is 0 when either string is shorter than weight, or when
    int(len1 / len2 + len2 / len1) reaches weight. Otherwise it is
    SequenceMatcher(None, str1, str2).ratio().
    '''
    size1, size2 = len(str1), len(str2)
    if min(size1, size2) < weight:
        return 0

    length_spread = int(size1 / size2 + size2 / size1)
    if length_spread >= weight:
        return 0

    matcher = SequenceMatcher(None, str1, str2)
    return matcher.ratio()
def gen_wildcard_str(str1, str2, min_length=0):
    '''
    Build a wildcard string from the common opcode segments of two strings.

    The shared segments reported by find_common_opcodes for str1 that are
    longer than min_length are joined with '*'. One pass collapses '**'
    into '*', and a trailing '*' is removed.
    '''
    segments = find_common_opcodes(str1, str2)[0]
    joined = ''.join(
        text + '*'
        for is_common, text in segments
        if is_common and len(text) > min_length
    )
    joined = joined.replace('**', '*')
    return joined[:-1] if joined.endswith('*') else joined
def longest_common_subopcode(s1, s2):
    '''
    Return the longest common substring of s1 and s2, adjusted for opcodes.

    When s1 contains no '*', the match is trimmed so that it starts and
    ends on even offsets of s1 and has even length. When s1 contains '*',
    the raw match is used, except that a match starting with '*' and having
    even length loses its last character.

    Note from the original author: this works for two plain strings, but
    falls apart when they contain symbols such as '*'.
    '''
    width = len(s2) + 1
    best_len = 0
    best_end = 0
    # Only the previous row of the suffix-length table is needed.
    prev_row = [0] * width
    for i, ch1 in enumerate(s1, 1):
        row = [0] * width
        for j, ch2 in enumerate(s2, 1):
            if ch1 != ch2:
                continue
            run = prev_row[j - 1] + 1
            row[j] = run
            if run > best_len:
                best_len, best_end = run, i
        prev_row = row

    if '*' in s1:
        candidate = s1[best_end - best_len:best_end]
        if candidate.startswith('*') and len(candidate) % 2 == 0:
            candidate = candidate[0:-1]
        return candidate

    # Snap the end down to an even offset, then keep an even number of
    # characters before it.
    aligned_end = best_end & ~1
    remaining = best_len - (best_end - aligned_end)
    aligned_start = aligned_end - (remaining & ~1)
    return s1[aligned_start:aligned_end]
def find_common_opcodes(s1, s2):
    '''
    Recursively split s1 and s2 around their common opcode runs.

    Returns a pair of lists, one per input, of (flag, text) tuples: flag 1
    marks a segment shared by both strings, flag 0 an unmatched remainder.
    If either input is empty, both lists are empty.
    '''
    if '' in (s1, s2):
        return [], []

    shared = longest_common_subopcode(s1, s2)
    if len(shared) < 2:
        # Nothing worth anchoring on: both strings are unmatched.
        return ([(0, s1)], [(0, s2)])

    head1, _, tail1 = s1.partition(shared)
    head2, _, tail2 = s2.partition(shared)

    left1, left2 = find_common_opcodes(head1, head2)
    right1, right2 = find_common_opcodes(tail1, tail2)

    anchor = [(1, shared)]
    return (left1 + anchor + right1, left2 + anchor + right2)
def longest_common_substring(s1, s2):
    '''
    Return the longest substring that occurs in both s1 and s2.

    On ties the match that ends earliest in s1 is returned. The result is
    '' when the strings share no character.
    '''
    width = len(s2) + 1
    best_len = 0
    best_end = 0
    # Only the previous row of the suffix-length table is needed.
    prev_row = [0] * width
    for i, ch1 in enumerate(s1, 1):
        row = [0] * width
        for j, ch2 in enumerate(s2, 1):
            if ch1 != ch2:
                continue
            run = prev_row[j - 1] + 1
            row[j] = run
            if run > best_len:
                best_len, best_end = run, i
        prev_row = row
    return s1[best_end - best_len:best_end]
def find_common_patterns(s1, s2):
    '''
    Recursively split s1 and s2 around their longest common substrings.

    Returns a pair of lists, one per input, of (flag, text) tuples: flag 1
    marks a segment shared by both strings, flag 0 an unmatched remainder.
    If either input is empty, both lists are empty.
    '''
    if '' in (s1, s2):
        return [], []

    shared = longest_common_substring(s1, s2)
    if len(shared) < 2:
        # Matches shorter than two characters are treated as no match.
        return ([(0, s1)], [(0, s2)])

    head1, _, tail1 = s1.partition(shared)
    head2, _, tail2 = s2.partition(shared)

    left1, left2 = find_common_patterns(head1, head2)
    right1, right2 = find_common_patterns(tail1, tail2)

    anchor = [(1, shared)]
    return (left1 + anchor + right1, left2 + anchor + right2)
# Idea: split the opcode string into an array of opcodes and diff that
# instead; the result might be better. Reject as soon as anything differs?


if __name__ == '__main__':
    # Small manual demo of the wildcard helpers.
    print('hello')

    # Patterns that already contain wildcards.
    a = '*o*.*.*.*m*'
    c = '*.*o*.*.*m'

    # Empty inputs.
    b = ''
    d = ''

    # Plain dotted names.
    d2 = 'com.system.activity.MyApplication'
    e = 'com.wjbl.mio.efj.ycgh.rywy.prl'

    print(get_wildcards(a, b))
    print(gen_wildcard_str(d, d2))
    print(get_wildcards(c, b))
    print(get_wildcards(d, e, 1))