import os.path
import re
from apkutils import gdiff, wildcard
from collections import OrderedDict
import xmltodict, json
[docs]class APK_Intersection:
def __init__(self, apks):
self.apks = apks
permission_pattern1 = r'uses-permission\s+?.*?:name="([^"]+?)"'
permission_pattern2 = r'android:permission="([^"]+?)"'
self.perm1_matcher = re.compile(permission_pattern1)
self.perm2_matcher = re.compile(permission_pattern2)
action_pattern = r'action\s+?.*?:name="([^"]+?)"'
self.action_matcher = re.compile(action_pattern)
category_pattern = r'<category\s[^>]*?:name="([^"]+?)"'
self.category_matcher = re.compile(category_pattern)
uses_feature_pattern = r'<uses-feature\s[^>]*?:name="([^"]+?)"'
self.uses_feature_matcher = re.compile(uses_feature_pattern)
activity_pattern = r'<activity\s[^>]*?:name="([^"]*?)"'
self.activity_matcher = re.compile(activity_pattern)
activity_alias_pattern = r'<activity-alias\s[^>]*?:name="([^"]*?)"'
self.activity_alias_matcher = re.compile(activity_alias_pattern)
receiver_pattern = r'<receiver\s[^>]*?:name="([^"]*?)"'
self.receiver_matcher = re.compile(receiver_pattern)
service_pattern = r'<service\s[^>]*?:name="([^"]*?)"'
self.service_matcher = re.compile(service_pattern)
provider_pattern = r'<provider\s[^>]*?:name="([^"]*?)"'
self.provider_matcher = re.compile(provider_pattern)
meta_name_pattern = r'<meta-data\s[^>]*?:name="([^"]*?)"'
self.meta_name_matcher = re.compile(meta_name_pattern)
meta_value_pattern = r'<meta-data\s[^>]*?:value="([^"]*?)"'
self.meta_value_matcher = re.compile(meta_value_pattern)
label_pattern = r'android:label="([^"]+?)"'
self.label_matcher = re.compile(label_pattern)
component_permission_pattern = r'<(\w+)\s[^>]*?:permission="([^"]*?)"'
self.component_permission_matcher = re.compile(
component_permission_pattern)
data_scheme_pattern = r'<data\s[^>]*?:scheme="([^"]*?)"'
self.data_scheme_matcher = re.compile(data_scheme_pattern)
data_mime_pattern = r'<data\s[^>]*?:mimeType="([^"]*?)"'
self.data_mime_matcher = re.compile(data_mime_pattern)
[docs] def get_permissions(self, mani):
perms = set()
iter = self.perm1_matcher.finditer(mani)
for item in iter:
perms.add(item.groups()[0])
iter = self.perm2_matcher.finditer(mani)
for item in iter:
perms.add(item.groups()[0])
return perms
[docs] def get_actions(self, mani):
actions = set()
iter = self.action_matcher.finditer(mani)
for item in iter:
actions.add(item.groups()[0])
return actions
[docs] def common(self, one, two):
"""清单内容交集,不一样的地方用*号表示。
注:只是简单的匹配,可能不如人意。
Args:
one (TYPE): 第一个清单
two (TYPE): 第二个清单
Returns:
TYPE: 清单交集
"""
dmp = gdiff.diff_match_patch()
diff = dmp.diff_main(one, two)
s = ''
for a, b in diff:
if a == 0:
s += b
else:
s += '*'
return s
[docs] def intersect_manifest_text(self):
result = None
for item in self.apks:
mani = item.get_mini_mani()
if not mani:
print(item.apk_path, 'not manifest')
continue
if not result:
result = mani
else:
result = self.common(result, mani)
return result
[docs] def intersect_manifest_tag_num(self):
result = {
# min max
'uses-permission': [0xFFFFFFFF, 0],
'activity': [0xFFFFFFFF, 0],
'receiver': [0xFFFFFFFF, 0],
'service': [0xFFFFFFFF, 0],
'provider': [0xFFFFFFFF, 0],
'version_code': [0xFFFFFFFF, 0],
}
for item in self.apks:
nums = item.get_manifest_tag_numbers()
if nums is None:
continue
for key, value in nums.items():
mm = result.get(key)
if mm[0] > value:
mm[0] = value
if mm[1] < value:
mm[1] = value
result[key] = mm
return result, nums
[docs] @staticmethod
def gen_words(s):
words = set()
size = len(s)
for offset in range(1, size+1):
for i in range(0, size+1-offset):
word = '.'.join(s[i:i+offset])
start = ''
end = ''
if i > 0:
start = '*.'
if size - offset > i:
end = '.*'
words.add(start + word + end)
return words
[docs] @staticmethod
def process_mani(mani):
j = xmltodict.parse(mani)
words = set()
def parseList(node, l):
for item in l:
if isinstance(item, OrderedDict):
parseOrderedDict(node, item)
else:
print('list', node, item)
def parseOrderedDict(node, od):
for k, v in od.items():
if isinstance(v, OrderedDict):
parseOrderedDict(k, v)
elif isinstance(v, list):
parseList(k, v)
elif isinstance(v, str):
words.add((node, k, v))
elif v is None:
continue
else:
print(type(v))
print(node, k, v)
parseOrderedDict('root', j)
return words
[docs] def intersect_manifest(self):
"""清单交集
Returns:
TYPE: 清单内容交集
"""
nums = {
# min max
'uses-permission': [0xFFFFFFFF, 0],
'activity': [0xFFFFFFFF, 0],
'receiver': [0xFFFFFFFF, 0],
'service': [0xFFFFFFFF, 0],
'provider': [0xFFFFFFFF, 0],
'version_code': [0xFFFFFFFF, 0],
}
is_first = True
words = {
'application': set(),
'activity': set(),
'activity-alias': set(),
'receiver': set(),
'service': set(),
'provider': set(),
}
words2 = set()
same = None
for apk in self.apks:
mani = apk.get_mini_mani()
mani = re.sub(r' \w+:', ' android:', mani)# 修复异常节点
mani = mani.replace('android:android="http://schemas.android.com/apk/res/android"', 'xmlns:android="http://schemas.android.com/apk/res/android"')
if not mani:
print(apk.apk_path, 'no mani')
continue
ms = self.process_mani(mani)
if is_first:
words2 = ms
else:
words2 &= ms
application = apk.get_application()
app_words = set()
if application:
app_words = APK_Intersection.gen_words(application.split('.'))
if is_first:
words['application'] = app_words
else:
words['application'] &= app_words
pieces = set()
for item in self.activity_matcher.finditer(mani):
piece = APK_Intersection.gen_words(item.groups()[0].split('.'))
pieces |= piece
if is_first:
words['activity'] = pieces
else:
words['activity'] &= pieces
pieces = set()
for item in self.receiver_matcher.finditer(mani):
piece = APK_Intersection.gen_words(item.groups()[0].split('.'))
pieces |= piece
if is_first:
words['receiver'] = pieces
else:
words['receiver'] &= pieces
pieces = set()
for item in self.service_matcher.finditer(mani):
piece = APK_Intersection.gen_words(item.groups()[0].split('.'))
pieces |= piece
if is_first:
words['service'] = pieces
else:
words['service'] &= pieces
pieces = set()
for item in self.provider_matcher.finditer(mani):
piece = APK_Intersection.gen_words(item.groups()[0].split('.'))
pieces |= piece
if is_first:
words['provider'] = pieces
else:
words['provider'] &= pieces
if is_first:
is_first = False
if not same:
same = mani
else:
same = self.common(same, mani)
mtn = apk.get_manifest_tag_numbers()
if mtn is None:
continue
for key, value in mtn.items():
mm = nums.get(key)
if mm[0] > value:
mm[0] = value
if mm[1] < value:
mm[1] = value
nums[key] = mm
return words, nums, words2
[docs] def intersect_dex_string_refx(self, filters):
"""字符串交集
真正的字符串,不包含类名、方法命。
特征方法中定义的、使用的字符串。
"""
def to_set(data):
"""通过类名,过滤不必要的字符串
Args:
data ([type]): [description]
Returns:
[type]: [description]
"""
strs = set()
for key, value in data.items():
skip = False
for item in filters:
if item in key:
skip = True
break
if skip:
continue
for _, v in value.items():
strs.update(v)
return strs
flag = True
strings = set()
for apk in self.apks:
if flag:
strings = to_set(apk.get_strings_refx())
flag = False
else:
strings = strings & to_set(apk.get_strings_refx())
return sorted(strings)
[docs] def intersect_dex_apis(self):
"""api字符串交集
真正的字符串不包含类名、方法名。
特征方法中定义的、使用的字符串。
"""
def to_set(data):
strs = set()
for key, value in data.items():
# print(key, value)
for _, v in value.items():
strs.update(v)
return strs
flag = True
strings = set()
for apk in self.apks:
if flag:
strings = to_set(apk.get_methods_refx())
flag = False
else:
strings = strings & to_set(apk.get_methods_refx())
return sorted(strings)
[docs] def intersect_dex_string(self):
flag = True
strings = set()
for apk in self.apks:
if flag:
strings = set(apk.get_strings())
flag = False
else:
strings = strings & set(apk.get_strings())
return sorted(strings)
[docs] def intersect_dex_opcode(self, is_wildcard, is_obj):
"""[summary]
Args:
is_wildcard (bool): 是否通配
is_obj (bool): 父类是否为Object
Returns:
[type]: [description]
"""
ops_set = set()
fuzzy_ops_set = set()
method_dict = dict()
is_first = True
common_opcodes = []
for apk in self.apks:
opcodes = apk.get_opcodes()
if is_first:
for item in opcodes:
super_class = item['super_class']
if is_obj and super_class != 'java/lang/Object':
continue
if not is_obj and super_class == 'java/lang/Object':
continue
common_opcodes.append(item)
is_first = False
continue
next_common_opcodes = []
for item1 in opcodes:
sup1 = item1['super_class']
if is_obj and sup1 != 'java/lang/Object':
continue
if not is_obj and sup1 == 'java/lang/Object':
continue
proto1 = item1['proto']
opcs1 = item1['opcodes']
len1 = len(opcs1)
if len1 < 10:
continue
max_item = None
max_ratio = 0
max_len = 0
for item2 in common_opcodes:
sup2 = item2['super_class']
proto2 = item2['proto']
opcs2 = item2['opcodes']
if (sup1, proto1, opcs1) == (sup2, proto2, opcs2):
if item2 not in next_common_opcodes:
next_common_opcodes.append(item2)
break
if is_wildcard and (sup1, proto1) == (sup2, proto2):
len2 = len(opcs2)
ratio = wildcard.get_ratio(opcs1, opcs2, 2)
if ratio > max_ratio:
max_ratio = ratio
max_item = item2
if max_ratio:
com_opcs = wildcard.get_wildcards(
opcs1, max_item['opcodes'])
len2 = len(com_opcs)
if len1 > len2:
max_len = len1
else:
max_len = len2
max_item['max_len'] = max_len
max_item['opcodes'] = com_opcs
next_common_opcodes.append(max_item)
common_opcodes = next_common_opcodes
return common_opcodes
[docs] def intersect_mf(self):
pass
[docs] def intersect_dex_tree(self):
md5s = set()
flag = True
ftree = None
for apk in self.apks:
result = apk.get_trees()
if flag:
ftree = result
if not result:
continue
if flag:
md5s = result.keys()
flag = False
else:
md5s = md5s & result.keys()
return (ftree, md5s)
[docs] def intersect_apis(self):
for apk in self.apks:
print(apk.get_methods_refx())
return
[docs] def intersect_arsc(self):
flag = True
result = set()
for apk in self.apks:
arsc = apk.get_arsc()
pns = arsc.get_packages_names()
tmps = set()
for item in pns:
for sr in arsc.get_string_resources(item):
tmps.add((sr['name'], sr['value']))
if flag:
flag = False
result = tmps
else:
result = result & tmps
return result
[docs] def intersect_files(self):
flag1 = True
files1 = set()
for apk in self.apks:
tmps = set()
for item in apk.get_files():
tmps.add((item['name'], item['crc']))
if flag1:
files1 = tmps
flag1 = False
else:
files1 = files1 & tmps
flag1 = True
files2 = set()
for apk in self.apks:
tmps = set()
for item in apk.get_files():
tmps.add(item.get('name'))
if flag1:
files2 = tmps
flag1 = False
else:
files2 = files2 & tmps
flag1 = True
files3 = set()
for apk in self.apks:
tmps = set()
for item in apk.get_files():
tmps.add(item['crc'])
if flag1:
files3 = tmps
flag1 = False
else:
files3 = files3 & tmps
return sorted(files1), sorted(files2), sorted(files3)
[docs] def intersect_certs(self):
for apk in self.apks:
print(apk.get_certs())