from struct import pack, unpack
from xml.dom import minidom
from xml.sax.saxutils import escape
from apkutils.axml import public
from apkutils.axml.chunk import BuffHandle, StringPoolChunk
# AXML FORMAT #
# Translated from
# http://code.google.com/p/android4me/source/browse/src/android/content/res/AXmlResourceParser.java
# String-pool related tags (not referenced by the parser code in this file).
UTF8_FLAG = 0x00000100
CHUNK_STRINGPOOL_TYPE = 0x001C0001
CHUNK_NULL_TYPE = 0x00000000

# A start tag stores every attribute as ATTRIBUTE_LENGHT consecutive 32-bit
# words; the ATTRIBUTE_IX_* values are the word offsets inside one record.
# The misspelled name ATTRIBUTE_LENGHT is the one used throughout the file.
(ATTRIBUTE_IX_NAMESPACE_URI,
 ATTRIBUTE_IX_NAME,
 ATTRIBUTE_IX_VALUE_STRING,
 ATTRIBUTE_IX_VALUE_TYPE,
 ATTRIBUTE_IX_VALUE_DATA,
 ATTRIBUTE_LENGHT) = range(6)

# File magic: AXMLParser accepts any value in [MAGIC_NUMBER_MIN, MAGIC_NUMBER_MAX].
MAGIC_NUMBER_MIN = 0x00080000
MAGIC_NUMBER = 0x00080003
MAGIC_NUMBER_MAX = 0x00080009
CHUNK_AXML_FILE = {MAGIC_NUMBER, MAGIC_NUMBER_MAX}

# Chunk holding the table of resource ids.
CHUNK_RESOURCEIDS = 0x00080180

# XML chunk types form a contiguous range [CHUNK_XML_FIRST, CHUNK_XML_LAST].
CHUNK_XML_FIRST = 0x00100100
CHUNK_XML_START_NAMESPACE = CHUNK_XML_FIRST
CHUNK_XML_END_NAMESPACE = CHUNK_XML_FIRST + 1
CHUNK_XML_START_TAG = CHUNK_XML_FIRST + 2
CHUNK_XML_END_TAG = CHUNK_XML_FIRST + 3
CHUNK_XML_TEXT = CHUNK_XML_FIRST + 4
CHUNK_XML_LAST = CHUNK_XML_TEXT

# Event codes reported by AXMLParser (-1 means "no event").
START_DOCUMENT, END_DOCUMENT, START_TAG, END_TAG, TEXT = range(5)
class AXMLParser(object):
    """Event-based reader for an Android binary XML (AXML) buffer.

    ``next(parser)`` advances to the next event and returns its code:
    ``START_DOCUMENT``, ``START_TAG``, ``END_TAG``, ``TEXT`` or
    ``END_DOCUMENT``.  ``-1`` is returned when parsing stopped on a chunk
    that could not be handled.  The ``get_*`` accessors describe the event
    the parser is currently positioned on.
    """

    def __init__(self, raw_buff):
        """Read the file header and the string pool from ``raw_buff``.

        Raises:
            Exception: if the leading magic number is outside
                ``[MAGIC_NUMBER_MIN, MAGIC_NUMBER_MAX]``.
        """
        self.reset()
        self.file_size = 0
        self.valid_axml = True
        # Index of the most recently opened namespace URI, -1 when none.
        self.ns = -1
        self.buff = BuffHandle(raw_buff)

        magic_number = self._read_u32()
        # MAGIC_NUMBER itself lies inside this range, so one check suffices.
        if not MAGIC_NUMBER_MIN <= magic_number <= MAGIC_NUMBER_MAX:
            self.valid_axml = False
            raise Exception("It's a invalid xml file.")

        self.file_size = self._read_u32()
        self.sb = StringPoolChunk(self.buff)
        self.m_resourceIDs = []
        self.m_prefixuri = {}
        self.m_uriprefix = {}
        self.m_prefixuriL = []
        self.visited_ns = []

    def _read_u32(self):
        """Read the next four bytes as a little-endian unsigned integer."""
        return unpack('<L', self.buff.read(4))[0]

    def is_valid(self):
        """Return True when the header check in ``__init__`` succeeded."""
        return self.valid_axml

    def reset(self):
        """Clear all per-event state back to the "no event" value (-1)."""
        self.m_event = -1
        self.m_lineNumber = -1
        self.m_name = -1
        self.m_namespaceUri = -1
        self.m_attributes = []
        self.m_idAttribute = -1
        self.m_classAttribute = -1
        self.m_styleAttribute = -1

    def __next__(self):
        """Advance one event and return its code (see ``do_next``)."""
        self.do_next()
        return self.m_event

    def do_next(self):
        """Advance to the next event and store its code in ``m_event``.

        Resource-id and namespace chunks are consumed silently.  The first
        start-tag chunk is reported twice: once as a synthetic
        ``START_DOCUMENT`` and, on the following call, as ``START_TAG``.
        ``m_event`` is left at -1 when an unknown or malformed chunk is met.
        """
        if self.m_event == END_DOCUMENT:
            return

        event = self.m_event
        self.reset()
        while True:
            chunkType = -1
            if event == START_DOCUMENT:
                # The previous call already consumed this chunk's type word
                # in order to emit the synthetic START_DOCUMENT event.
                chunkType = CHUNK_XML_START_TAG
            else:
                if self.buff.end():
                    self.m_event = END_DOCUMENT
                    break
                data4 = self.buff.read(4)
                # An empty or truncated read leaves chunkType at -1, which
                # ends the parse below instead of raising struct.error.
                if data4 and len(data4) == 4:
                    chunkType = unpack('<L', data4)[0]

            if chunkType == CHUNK_RESOURCEIDS:
                chunkSize = self._read_u32()
                if chunkSize < 8 or chunkSize % 4 != 0:
                    break
                # The size counts the two header words already consumed.
                for _ in range(chunkSize // 4 - 2):
                    self.m_resourceIDs.append(self._read_u32())
                continue

            if chunkType < CHUNK_XML_FIRST or chunkType > CHUNK_XML_LAST:
                break

            # Fake START_DOCUMENT event.
            if chunkType == CHUNK_XML_START_TAG and event == -1:
                self.m_event = START_DOCUMENT
                break

            self.buff.read(4)  # chunk size
            lineNumber = self._read_u32()
            self.buff.read(4)  # 0xFFFFFFFF

            if chunkType in (CHUNK_XML_START_NAMESPACE,
                             CHUNK_XML_END_NAMESPACE):
                if chunkType == CHUNK_XML_START_NAMESPACE:
                    prefix = self._read_u32()
                    uri = self._read_u32()
                    self.m_prefixuri[prefix] = uri
                    self.m_uriprefix[uri] = prefix
                    self.m_prefixuriL.append((prefix, uri))
                    self.ns = uri
                else:
                    self.ns = -1
                    self.buff.read(4)
                    self.buff.read(4)
                    # Tolerate an end-namespace chunk without a matching
                    # start instead of failing with IndexError.
                    if self.m_prefixuriL:
                        self.m_prefixuriL.pop()
                continue

            self.m_lineNumber = lineNumber

            if chunkType == CHUNK_XML_START_TAG:
                self.m_namespaceUri = self._read_u32()
                self.m_name = self._read_u32()
                self.buff.read(4)  # flags
                attributeCount = self._read_u32()
                # High half-word: 1-based id attribute index; low: count.
                self.m_idAttribute = (attributeCount >> 16) - 1
                attributeCount &= 0xFFFF
                classAttribute = self._read_u32()
                self.m_styleAttribute = (classAttribute >> 16) - 1
                self.m_classAttribute = (classAttribute & 0xFFFF) - 1

                for _ in range(attributeCount * ATTRIBUTE_LENGHT):
                    self.m_attributes.append(self._read_u32())
                # Keep only the top byte of each value-type word.
                for i in range(ATTRIBUTE_IX_VALUE_TYPE,
                               len(self.m_attributes), ATTRIBUTE_LENGHT):
                    self.m_attributes[i] >>= 24

                self.m_event = START_TAG
                break

            if chunkType == CHUNK_XML_END_TAG:
                self.m_namespaceUri = self._read_u32()
                self.m_name = self._read_u32()
                self.m_event = END_TAG
                break

            if chunkType == CHUNK_XML_TEXT:
                self.m_name = self._read_u32()
                self.buff.read(4)
                self.buff.read(4)
                self.m_event = TEXT
                break

    def get_prefix_by_uri(self, uri):
        """Return the prefix string index registered for ``uri``, or -1."""
        try:
            return self.m_uriprefix[uri]
        except KeyError:
            return -1

    def get_prefix(self):
        """Return the namespace prefix of the current tag, '' if unknown."""
        try:
            return self.sb.getString(self.m_uriprefix[self.m_namespaceUri])
        except KeyError:
            return ''

    def get_name(self):
        """Return the tag name for START_TAG/END_TAG events, else ''."""
        if self.m_name == -1 or self.m_event not in (START_TAG, END_TAG):
            return ''
        return self.sb.getString(self.m_name)

    def get_text(self):
        """Return the text of the current TEXT event, else ''."""
        if self.m_name == -1 or self.m_event != TEXT:
            return ''
        return self.sb.getString(self.m_name)

    def get_namespace_prefix(self, pos):
        """Return the prefix string of the open namespace at ``pos``."""
        prefix = self.m_prefixuriL[pos][0]
        return self.sb.getString(prefix)

    def get_namespace_uri(self, pos):
        """Return the URI string of the open namespace at ``pos``."""
        uri = self.m_prefixuriL[pos][1]
        return self.sb.getString(uri)

    def get_xmlns(self):
        """Return ``xmlns:`` declarations for namespaces not yet reported.

        Every namespace URI is emitted at most once over the lifetime of the
        parser; reported URIs are remembered in ``visited_ns``.
        """
        declarations = []
        for uri in self.m_uriprefix:
            if uri in self.visited_ns:
                continue
            prefix = self.m_uriprefix[uri]
            declarations.append("xmlns:%s=\"%s\"\n" % (
                self.sb.getString(prefix),
                self.sb.getString(self.m_prefixuri[prefix])))
            self.visited_ns.append(uri)
        return "".join(declarations)

    def get_attribute_offset(self, index):
        """Return the position in ``m_attributes`` of attribute ``index``.

        Problems (wrong event, index out of range) are only reported on
        stdout; the computed offset is returned regardless.
        """
        if self.m_event != START_TAG:
            print("Current event is not START_TAG.")
        offset = index * ATTRIBUTE_LENGHT
        if offset >= len(self.m_attributes):
            print("Invalid attribute index")
        return offset

    def get_attribute_count(self):
        """Return the attribute count of the current START_TAG, else -1."""
        if self.m_event != START_TAG:
            return -1
        # Floor division keeps the count an int, usable directly in range().
        return len(self.m_attributes) // ATTRIBUTE_LENGHT

    def get_attribute_prefix(self, index):
        """Return the namespace prefix of attribute ``index``, '' if none."""
        offset = self.get_attribute_offset(index)
        uri = self.m_attributes[offset + ATTRIBUTE_IX_NAMESPACE_URI]
        prefix = self.get_prefix_by_uri(uri)
        if prefix == -1:
            return ""
        return self.sb.getString(prefix)

    def get_attribute_name(self, index):
        """Return the name of attribute ``index``.

        When the string pool yields no name, the resource-id table is
        consulted and a matching system attribute is returned with an
        ``android:`` prefix.
        """
        offset = self.get_attribute_offset(index)
        name = self.m_attributes[offset + ATTRIBUTE_IX_NAME]
        if name == -1:
            return ""
        res = self.sb.getString(name)
        # Guard the lookup: the resource-id table may be shorter than the
        # string pool (or missing altogether).
        if not res and name < len(self.m_resourceIDs):
            attr = self.m_resourceIDs[name]
            system_attrs = public.SYSTEM_RESOURCES['attributes']['inverse']
            if attr in system_attrs:
                res = 'android:' + system_attrs[attr]
        return res

    def get_attribute_valueType(self, index):
        """Return the value-type code of attribute ``index``."""
        offset = self.get_attribute_offset(index)
        return self.m_attributes[offset + ATTRIBUTE_IX_VALUE_TYPE]

    def get_attribute_value_data(self, index):
        """Return the raw 32-bit value data of attribute ``index``."""
        offset = self.get_attribute_offset(index)
        return self.m_attributes[offset + ATTRIBUTE_IX_VALUE_DATA]

    def get_attribute_value(self, index):
        """Return the string value of attribute ``index``.

        Only ``TYPE_STRING`` attributes are resolved here; every other type
        yields ''.
        """
        offset = self.get_attribute_offset(index)
        valueType = self.m_attributes[offset + ATTRIBUTE_IX_VALUE_TYPE]
        if valueType == TYPE_STRING:
            valueString = self.m_attributes[offset + ATTRIBUTE_IX_VALUE_STRING]
            return self.sb.getString(valueString)
        # WIP
        return ""
# Parser event codes (same values as the definitions earlier in the file).
START_DOCUMENT, END_DOCUMENT, START_TAG, END_TAG, TEXT = range(5)

# Attribute value-type codes, listed in numeric order.
TYPE_NULL = 0
TYPE_REFERENCE = 1
TYPE_ATTRIBUTE = 2
TYPE_STRING = 3
TYPE_FLOAT = 4
TYPE_DIMENSION = 5
TYPE_FRACTION = 6

# Integer-like types occupy [TYPE_FIRST_INT, TYPE_LAST_INT]; the colour
# formats are the sub-range [TYPE_FIRST_COLOR_INT, TYPE_LAST_COLOR_INT].
TYPE_FIRST_INT = TYPE_INT_DEC = 16
TYPE_INT_HEX = 17
TYPE_INT_BOOLEAN = 18
TYPE_FIRST_COLOR_INT = TYPE_INT_COLOR_ARGB8 = 28
TYPE_INT_COLOR_RGB8 = 29
TYPE_INT_COLOR_ARGB4 = 30
TYPE_INT_COLOR_RGB4 = TYPE_LAST_COLOR_INT = TYPE_LAST_INT = 31

# Lookup tables for dimension and fraction values: the radix multiplier is
# picked by bits 4-5 of the data word, the unit suffix by its low four bits
# (COMPLEX_UNIT_MASK).
RADIX_MULTS = [0.00390625, 3.051758e-05, 1.192093e-07, 4.656613e-10]
DIMENSION_UNITS = ["px", "dip", "sp", "pt", "in", "mm", "", ""]
FRACTION_UNITS = ["%", "%p", "", "", "", "", "", ""]
COMPLEX_UNIT_MASK = 0xF
class AXML:
    """Convert an Android binary XML buffer into textual XML.

    The conversion runs in the constructor; the result is available through
    ``get_buff`` / ``get_xml`` (text) or ``get_xml_obj`` (minidom document).
    """

    def __init__(self, raw_buff):
        """Parse ``raw_buff`` with ``AXMLParser`` and render it to text."""
        self.parser = AXMLParser(raw_buff)
        self.xmlns = False
        # Holds the XML text produced by parse().
        self.buff = ''
        self.is_valid = True
        if self.parser.is_valid():
            self.parse()
        else:
            self.is_valid = False

    def parse(self):
        """Pull events from the parser and append their XML to ``buff``.

        Stops on END_DOCUMENT, on an abnormal event code (-1), or as soon as
        the text produced so far contains ``</manifest>``.
        """
        while True:
            _type = next(self.parser)
            # An abnormal event type ends the conversion immediately.
            if _type == -1:
                break
            if "</manifest>" in self.buff:
                break

            if _type == START_DOCUMENT:
                self.buff += '''<?xml version="1.0" encoding="utf-8"?>\n'''
            elif _type == START_TAG:
                name = self.get_prefix(
                    self.parser.get_prefix()) + self.parser.get_name()
                # Nameless elements get the same placeholder that the
                # END_TAG branch uses, so the two tags match.
                if not name:
                    name = "notag"
                pieces = ['<' + name + '\n', self.parser.get_xmlns()]
                for i in range(int(self.parser.get_attribute_count())):
                    pieces.append("%s%s=\"%s\"\n" % (
                        self.get_prefix(self.parser.get_attribute_prefix(i)),
                        self.parser.get_attribute_name(i),
                        self._escape(self.get_attribute_value(i))))
                pieces.append('>\n')
                self.buff += ''.join(pieces)
            elif _type == END_TAG:
                name = self.get_prefix(
                    self.parser.get_prefix()) + self.parser.get_name()
                if not name:
                    name = "notag"
                self.buff += "</%s>\n" % (name)
            elif _type == TEXT:
                self.buff += "%s\n" % self.parser.get_text()
            elif _type == END_DOCUMENT:
                break

    def _escape(self, s):
        """Escape ``s`` for use inside a double-quoted XML attribute.

        ``&``, ``<`` and ``>`` are handled by ``xml.sax.saxutils.escape``;
        both quote characters are supplied as extra entities.  Escaping
        happens in a single pass so that ``&`` is never escaped twice.
        """
        return escape(s, {'"': "&quot;", "'": "&apos;"})

    def get_buff(self):
        """Return the generated XML text exactly as accumulated."""
        return self.buff

    def get_xml(self):
        """Return the generated XML text with NUL characters removed."""
        return self.buff.replace('\0', '')

    def _format_xml(self):
        """Return the XML re-indented by minidom.

        The first pretty-printed pass is stripped of tabs and newlines and
        pretty-printed again.
        """
        tmp = minidom.parseString(self.get_buff()).toprettyxml()
        flattened = str(tmp).replace('\t', '').replace('\n', '')
        return minidom.parseString(flattened).toprettyxml()

    def get_xml_obj(self):
        """Return the generated XML parsed into a minidom document."""
        return minidom.parseString(self.get_buff())

    def get_prefix(self, prefix):
        """Return ``prefix + ':'``, or '' when there is no prefix.

        Some abnormal nodes carry no prefix at all and need this special
        handling.
        """
        if prefix is None or len(prefix) == 0:
            return ''
        return prefix + ':'

    def get_attribute_value(self, index):
        """Return the textual form of attribute ``index`` of the current tag.

        The rendering depends on the attribute's value type; unknown types
        are shown as ``<0xDATA, type 0xTT>``.
        """
        _type = self.parser.get_attribute_valueType(index)
        _data = self.parser.get_attribute_value_data(index)

        if _type == TYPE_STRING:
            return self.parser.get_attribute_value(index)
        if _type == TYPE_ATTRIBUTE:
            return "?%s%08X" % (self.get_package(_data), _data)
        if _type == TYPE_REFERENCE:
            return "@%s%08X" % (self.get_package(_data), _data)
        if _type == TYPE_FLOAT:
            # Reinterpret the 32-bit word as an IEEE-754 float.
            return "%f" % unpack("=f", pack("=L", _data))[0]
        if _type == TYPE_INT_HEX:
            return "0x%08X" % _data
        if _type == TYPE_INT_BOOLEAN:
            return "false" if _data == 0 else "true"
        if _type == TYPE_DIMENSION:
            return "%f%s" % (self.complexToFloat(_data),
                             DIMENSION_UNITS[_data & COMPLEX_UNIT_MASK])
        if _type == TYPE_FRACTION:
            return "%f%s" % (self.complexToFloat(_data),
                             FRACTION_UNITS[_data & COMPLEX_UNIT_MASK])
        # The colour range is a sub-range of the int range: test it first.
        if TYPE_FIRST_COLOR_INT <= _type <= TYPE_LAST_COLOR_INT:
            return "#%08X" % _data
        if TYPE_FIRST_INT <= _type <= TYPE_LAST_INT:
            return "%d" % int(_data)
        return "<0x%X, type 0x%02X>" % (_data, _type)

    def complexToFloat(self, xcomplex):
        """Scale the upper 24 bits of ``xcomplex`` by its radix multiplier.

        Bits 4-5 select the entry of ``RADIX_MULTS``.
        """
        return float(xcomplex & 0xFFFFFF00) * RADIX_MULTS[(xcomplex >> 4) & 3]

    def get_package(self, id):
        """Return ``"android:"`` when the top byte of ``id`` is 1, else ''."""
        if id >> 24 == 1:
            return "android:"
        return ""

    def get_content(self):
        """Return ``self.content``."""
        # NOTE(review): nothing in this class assigns ``self.content``, so
        # this raises AttributeError unless a caller or subclass sets it.
        # Confirm the intended value (possibly ``self.buff``).
        return self.content