# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import array
from apkutils.dex.byteio import Reader
from apkutils.dex.dalvik import parseBytecode
from apkutils.dex.util import signExtend
# Sentinel index meaning "no entry": DexClass treats a superclass index equal
# to this value as "no superclass", and DexFile.type() only resolves indices
# strictly below it.
NO_INDEX = 0xFFFFFFFF
def typeList(dex, off, parseClsDesc=False):
    """Read the type list stored at byte offset ``off`` of ``dex``.

    The list is a u32 element count followed by that many u16 type indices.
    Each index is resolved with ``dex.clsType`` when ``parseClsDesc`` is true
    and with ``dex.type`` otherwise.

    An offset of 0 means "no list" and yields an empty list.
    """
    if off == 0:
        return []

    reader = dex.stream(off)
    count = reader.u32()

    # Read every index before resolving any of them.
    type_indices = []
    for _ in range(count):
        type_indices.append(reader.u16())

    if parseClsDesc:
        resolve = dex.clsType
    else:
        resolve = dex.type
    return [resolve(idx) for idx in type_indices]
def _signExtendTo(val, nbytes, width):
    """Sign-extend the ``nbytes``-byte integer ``val`` to ``width`` bits.

    The result is returned as an unsigned (two's complement) integer in the
    range ``[0, 2**width)``.
    """
    sign_bit = 1 << (nbytes * 8 - 1)
    if val & sign_bit:
        val -= sign_bit << 1
    return val % (1 << width)


def encodedValue(dex, stream):
    """Decode one encoded_value from ``stream``.

    Returns:
        * a list of decoded values for an ARRAY,
        * ``None`` for NULL, for ANNOTATION (parsed only to skip over it) and
          for any value type not handled below,
        * otherwise a ``(descriptor, value)`` tuple. Integral and floating
          point values are returned as unsigned integers holding the raw
          32 bit (``b'I'``, ``b'F'``) or 64 bit (``b'J'``, ``b'D'``) pattern;
          strings and types are resolved through ``dex``.
    """
    tag = stream.u8()
    vtype, varg = tag & 31, tag >> 5

    if vtype == 0x1c:  # ARRAY
        size = stream.uleb128()
        return [encodedValue(dex, stream) for _ in range(size)]
    if vtype == 0x1d:  # ANNOTATION
        # We don't actually care about annotations but still need to read it
        # to find out how much data is taken up
        stream.uleb128()
        for _ in range(stream.uleb128()):
            stream.uleb128()
            encodedValue(dex, stream)
        return None
    if vtype == 0x1e:  # NULL
        return None

    # For the rest, we just return it as unsigned integers without recording
    # type, extended to either u32 or u64 depending on int/float or long/double
    if vtype == 0x1f:  # BOOLEAN
        return b'I', varg

    # the rest are an int encoded little-endian into varg + 1 bytes
    size = varg + 1
    val = sum(stream.u8() << (i * 8) for i in range(size))

    # Signed types may be stored in fewer bytes than their full width, so the
    # sign bit is the top bit of the bytes actually present in the stream.
    if vtype == 0x00:  # BYTE
        return b'I', _signExtendTo(val, size, 32)
    if vtype == 0x02:  # SHORT
        return b'I', _signExtendTo(val, size, 32)
    if vtype == 0x03:  # CHAR (unsigned, zero extended)
        return b'I', val
    if vtype == 0x04:  # INT
        return b'I', _signExtendTo(val, size, 32)
    if vtype == 0x06:  # LONG
        return b'J', _signExtendTo(val, size, 64)

    # floats are 0 extended to the right
    if vtype == 0x10:  # FLOAT
        return b'F', val << (32 - size * 8)
    if vtype == 0x11:  # DOUBLE
        return b'D', val << (64 - size * 8)

    if vtype == 0x17:  # STRING
        return b'Ljava/lang/String;', dex.string(val)
    if vtype == 0x18:  # TYPE
        return b'Ljava/lang/Class;', dex.clsType(val)

    # Any other value type: its bytes have been consumed, nothing is reported.
    return None
class MFIdMixin:
    """Shared behaviour for method and field ids.

    Classes using this mixin provide ``cname``, ``name`` and ``desc``
    attributes.
    """

    def triple(self):
        """Return the ``(cname, name, desc)`` tuple identifying the member."""
        owner = self.cname
        member = self.name
        descriptor = self.desc
        return (owner, member, descriptor)
class FieldId(MFIdMixin):
    """Field id record ``field_idx`` of the dex field_ids table."""

    def __init__(self, dex, field_idx):
        # Each record is 8 bytes: u16 class index, u16 type index and
        # u32 name string index.
        record_off = dex.field_ids.off + field_idx * 8
        reader = dex.stream(record_off)

        class_idx = reader.u16()
        self.cname = dex.clsType(class_idx)

        type_idx = reader.u16()
        self.desc = dex.type(type_idx)

        name_idx = reader.u32()
        self.name = dex.string(name_idx)
class Field:
    """A field declared in a class' data section.

    ``id`` is the resolved FieldId and ``access`` is the value passed in by
    the caller. ``constant_value`` starts out as None and is assigned later
    by DexClass.parseData() for fields that have an encoded initial value.
    """

    def __init__(self, dex, field_idx, access):
        self.dex = dex
        self.id = FieldId(dex, field_idx)
        self.access = access
        # Placeholder until the constant values are parsed.
        self.constant_value = None
class MethodId(MFIdMixin):
    """Method id record ``method_idx`` of the dex method_ids table.

    Besides ``cname``, ``name`` and ``desc`` it exposes the resolved
    ``return_type`` and the list of ``param_types``.
    """

    def __init__(self, dex, method_idx):
        # method_id record: u16 class index, u16 proto index, u32 name index.
        id_reader = dex.stream(dex.method_ids.off + method_idx * 8)
        self.cname = dex.clsType(id_reader.u16())
        proto_idx = id_reader.u16()
        self.name = dex.string(id_reader.u32())

        # proto_id record: three u32s (shorty index, return type index,
        # parameter list offset). The shorty index is read but not used.
        proto_reader = dex.stream(dex.proto_ids.off + proto_idx * 12)
        proto_reader.u32()
        return_idx = proto_reader.u32()
        parameters_off = proto_reader.u32()

        self.return_type = dex.type(return_idx)
        self.param_types = typeList(dex, parameters_off)

        # Rearrange things to Java format: "(" params ")" return type.
        self.desc = b''.join([b'(', *self.param_types, b')', self.return_type])

    def getSpacedParamTypes(self, isstatic):
        """Return the parameter types with a None after every wide type.

        For non-static methods the receiver comes first: ``cname`` itself
        when it is an array descriptor, otherwise ``cname`` wrapped as
        ``L...;``. Every ``J`` or ``D`` parameter is followed by a None
        entry.
        """
        spaced = []
        if not isstatic:
            if self.cname.startswith(b'['):
                receiver = self.cname
            else:
                receiver = b'L' + self.cname + b';'
            spaced.append(receiver)

        for ptype in self.param_types:
            spaced.append(ptype)
            if ptype in (b'J', b'D'):
                spaced.append(None)
        return spaced
class TryItem:
    """One try range of a code item together with its catch handlers.

    ``start``/``end`` delimit the covered range (``end = start + count``).
    ``catches`` is None until finish() has been called.
    """

    def __init__(self, stream):
        # Layout: u32 start, u16 count, u16 handler offset.
        self.start = stream.u32()
        self.count = stream.u16()
        self.handler_off = stream.u16()
        self.end = self.start + self.count
        self.catches = None  # to be filled in later

    def finish(self, dex, list_off):
        """Parse this item's handler list into ``self.catches``.

        ``list_off`` is the offset of the handler list area; this item's
        ``handler_off`` is added to it. ``catches`` becomes a list of
        ``(class name, handler address)`` pairs. A size that is zero or
        negative means a trailing catch-all address follows, recorded under
        ``java/lang/Throwable``.
        """
        reader = dex.stream(list_off + self.handler_off)
        signed_count = reader.sleb128()

        handlers = []
        self.catches = handlers
        for _ in range(abs(signed_count)):
            caught = dex.clsType(reader.uleb128())
            target = reader.uleb128()
            handlers.append((caught, target))

        if signed_count <= 0:
            catch_all = reader.uleb128()
            handlers.append((b'java/lang/Throwable', catch_all))
class CodeItem:
    """Parsed code item: register count, try items and decoded bytecode."""

    def __init__(self, dex, offset):
        reader = dex.stream(offset)

        # Header: registers_size, ins_size, outs_size, tries_size (u16 each),
        # then debug_off and insns_size (u32 each). Only some are kept.
        self.nregs = reader.u16()
        reader.u16()  # ins_size, unused
        reader.u16()  # outs_size, unused
        num_tries = reader.u16()
        reader.u32()  # debug_off, unused
        self.insns_size = reader.u32()

        code_start = reader.pos
        code_units = [reader.u16() for _ in range(self.insns_size)]

        # With an odd number of code units, one padding u16 precedes the try
        # items (it is only present when there are try items at all).
        if num_tries and self.insns_size & 1:
            reader.u16()

        self.tries = [TryItem(reader) for _ in range(num_tries)]
        # The handler lists start right after the try items.
        self.list_off = reader.pos
        for try_item in self.tries:
            try_item.finish(dex, self.list_off)

        # Gather every handler address so it can be passed to the parser.
        handler_addrs = {
            addr for try_item in self.tries for _, addr in try_item.catches
        }
        self.bytecode = parseBytecode(dex, code_start, code_units, handler_addrs)
class Method:
    """A method declared in a class' data section.

    ``code`` is the parsed CodeItem, or None when ``code_off`` is 0 (falsy).
    """

    def __init__(self, dex, method_idx, access, code_off):
        self.dex = dex
        self.id = MethodId(dex, method_idx)
        self.access = access
        self.code_off = code_off
        if code_off:
            self.code = CodeItem(dex, code_off)
        else:
            self.code = None
class ClassData:
    """Fields and methods listed in a class' data section.

    ``fields`` holds the static fields followed by the instance fields;
    ``methods`` holds the direct methods followed by the virtual methods.
    """

    def __init__(self, dex, offset):
        self.fields = []
        self.methods = []
        # for offset 0, leave dummy data with no fields or methods
        if offset != 0:
            self._parse(dex, dex.stream(offset))

    def _parse(self, dex, stream):
        """Fill ``fields`` and ``methods`` from ``stream``."""
        # Four uleb128 counts come first: static fields, instance fields,
        # direct methods, virtual methods.
        static_count, instance_count, direct_count, virtual_count = [
            stream.uleb128() for _ in range(4)
        ]

        for group_size in (static_count, instance_count):
            # Indices are stored as differences from the previous entry and
            # restart from 0 for each group.
            field_idx = 0
            for _ in range(group_size):
                field_idx += stream.uleb128()
                access = stream.uleb128()
                self.fields.append(Field(dex, field_idx, access))

        for group_size in (direct_count, virtual_count):
            method_idx = 0
            for _ in range(group_size):
                method_idx += stream.uleb128()
                access = stream.uleb128()
                code_off = stream.uleb128()
                self.methods.append(Method(dex, method_idx, access, code_off))
class DexClass:
    """Class definition number ``i`` of the table starting at ``base_off``.

    The header fields are read eagerly; the class data (fields and methods)
    is only parsed when parseData() is called.
    """

    def __init__(self, dex, base_off, i):
        self.dex = dex
        # Every class definition record is 32 bytes (eight u32 words).
        reader = dex.stream(base_off + i * 32)

        self.name = dex.clsType(reader.u32())
        self.access = reader.u32()

        super_idx = reader.u32()
        if super_idx != NO_INDEX:
            self.super = dex.clsType(super_idx)
        else:
            self.super = None

        self.interfaces = typeList(dex, reader.u32(), parseClsDesc=True)
        reader.u32()  # ignore sourcefile for now
        reader.u32()  # ignore annotations for now
        self.data_off = reader.u32()
        self.data = None  # parse data lazily in parseData()
        self.constant_values_off = reader.u32()

    def parseData(self):
        """Parse the class data once and attach the fields' constant values.

        Does nothing when the data has already been parsed.
        """
        if self.data is not None:
            return

        self.data = ClassData(self.dex, self.data_off)
        if not self.constant_values_off:
            return

        stream = self.dex.stream(self.constant_values_off)
        # A uleb128 count is followed by that many encoded values, which are
        # assigned to the leading fields in order.
        fields = self.data.fields
        value_count = stream.uleb128()
        for field in fields[:value_count]:
            field.constant_value = encodedValue(self.dex, stream)
class SizeOff:
    """A (size, offset) pair read as two consecutive u32 values."""

    def __init__(self, stream):
        # The size comes first in the stream, then the offset.
        self.size, self.off = stream.u32(), stream.u32()
class DexFile:
    """Parsed view of a dex file held in memory.

    Args:
        data: the raw bytes of the dex file.
        flag: when true (the default) a DexClass is created for every class
            definition and stored in ``classes``; when false ``classes`` is
            left empty.

    The header tables are exposed as SizeOff attributes (``link``,
    ``string_ids``, ``type_ids``, ``proto_ids``, ``field_ids``,
    ``method_ids``, ``class_defs``, ``data``) plus ``map_off``.
    """

    def __init__(self, data, flag=True):
        self.raw = data
        # 16 and 32 bit array views of the data (trailing odd bytes dropped).
        self.u16s = array.array('H', data[:len(data) & ~1])
        assert self.u16s.itemsize == 2
        self.u32s = array.array('I', data[:len(data) & ~3])
        assert self.u32s.itemsize == 4

        stream = Reader(data)

        # parse header
        stream.read(32)  # skip 32 (magic, magic_vers, checksum, sha1)
        if stream.u32() != len(self.raw):
            print('Warning, unexpected file size!')
        if stream.u32() != 0x70:
            print('Warning, unexpected header size!')
        if stream.u32() != 0x12345678:
            print('Warning, unexpected endianess tag!')

        self.link = SizeOff(stream)
        self.map_off = stream.u32()
        self.string_ids = SizeOff(stream)
        self.type_ids = SizeOff(stream)
        self.proto_ids = SizeOff(stream)
        self.field_ids = SizeOff(stream)
        self.method_ids = SizeOff(stream)
        self.class_defs = SizeOff(stream)
        self.data = SizeOff(stream)

        # Always define the attribute so that reading ``classes`` does not
        # raise AttributeError when class parsing was not requested.
        self.classes = []
        if flag:  # parse dex class
            defs = self.class_defs
            for i in range(defs.size):
                self.classes.append(DexClass(self, defs.off, i))

    def stream(self, offset):
        """Return a new Reader over the raw data positioned at ``offset``."""
        return Reader(self.raw, offset)

    def string(self, i):
        """Return string number ``i`` as read by ``readCStr``."""
        # The string_ids table holds one u32 data offset per string.
        data_off = self.stream(self.string_ids.off + i * 4).u32()
        stream = self.stream(data_off)
        stream.uleb128()  # ignore decoded length
        return stream.readCStr()

    def type(self, i):
        """Return the descriptor of type ``i``.

        Returns None when ``i`` is outside ``0 <= i < NO_INDEX``.
        """
        if 0 <= i < NO_INDEX:
            # The type_ids table holds one u32 string index per type.
            str_idx = self.stream(self.type_ids.off + i * 4).u32()
            return self.string(str_idx)
        return None

    def clsType(self, i):
        """Return type ``i`` as a class name or an array descriptor.

        Array descriptors (starting with ``[``) are returned unchanged and
        ``L...;`` descriptors are returned without the leading ``L`` and the
        trailing ``;``. Returns None for any other descriptor, and when
        ``type(i)`` itself returns None.
        """
        # Can be either class _name_ or array _descriptor_
        desc = self.type(i)
        if desc is None:
            # Unresolvable index: propagate None instead of failing on
            # ``None.startswith``.
            return None
        if desc.startswith(b'['):
            return desc
        if desc.startswith(b'L'):
            return desc[1:-1]
        return None

    def field_id(self, i):
        """Return the FieldId for field index ``i``."""
        return FieldId(self, i)

    def method_id(self, i):
        """Return the MethodId for method index ``i``."""
        return MethodId(self, i)