Source code for apkutils.dex.dexparser

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import array

from apkutils.dex.byteio import Reader
from apkutils.dex.dalvik import parseBytecode
from apkutils.dex.util import signExtend

# Sentinel index meaning "no entry". DexClass treats a superclass index equal
# to this value as "no superclass", and DexFile.type only resolves indices
# strictly below it.
NO_INDEX = 0xFFFFFFFF


def typeList(dex, off, parseClsDesc=False):
    """Read the list of types stored at byte offset ``off``.

    The list is read as a u32 element count followed by that many u16 type
    indices, each of which is resolved through ``dex``.

    Args:
        dex: Object providing ``stream(off)``, ``type(idx)`` and
            ``clsType(idx)``.
        off: Byte offset of the list. ``0`` means there is no list.
        parseClsDesc: When true, resolve indices with ``dex.clsType`` instead
            of ``dex.type``.

    Returns:
        A list with one resolved entry per stored index (empty if ``off`` is
        0).
    """
    if off == 0:
        return []

    st = dex.stream(off)
    size = st.u32()
    # Read every index before resolving any of them.
    idxs = [st.u16() for _ in range(size)]

    resolve = dex.clsType if parseClsDesc else dex.type
    return [resolve(idx) for idx in idxs]


def encodedValue(dex, stream):
    """Decode one encoded value from ``stream``.

    The first byte is a tag: the low 5 bits select the value type and the
    high 3 bits are a type-specific argument (for most types, the number of
    payload bytes minus one).

    Args:
        dex: Object providing ``string(idx)`` and ``clsType(idx)``; only used
            for STRING and TYPE values.
        stream: Reader positioned at the tag byte; must provide ``u8()`` and
            ``uleb128()``. It is advanced past the whole value.

    Returns:
        * a list of decoded elements for ARRAY,
        * ``None`` for ANNOTATION (skipped), NULL and any value type not
          decoded below (their payload bytes are still consumed),
        * otherwise a ``(descriptor, value)`` tuple. Numeric values are
          returned as unsigned integers: 32-bit for ``b'I'``/``b'F'`` and
          64-bit for ``b'J'``/``b'D'``.
    """
    tag = stream.u8()
    vtype, varg = tag & 31, tag >> 5

    if vtype == 0x1c:  # ARRAY
        size = stream.uleb128()
        return [encodedValue(dex, stream) for _ in range(size)]
    if vtype == 0x1d:  # ANNOTATION
        # We don't actually care about annotations but still need to read it
        # to find out how much data is taken up
        stream.uleb128()
        for _ in range(stream.uleb128()):
            stream.uleb128()
            encodedValue(dex, stream)
        return None
    if vtype == 0x1e:  # NULL
        return None
    if vtype == 0x1f:  # BOOLEAN (the value lives in the tag argument)
        return b'I', varg

    # the rest are an int encoded into varg + 1 little-endian bytes
    size = varg + 1
    nbits = size * 8
    val = sum(stream.u8() << (i * 8) for i in range(size))

    # Signed integral types are stored sign-extended in as few bytes as
    # possible, so the sign bit is the top bit of the bytes actually present,
    # not of the full-width type.
    signed = val - ((val >> (nbits - 1)) << nbits)

    if vtype in (0x00, 0x02, 0x04):  # BYTE, SHORT, INT
        return b'I', signed % (1 << 32)
    if vtype == 0x03:  # CHAR (zero-extended)
        return b'I', val
    if vtype == 0x06:  # LONG
        return b'J', signed % (1 << 64)

    # floats are 0 extended to the right
    if vtype == 0x10:  # FLOAT
        return b'F', val << (32 - nbits)
    if vtype == 0x11:  # DOUBLE
        return b'D', val << (64 - nbits)

    if vtype == 0x17:  # STRING
        return b'Ljava/lang/String;', dex.string(val)
    if vtype == 0x18:  # TYPE
        return b'Ljava/lang/Class;', dex.clsType(val)

    # Any other value type: payload has been consumed, nothing is reported.
    return None


class MFIdMixin:
    """Mixin for id objects that expose ``cname``, ``name`` and ``desc``."""

    def triple(self):
        """Return the ``(cname, name, desc)`` tuple identifying this member."""
        return self.cname, self.name, self.desc


class FieldId(MFIdMixin):
    """Identity of a field: owning class, name and type descriptor."""

    def __init__(self, dex, field_idx):
        """Read entry ``field_idx`` of the field id table of ``dex``.

        Each entry is read as 8 bytes: a u16 class type index, a u16 field
        type index and a u32 name string index.
        """
        stream = dex.stream(dex.field_ids.off + field_idx * 8)
        self.cname = dex.clsType(stream.u16())
        self.desc = dex.type(stream.u16())
        self.name = dex.string(stream.u32())


class Field:
    """A field declared by a class: its id, access flags and constant value."""

    def __init__(self, dex, field_idx, access):
        """Build the field for ``field_idx``.

        Args:
            dex: The dex object the field belongs to; kept as ``self.dex``.
            field_idx: Index used to construct the ``FieldId``.
            access: Access value, stored unchanged.
        """
        self.dex = dex
        self.id = FieldId(dex, field_idx)
        self.access = access
        self.constant_value = None  # will be set later


class MethodId(MFIdMixin):
    """Identity of a method: owning class, name, and parameter/return types."""

    def __init__(self, dex, method_idx):
        """Read entry ``method_idx`` of the method id table of ``dex``.

        Each method id entry is read as 8 bytes (u16 class type index, u16
        prototype index, u32 name string index). The referenced prototype
        entry is read as three u32 values: shorty string index, return type
        index and parameter list offset.
        """
        stream = dex.stream(dex.method_ids.off + method_idx * 8)
        self.cname = dex.clsType(stream.u16())
        proto_idx = stream.u16()
        self.name = dex.string(stream.u32())

        proto = dex.stream(dex.proto_ids.off + proto_idx * 12)
        proto.u32()  # shorty_idx: not needed, read only to advance
        return_idx = proto.u32()
        parameters_off = proto.u32()
        self.return_type = dex.type(return_idx)
        self.param_types = typeList(dex, parameters_off)

        # rearrange things to Java format
        parts = [b'('] + self.param_types + [b')', self.return_type]
        self.desc = b''.join(parts)

    def getSpacedParamTypes(self, isstatic):
        """Return the parameter types with a ``None`` after each wide type.

        For non-static methods the receiver comes first: ``cname`` as-is when
        it is an array descriptor (starts with ``b'['``), otherwise wrapped as
        ``b'L' + cname + b';'``. A ``None`` entry follows every ``b'J'`` or
        ``b'D'`` parameter (presumably because long/double take two slots --
        confirm against callers).
        """
        results = []
        if not isstatic:
            if self.cname.startswith(b'['):
                results.append(self.cname)
            else:
                results.append(b'L' + self.cname + b';')

        for ptype in self.param_types:
            results.append(ptype)
            if ptype in (b'J', b'D'):
                results.append(None)
        return results


class TryItem:
    """One try range of a method together with its catch handlers."""

    def __init__(self, stream):
        """Read a try entry: u32 start, u16 count, u16 handler offset."""
        self.start = stream.u32()
        self.count = stream.u16()
        self.handler_off = stream.u16()
        self.end = self.start + self.count
        self.catches = None  # to be filled in later

    def finish(self, dex, list_off):
        """Populate ``self.catches`` from the handler list.

        The handler is read at ``list_off + self.handler_off``. It starts with
        a signed size: ``abs(size)`` typed ``(class name, address)`` pairs
        follow, and when the size is zero or negative a trailing catch-all
        address follows, recorded under ``b'java/lang/Throwable'``.

        Args:
            dex: Object providing ``stream(off)`` and ``clsType(idx)``.
            list_off: Byte offset that handler offsets are relative to.
        """
        stream = dex.stream(list_off + self.handler_off)
        size = stream.sleb128()
        self.catches = results = []
        for _ in range(abs(size)):
            ctype = dex.clsType(stream.uleb128())
            addr = stream.uleb128()
            results.append((ctype, addr))
        if size <= 0:
            results.append((b'java/lang/Throwable', stream.uleb128()))


class CodeItem:
    """Parsed code of one method: register count, try items and bytecode."""

    def __init__(self, dex, offset):
        """Parse the code item located at byte ``offset`` of ``dex``.

        The header is read as four u16 values (registers, ins, outs, tries
        sizes) followed by a u32 debug offset and a u32 instruction count;
        only the register count, tries count and instruction count are kept.
        """
        stream = dex.stream(offset)
        self.nregs = stream.u16()  # registers_size
        stream.u16()  # ins_size (unused)
        stream.u16()  # outs_size (unused)
        tries_size = stream.u16()
        stream.u32()  # debug_off (unused)
        self.insns_size = stream.u32()

        insns_start_pos = stream.pos
        insns = [stream.u16() for _ in range(self.insns_size)]
        if tries_size and self.insns_size & 1:
            stream.u16()  # padding

        self.tries = [TryItem(stream) for _ in range(tries_size)]
        # Handler offsets of the try items are relative to this position.
        self.list_off = stream.pos
        for item in self.tries:
            item.finish(dex, self.list_off)

        # Every address any handler jumps to.
        catch_addrs = {
            addr for item in self.tries for _, addr in item.catches
        }

        self.bytecode = parseBytecode(dex, insns_start_pos, insns, catch_addrs)


class Method:
    """A method declared by a class: its id, access flags and code."""

    def __init__(self, dex, method_idx, access, code_off):
        """Build the method for ``method_idx``.

        Args:
            dex: The dex object the method belongs to; kept as ``self.dex``.
            method_idx: Index used to construct the ``MethodId``.
            access: Access value, stored unchanged.
            code_off: Byte offset of the code item; a falsy value (0) means
                the method has no code and ``self.code`` is ``None``.
        """
        self.dex = dex
        self.id = MethodId(dex, method_idx)
        self.access = access
        self.code_off = code_off
        self.code = CodeItem(dex, code_off) if code_off else None


class ClassData:
    """Fields and methods declared by one class."""

    def __init__(self, dex, offset):
        """Parse the class data at byte ``offset``.

        An ``offset`` of 0 leaves the object with no fields and no methods.
        """
        self.fields = []
        self.methods = []
        # for offset 0, leave dummy data with no fields or methods
        if offset != 0:
            self._parse(dex, dex.stream(offset))

    def _parse(self, dex, stream):
        """Fill ``self.fields`` and ``self.methods`` from ``stream``.

        The data starts with four uleb128 counts (static fields, instance
        fields, direct methods, virtual methods). Within each of the four
        groups, indices are stored as deltas from the previous entry, with the
        running index restarting at 0 for every group.
        """
        numstatic = stream.uleb128()
        numinstance = stream.uleb128()
        numdirect = stream.uleb128()
        numvirtual = stream.uleb128()

        for count in (numstatic, numinstance):
            field_idx = 0
            for _ in range(count):
                field_idx += stream.uleb128()
                access = stream.uleb128()
                self.fields.append(Field(dex, field_idx, access))

        for count in (numdirect, numvirtual):
            method_idx = 0
            for _ in range(count):
                method_idx += stream.uleb128()
                access = stream.uleb128()
                code_off = stream.uleb128()
                self.methods.append(Method(dex, method_idx, access, code_off))


class DexClass:
    """One class definition entry; member data is parsed on demand."""

    def __init__(self, dex, base_off, i):
        """Read the ``i``-th 32-byte class definition starting at ``base_off``.

        The entry is read as eight u32 words: class type index, access flags,
        superclass type index, interfaces list offset, two words that are
        skipped, class data offset and constant values offset.
        """
        self.dex = dex
        st = dex.stream(base_off + i * 32)

        self.name = dex.clsType(st.u32())
        self.access = st.u32()
        super_idx = st.u32()
        self.super = dex.clsType(super_idx) if super_idx != NO_INDEX else None
        self.interfaces = typeList(dex, st.u32(), parseClsDesc=True)
        st.u32()  # ignore sourcefile for now
        st.u32()  # ignore annotations for now
        self.data_off = st.u32()
        self.data = None  # parse data lazily in parseData()
        self.constant_values_off = st.u32()

    def parseData(self):
        """Parse the class data once and attach constant values to fields.

        Does nothing if ``self.data`` is already set. Otherwise builds the
        ``ClassData`` and, when a constant values offset is present, reads a
        uleb128 count there and assigns one decoded value to each of the
        first ``count`` fields, in field order.
        """
        if self.data is not None:
            return

        self.data = ClassData(self.dex, self.data_off)
        if self.constant_values_off:
            stream = self.dex.stream(self.constant_values_off)
            count = stream.uleb128()
            for field in self.data.fields[:count]:
                field.constant_value = encodedValue(self.dex, stream)


class SizeOff:
    """A (size, offset) pair read as two consecutive u32 values."""

    def __init__(self, stream):
        """Read ``size`` then ``off`` from ``stream``."""
        self.size = stream.u32()
        self.off = stream.u32()


class DexFile:
    """Parsed header of a dex file plus accessors for its id tables."""

    def __init__(self, data, flag=True):
        """Parse the header of ``data`` and optionally its class definitions.

        Args:
            data: The raw bytes of the dex file.
            flag: When true (default), build a ``DexClass`` for every class
                definition. When false, ``self.classes`` is left empty.

        Mismatches in the file size, header size or endianness tag are only
        reported with a printed warning; parsing continues.
        """
        self.raw = data
        # 16-bit and 32-bit array views of the data (trailing odd bytes are
        # dropped). Nothing in this class reads them; kept as attributes.
        self.u16s = array.array('H', data[:len(data) & ~1])
        assert self.u16s.itemsize == 2
        self.u32s = array.array('I', data[:len(data) & ~3])
        assert self.u32s.itemsize == 4

        stream = Reader(data)

        # parse header
        stream.read(32)  # skip 32(magic, magic_vers, checksum, sha1)

        if stream.u32() != len(self.raw):
            print('Warning, unexpected file size!')

        if stream.u32() != 0x70:
            print('Warning, unexpected header size!')

        if stream.u32() != 0x12345678:
            print('Warning, unexpected endianess tag!')

        self.link = SizeOff(stream)
        self.map_off = stream.u32()
        self.string_ids = SizeOff(stream)
        self.type_ids = SizeOff(stream)
        self.proto_ids = SizeOff(stream)
        self.field_ids = SizeOff(stream)
        self.method_ids = SizeOff(stream)
        self.class_defs = SizeOff(stream)
        self.data = SizeOff(stream)

        # Always define the attribute so it exists even when flag is false.
        self.classes = []
        if flag:  # parse dex class
            defs = self.class_defs
            for i in range(defs.size):
                self.classes.append(DexClass(self, defs.off, i))

    def stream(self, offset):
        """Return a new ``Reader`` over the raw data starting at ``offset``."""
        return Reader(self.raw, offset)

    def string(self, i):
        """Return string number ``i`` as read by ``readCStr``.

        The string id table holds one u32 data offset per string; the data
        begins with a uleb128 length, which is skipped.
        """
        data_off = self.stream(self.string_ids.off + i * 4).u32()
        stream = self.stream(data_off)
        stream.uleb128()  # ignore decoded length
        return stream.readCStr()

    def type(self, i):
        """Return the descriptor of type ``i``, or ``None`` if out of range.

        Indices outside ``0 <= i < NO_INDEX`` yield ``None``.
        """
        if 0 <= i < NO_INDEX:
            str_idx = self.stream(self.type_ids.off + i * 4).u32()
            return self.string(str_idx)
        return None

    def clsType(self, i):
        """Return type ``i`` as a class name or an array descriptor.

        Array descriptors (starting with ``b'['``) are returned unchanged;
        class descriptors (starting with ``b'L'``) are returned without their
        first and last byte. Anything else, including an index that ``type``
        cannot resolve, yields ``None``.
        """
        # Can be either class _name_ or array _descriptor_
        desc = self.type(i)
        if desc is None:
            return None
        if desc.startswith(b'['):
            return desc
        if desc.startswith(b'L'):
            return desc[1:-1]
        return None

    def field_id(self, i):
        """Return a freshly parsed ``FieldId`` for index ``i``."""
        return FieldId(self, i)

    def method_id(self, i):
        """Return a freshly parsed ``MethodId`` for index ``i``."""
        return MethodId(self, i)
Source code for apkutils.dex.dexparser

apkutils

Navigation

Related Topics