Source code for apkutils.dex.dalvik

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from . import dalvikformats, util


class DalvikInstruction:
    """One decoded Dalvik instruction plus analysis data attached later.

    Attributes:
        type: instruction category passed in by the caller (``type_``).
        pos: position of the instruction as given by the caller.
        pos2: the ``newpos`` value given by the caller.
        opcode: raw opcode value.
        args: decoded operands.
        implicit_casts: initially ``None``; filled in by later passes.
        prev_result: initially ``None``; used for move-result/exception.
        fillarrdata: initially ``None``.
        switchdata: initially ``None``.
    """

    def __init__(self, type_, pos, newpos, opcode, args):
        # Values supplied directly by the parser.
        self.type = type_
        self.pos, self.pos2 = pos, newpos
        self.opcode, self.args = opcode, args

        # Slots populated after construction; prev_result is for
        # move-result/exception.
        self.implicit_casts = self.prev_result = None
        self.fillarrdata = self.switchdata = None
# Instruction category constants. Each name receives the next integer from
# the shared counter, so the numeric value of a category is determined purely
# by its position in this list (Nop == 0, Move == 1, ...). Do not reorder.
_it = iter(range(999))
Nop = next(_it)
Move = next(_it)
MoveWide = next(_it)
MoveResult = next(_it)
Return = next(_it)
Const32 = next(_it)
Const64 = next(_it)
ConstString = next(_it)
ConstClass = next(_it)
MonitorEnter = next(_it)
MonitorExit = next(_it)
CheckCast = next(_it)
InstanceOf = next(_it)
ArrayLen = next(_it)
NewInstance = next(_it)
NewArray = next(_it)
FilledNewArray = next(_it)
FillArrayData = next(_it)
Throw = next(_it)
Goto = next(_it)
Switch = next(_it)
Cmp = next(_it)
If = next(_it)
IfZ = next(_it)
ArrayGet = next(_it)
ArrayPut = next(_it)
InstanceGet = next(_it)
InstancePut = next(_it)
StaticGet = next(_it)
StaticPut = next(_it)
# Invoke = next(_it)
InvokeVirtual = next(_it)
InvokeSuper = next(_it)
InvokeDirect = next(_it)
InvokeStatic = next(_it)
InvokeInterface = next(_it)

# actual ops for these are defined in jvm/mathops.py
UnaryOp = next(_it)
BinaryOp = next(_it)
BinaryOpConst = next(_it)

# All invoke categories, grouped for membership tests.
INVOKE_TYPES = InvokeVirtual, InvokeSuper, InvokeDirect, InvokeStatic, InvokeInterface

# instructions which Dalvik considers to throw
# (BinaryOp and BinaryOpConst, the last two entries, only throw when the
# operation is an int/long div or rem)
THROW_TYPES = INVOKE_TYPES + (ConstString, ConstClass, MonitorEnter, MonitorExit, CheckCast, InstanceOf, ArrayLen, NewArray, NewInstance, FilledNewArray, FillArrayData, Throw, ArrayGet, ArrayPut, InstanceGet, InstancePut, StaticGet, StaticPut, BinaryOp, BinaryOpConst)

# Same as THROW_TYPES but ignoring the possibility of linkage errors
# (i.e. constants and instanceof can't throw).
# In theory MonitorExit can't throw either due to the structured locking
# checks, but these are broken and work inconsistently, so it stays listed.
PRUNED_THROW_TYPES = INVOKE_TYPES + (MonitorEnter, MonitorExit, CheckCast, ArrayLen, NewArray, NewInstance, FilledNewArray, FillArrayData, Throw, ArrayGet, ArrayPut, InstanceGet, InstancePut, StaticGet, StaticPut, BinaryOp, BinaryOpConst)

# Opcode -> instruction category table. Only the first opcode of each run is
# listed here.
# NOTE(review): util.keysToRanges presumably extends every entry up to the
# next listed key (and the last one up to 256) so that all 256 opcode values
# are mapped -- confirm against util.keysToRanges.
OPCODES = util.keysToRanges({
    0x00: Nop,
    0x01: Move,
    0x04: MoveWide,
    0x07: Move,
    0x0a: MoveResult,
    0x0e: Return,
    0x12: Const32,
    0x16: Const64,
    0x1a: ConstString,
    0x1c: ConstClass,
    0x1d: MonitorEnter,
    0x1e: MonitorExit,
    0x1f: CheckCast,
    0x20: InstanceOf,
    0x21: ArrayLen,
    0x22: NewInstance,
    0x23: NewArray,
    0x24: FilledNewArray,
    0x26: FillArrayData,
    0x27: Throw,
    0x28: Goto,
    0x2b: Switch,
    0x2d: Cmp,
    0x32: If,
    0x38: IfZ,
    0x3e: Nop,  # unused
    0x44: ArrayGet,
    0x4b: ArrayPut,
    0x52: InstanceGet,
    0x59: InstancePut,
    0x60: StaticGet,
    0x67: StaticPut,
    0x6e: InvokeVirtual,
    0x6f: InvokeSuper,
    0x70: InvokeDirect,
    0x71: InvokeStatic,
    0x72: InvokeInterface,
    0x73: Nop,  # unused
    0x74: InvokeVirtual,
    0x75: InvokeSuper,
    0x76: InvokeDirect,
    0x77: InvokeStatic,
    0x78: InvokeInterface,
    0x79: Nop,  # unused
    0x7b: UnaryOp,
    0x90: BinaryOp,
    0xd0: BinaryOpConst,
    0xe3: Nop,  # unused
}, 256)
def parseInstruction(dex, insns_start_pos, shorts, pos):
    """Decode the instruction (or data payload) that starts at ``shorts[pos]``.

    Args:
        dex: object exposing ``stream(offset)``; the returned reader must
            provide ``u8``/``u16``/``u32``/``u64`` methods.
        insns_start_pos: offset handed to ``dex.stream`` for the start of the
            instruction array; ``pos * 2`` is added to it, i.e. each entry of
            ``shorts`` is treated as two bytes wide.
        shorts: sequence of code units for the method.
        pos: index into ``shorts`` of the unit to decode.

    Returns:
        ``(newpos, instruction)`` where ``newpos`` is the index of the next
        unit to decode and ``instruction`` is a ``DalvikInstruction``.

    Raises:
        KeyError: if an array-data payload declares an element width other
            than 1, 2, 4 or 8.
    """
    word = shorts[pos]
    opcode = word & 0xFF
    newpos, args = dalvikformats.decode(shorts, pos, opcode)

    # Data payloads are recognised by the full 16-bit word, not the opcode.
    switchdata = None
    fillarrdata = None

    if word in (0x100, 0x200):  # switch payload
        count = shorts[pos + 1]
        payload = dex.stream(insns_start_pos + pos * 2 + 4)

        if word == 0x100:  # packed: one starting key followed by the targets
            base_key = payload.u32()
            switchdata = {}
            for offset in range(count):
                switchdata[offset + base_key] = payload.u32()
            newpos = pos + 2 + 2 * (count + 1)
        else:  # sparse: all keys, then all targets
            keys = [payload.u32() for _ in range(count)]
            targets = [payload.u32() for _ in range(count)]
            switchdata = dict(zip(keys, targets))
            newpos = pos + 2 + 4 * count
    elif word == 0x300:  # array data payload
        width = shorts[pos + 1] % 16
        count = shorts[pos + 2] ^ (shorts[pos + 3] << 16)
        newpos = pos + ((count * width + 1) // 2 + 4)

        # Pick the reader that matches the declared element width.
        data = dex.stream(insns_start_pos + pos * 2 + 8)
        readers = {1: data.u8, 2: data.u16, 4: data.u32, 8: data.u64}
        read_element = readers[width]
        fillarrdata = width, [read_element() for _ in range(count)]

    # The instruction must be built only after the payload handling above,
    # because that handling may replace newpos.
    instruction = DalvikInstruction(OPCODES[opcode], pos, newpos, opcode, args)
    instruction.fillarrdata = fillarrdata
    instruction.switchdata = switchdata
    return newpos, instruction
def parseBytecode(dex, insns_start_pos, shorts, catch_addrs):
    """Decode a whole method body and annotate the resulting instructions.

    Three passes are performed:

    1. ``parseInstruction`` is called repeatedly until ``shorts`` is consumed.
    2. Each ``MoveResult`` instruction gets ``prev_result`` set from the
       instruction immediately before it (invoke return type, filled array
       type) or, failing that, from being located at a catch address.
    3. Each if-eqz/if-nez directly following an ``InstanceOf`` gets
       ``implicit_casts`` set to ``(descriptor index, sorted registers)``.

    Args:
        dex: object exposing ``stream``, ``method_id`` and ``type``.
        insns_start_pos: forwarded unchanged to ``parseInstruction``.
        shorts: sequence of code units for the method.
        catch_addrs: container of positions that are exception handler
            entry points (only membership tests are performed on it).

    Returns:
        List of ``DalvikInstruction`` objects in stream order.
    """
    ops = []
    pos = 0
    end = len(shorts)
    while pos < end:
        pos, parsed = parseInstruction(dex, insns_start_pos, shorts, pos)
        ops.append(parsed)

    # Fill in data for move-result. The first instruction has no predecessor
    # and is therefore never examined here.
    for idx in range(1, len(ops)):
        before = ops[idx - 1]
        current = ops[idx]
        if current.type != MoveResult:
            continue

        if before.type in INVOKE_TYPES:
            method = dex.method_id(before.args[0])
            # A void return leaves prev_result untouched.
            if method.return_type != b'V':
                current.prev_result = method.return_type
        elif before.type == FilledNewArray:
            current.prev_result = dex.type(before.args[0])
        elif current.pos in catch_addrs:
            current.prev_result = b'Ljava/lang/Throwable;'
    # assert 0 not in catch_addrs

    # Fill in implicit cast data
    for idx, branch in enumerate(ops):
        if branch.opcode not in (0x38, 0x39):  # if-eqz, if-nez
            continue
        if idx == 0 or ops[idx - 1].type != InstanceOf:
            continue

        check = ops[idx - 1]
        desc_index = check.args[2]
        candidates = {check.args[1]}

        # A Move right before the instanceof whose args[0] equals the
        # checked register contributes its args[1] as a further candidate.
        if idx > 1:
            mover = ops[idx - 2]
            if mover.type == Move and mover.args[0] == check.args[1]:
                candidates.add(mover.args[1])

        # Don't cast result of instanceof if it overwrites the input
        candidates.discard(check.args[0])
        if candidates:
            branch.implicit_casts = desc_index, sorted(candidates)

    return ops