Skip to content

Unicorn 2: Hooks don't take effect on code which has already been executed. #2200

Open
@gerph

Description

@gerph

Problem

Hooks can be set at any time. However, if hooks are set after code within their region has already been executed, they are not honoured. The failure case is:

  • You run some code.
  • You set code hooks for the region that has just executed.
  • You run the same code again.

The second pass through the code never triggers the hooks.

I presume this is related to the already translated code not being invalidated when the hooks are set.

This used to work on Unicorn 1, but does not on Unicorn 2.

Example

Code

This demonstration code is Python. It will:

  • Write to memory some code which does a very rudimentary string-to-integer conversion.
  • Call the code; you can see that the right answer is received (12345) and there is no hook output (because no hooks are set).
  • Repeat this process with code (instruction) and block hooks set.
  • In the Unicorn 1 case, the hooks are called.
  • In the Unicorn 2 case, the hooks are not called.
#!/usr/bin/env python
# Sample code for ARM64 of Unicorn. Nguyen Anh Quynh <[email protected]>
# Python sample ported by Loi Anh Tuan <[email protected]>

from __future__ import print_function

import binascii
import re

from unicorn import *
from unicorn.arm64_const import *


# code to be emulated
#
# One instruction per line, in the form
#   <dump offset> : <32-bit instruction word, hex> : <4 chars> : <disassembly>
# test_arm64() parses each line with a regular expression, byte-reverses the
# hex word before writing it to memory, and keeps the disassembly text for
# hook_code() to print.  The offset column is not used for placement: line N
# of this dump is loaded at ADDRESS + 4*N.
# NOTE(review): the 4-character column looks like the instruction bytes
# rendered as ASCII -- it is matched but otherwise ignored by the parser.
ARM64_CODE = """\
0000005c : a9bf7bfd : .{.. : STP     x29, x30, [sp, #-&10]!
00000060 : 910003fd : .... : MOV     x29, sp
00000064 : d2800143 : C... : MOVZ    x3, #&a
00000068 : aa1f03e2 : .... : MOV     x2, xzr
0000006c : 38401401 : ..@8 : LDRB    w1, [x0], #1
00000070 : d100c021 : !... : SUB     x1, x1, #&30              ; #48  = '0'
00000074 : f100283f : ?(.. : CMP     x1, #&a
00000078 : 54000083 : ...T : B.LO    &00000088
0000007c : d1001c21 : !... : SUB     x1, x1, #7
00000080 : f100403f : ?@.. : CMP     x1, #&10                  ; #16 = bit 4
00000084 : 54000082 : ...T : B.HS    &00000094
00000088 : 9b037c42 : B|.. : MUL     x2, x2, x3
0000008c : 8b010042 : B... : ADD     x2, x2, x1
00000090 : 17fffff7 : .... : B       &0000006c
00000094 : aa0203e0 : .... : MOV     x0, x2
00000098 : a8c17bfd : .{.. : LDP     x29, x30, [sp], #&10
0000009c : d65f03c0 : .._. : RET
"""


# memory address where emulation starts
# (also the base of the memory region mapped by test_arm64)
ADDRESS    = 0x10000

# Maps the address an instruction was loaded at -> its disassembly text.
# Filled in by test_arm64() while loading ARM64_CODE; read by hook_code().
code_strings = {}


# callback for tracing basic blocks
def hook_block(uc, address, size, user_data):
    """Print one trace line giving the block's address and size in hex.

    `uc` and `user_data` are part of the callback signature but are not used.
    """
    report = ">>> Tracing basic block at 0x%x, block size = 0x%x" % (address, size)
    print(report)


# callback for tracing instructions
def hook_code(uc, address, size, user_data):
    """Print one trace line giving the instruction's address, size and text.

    The text is looked up in the module-level `code_strings` table; '?' is
    printed when the address has no entry.  `uc` and `user_data` are part of
    the callback signature but are not used.
    """
    disassembly = code_strings.get(address, '?')
    fields = (address, size, disassembly)
    print(">>> Tracing instruction at 0x%x, instruction size = 0x%x : %s" % fields)


# Test ARM64
def test_arm64():
    """Run the routine in ARM64_CODE twice: first unhooked, then hooked.

    The code is loaded at ADDRESS and executed once with only the interrupt
    hook installed.  The block and code hooks are then added to the same Uc
    instance and the same code is executed again, so the output shows whether
    hooks added after a region has already run are honoured.

    After each pass the value of X0 is printed.  A UcError raised anywhere in
    the sequence is printed as "ERROR: ..." instead of propagating.
    """
    print("Emulate ARM64 code")
    try:
        # Initialize emulator in ARM mode
        emu = Uc(UC_ARCH_ARM64, UC_MODE_ARM)

        def hook_intr(uc, intno, data):
            # Report the interrupt, then stop the emulation.
            print("hook_intr: intno = %s, data = %r" % (intno, data))
            uc.emu_stop()

        emu.hook_add(UC_HOOK_INTR, hook_intr)

        # map 2MB memory for this emulation
        region_size = 2 * 1024 * 1024
        emu.mem_map(ADDRESS, region_size)

        # Load the dump: one 4-byte instruction per dump line, placed by line
        # number.  The hex word is byte-reversed before being written.
        line_pattern = re.compile('^[0-9a-f]{8} : ([0-9a-f]{8}) : .... : (.*)')
        for lineno, text in enumerate(ARM64_CODE.splitlines()):
            found = line_pattern.search(text)
            if not found:
                continue
            word_hex, disassembly = found.groups()
            target = ADDRESS + lineno * 4
            code_strings[target] = disassembly
            emu.mem_write(target, bytes(binascii.unhexlify(word_hex)[::-1]))

        input_addr = ADDRESS + 0x5000
        stop_addr = ADDRESS + 0x8000

        for passtype in ('no-hook', 'hooks'):
            print("------ PASS : %s ------" % (passtype,))

            # Hooks are deliberately installed only for the second pass, i.e.
            # after the code has already been executed once.
            if passtype == 'hooks':
                # trace every basic block
                emu.hook_add(UC_HOOK_BLOCK, hook_block)

                # trace every instruction inside the mapped region
                emu.hook_add(UC_HOOK_CODE, hook_code, begin=ADDRESS, end=ADDRESS + region_size)

            # initialize machine registers: X0 points at the input string
            emu.reg_write(UC_ARM64_REG_X0, input_addr)
            emu.mem_write(input_addr, b'12345 ')

            # SP and LR both get stop_addr, so the final RET lands on the
            # address at which emu_start() is told to stop.
            emu.reg_write(UC_ARM64_REG_SP, stop_addr)
            emu.reg_write(UC_ARM64_REG_LR, stop_addr)

            # emulate from the start of the code until stop_addr is reached
            emu.emu_start(ADDRESS, stop_addr)

            # now print out some registers
            print(">>> Emulation done.")

            result = emu.reg_read(UC_ARM64_REG_X0)
            print(">>> X0 = 0x%08x = %i" % (result, result))

    except UcError as err:
        print("ERROR: %s" % err)

# Script entry point: run the two-pass demonstration, then print a
# 26-character '=' rule to mark the end of the output.
if __name__ == '__main__':
    test_arm64()
    print("=" * 26)

Execution on Unicorn 1

Using Unicorn 1.0.3, installed from pip.

charles@mooncake ~/projects/RO/pyromaniac (master)> python check_hook.py
Emulate ARM64 code
------ PASS : no-hook ------
>>> Emulation done.
>>> X0 = 0x00003039 = 12345
------ PASS : hooks ------
>>> Tracing basic block at 0x10000, block size = 0x20
>>> Tracing basic block at 0x10000, block size = 0x20
>>> Tracing instruction at 0x10000, instruction size = 0x4 : STP     x29, x30, [sp, #-&10]!
>>> Tracing instruction at 0x10004, instruction size = 0x4 : MOV     x29, sp
>>> Tracing instruction at 0x10008, instruction size = 0x4 : MOVZ    x3, #&a
>>> Tracing instruction at 0x1000c, instruction size = 0x4 : MOV     x2, xzr
>>> Tracing instruction at 0x10010, instruction size = 0x4 : LDRB    w1, [x0], #1
>>> Tracing instruction at 0x10014, instruction size = 0x4 : SUB     x1, x1, #&30              ; #48  = '0'
>>> Tracing instruction at 0x10018, instruction size = 0x4 : CMP     x1, #&a
>>> Tracing instruction at 0x1001c, instruction size = 0x4 : B.LO    &00000088
>>> Tracing basic block at 0x1002c, block size = 0xc
>>> Tracing instruction at 0x1002c, instruction size = 0x4 : MUL     x2, x2, x3
>>> Tracing instruction at 0x10030, instruction size = 0x4 : ADD     x2, x2, x1
>>> Tracing instruction at 0x10034, instruction size = 0x4 : B       &0000006c
>>> Tracing basic block at 0x10010, block size = 0x10
>>> Tracing instruction at 0x10010, instruction size = 0x4 : LDRB    w1, [x0], #1
>>> Tracing instruction at 0x10014, instruction size = 0x4 : SUB     x1, x1, #&30              ; #48  = '0'
>>> Tracing instruction at 0x10018, instruction size = 0x4 : CMP     x1, #&a
>>> Tracing instruction at 0x1001c, instruction size = 0x4 : B.LO    &00000088
>>> Tracing basic block at 0x1002c, block size = 0xc
>>> Tracing instruction at 0x1002c, instruction size = 0x4 : MUL     x2, x2, x3
>>> Tracing instruction at 0x10030, instruction size = 0x4 : ADD     x2, x2, x1
>>> Tracing instruction at 0x10034, instruction size = 0x4 : B       &0000006c
>>> Tracing basic block at 0x10010, block size = 0x10
>>> Tracing instruction at 0x10010, instruction size = 0x4 : LDRB    w1, [x0], #1
>>> Tracing instruction at 0x10014, instruction size = 0x4 : SUB     x1, x1, #&30              ; #48  = '0'
>>> Tracing instruction at 0x10018, instruction size = 0x4 : CMP     x1, #&a
>>> Tracing instruction at 0x1001c, instruction size = 0x4 : B.LO    &00000088
>>> Tracing basic block at 0x1002c, block size = 0xc
>>> Tracing instruction at 0x1002c, instruction size = 0x4 : MUL     x2, x2, x3
>>> Tracing instruction at 0x10030, instruction size = 0x4 : ADD     x2, x2, x1
>>> Tracing instruction at 0x10034, instruction size = 0x4 : B       &0000006c
>>> Tracing basic block at 0x10010, block size = 0x10
>>> Tracing instruction at 0x10010, instruction size = 0x4 : LDRB    w1, [x0], #1
>>> Tracing instruction at 0x10014, instruction size = 0x4 : SUB     x1, x1, #&30              ; #48  = '0'
>>> Tracing instruction at 0x10018, instruction size = 0x4 : CMP     x1, #&a
>>> Tracing instruction at 0x1001c, instruction size = 0x4 : B.LO    &00000088
>>> Tracing basic block at 0x1002c, block size = 0xc
>>> Tracing instruction at 0x1002c, instruction size = 0x4 : MUL     x2, x2, x3
>>> Tracing instruction at 0x10030, instruction size = 0x4 : ADD     x2, x2, x1
>>> Tracing instruction at 0x10034, instruction size = 0x4 : B       &0000006c
>>> Tracing basic block at 0x10010, block size = 0x10
>>> Tracing instruction at 0x10010, instruction size = 0x4 : LDRB    w1, [x0], #1
>>> Tracing instruction at 0x10014, instruction size = 0x4 : SUB     x1, x1, #&30              ; #48  = '0'
>>> Tracing instruction at 0x10018, instruction size = 0x4 : CMP     x1, #&a
>>> Tracing instruction at 0x1001c, instruction size = 0x4 : B.LO    &00000088
>>> Tracing basic block at 0x1002c, block size = 0xc
>>> Tracing instruction at 0x1002c, instruction size = 0x4 : MUL     x2, x2, x3
>>> Tracing instruction at 0x10030, instruction size = 0x4 : ADD     x2, x2, x1
>>> Tracing instruction at 0x10034, instruction size = 0x4 : B       &0000006c
>>> Tracing basic block at 0x10010, block size = 0x10
>>> Tracing instruction at 0x10010, instruction size = 0x4 : LDRB    w1, [x0], #1
>>> Tracing instruction at 0x10014, instruction size = 0x4 : SUB     x1, x1, #&30              ; #48  = '0'
>>> Tracing instruction at 0x10018, instruction size = 0x4 : CMP     x1, #&a
>>> Tracing instruction at 0x1001c, instruction size = 0x4 : B.LO    &00000088
>>> Tracing basic block at 0x10020, block size = 0xc
>>> Tracing instruction at 0x10020, instruction size = 0x4 : SUB     x1, x1, #7
>>> Tracing instruction at 0x10024, instruction size = 0x4 : CMP     x1, #&10                  ; #16 = bit 4
>>> Tracing instruction at 0x10028, instruction size = 0x4 : B.HS    &00000094
>>> Tracing basic block at 0x10038, block size = 0xc
>>> Tracing instruction at 0x10038, instruction size = 0x4 : MOV     x0, x2
>>> Tracing instruction at 0x1003c, instruction size = 0x4 : LDP     x29, x30, [sp], #&10
>>> Tracing instruction at 0x10040, instruction size = 0x4 : RET
>>> Emulation done.
>>> X0 = 0x00003039 = 12345
==========================

Execution on Unicorn 2

Using Unicorn 2.1.3, installed from pip.

Emulate ARM64 code
------ PASS : no-hook ------
>>> Emulation done.
>>> X0 = 0x00003039 = 12345
------ PASS : hooks ------
>>> Emulation done.
>>> X0 = 0x00003039 = 12345
==========================

The second pass doesn't trigger the hooks.

Expected behaviour

I would expect the addition of hooks to take immediate effect, without needing any manual invalidation of code regions, etc. If manual invalidation is required, it might need to be documented - it was not an expected change in behaviour.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions