Skip to content

Commit a6deada

Browse files
committed
Potential fix for #2: Treating unknown data as THUMB code, not bytes.
1 parent 735212b commit a6deada

File tree

4 files changed

+45
-27
lines changed

4 files changed

+45
-27
lines changed

README.md

+15
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ Luvdis is a tool for disassembling GBA ROMs. Features include:
1919
- [From Releases](#from-releases)
2020
- [From latest source](#from-latest-source)
2121
- [Usage](#usage)
22+
- [FAQ](#faq)
2223
- [Options](#options)
2324
- [ROM detection](#rom-detection)
2425

@@ -73,6 +74,17 @@ To disassemble only part of a ROM, say, up to the start of read-only data, provi
7374
$ luvdis rom.gba --start 0x0800024C --stop 0x081b32b4 -o rom.s
7475
```
7576

77+
### FAQ
78+
79+
#### How can I get rid of large blocks of raw bytes in the disassembly?
80+
81+
By default, any area of a ROM that Luvdis cannot identify as executable code is treated as byte data. You can change this behavior
82+
with the `--default-mode` option:
83+
84+
```sh
85+
$ luvdis rom.gba --default-mode THUMB -o rom.s
86+
```
87+
7688
### Options
7789

7890
```
@@ -105,6 +117,9 @@ Options:
105117
--min-length INTEGER RANGE Minimum valid instruction length required in
106118
order to 'guess' a function. Must be at least 1,
107119
defaults to 3.
120+
--default-mode [THUMB|BYTE|WORD]
121+
Default disassembly mode when the nature of
122+
an address is unknown. Defaults to 'BYTE'.
108123
--help Show this message and exit.
109124
```
110125

luvdis/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22
33
Copyright (C) 2020 A. Antonitis. Licensed under the MIT license.
44
"""
5-
__version__ = '0.6.1'
5+
__version__ = '0.7.0'
66
__doc__ = __doc__.replace('__version__', __version__)
77
url = __url__ = 'https://github.com/arantonitis/luvdis'

luvdis/__main__.py

+11-7
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from luvdis.config import read_config
88
from luvdis.common import eprint, set_debug
99
from luvdis.rom import ROM
10-
from luvdis.analyze import State, BASE_ADDRESS, END_ADDRESS
10+
from luvdis.analyze import State, BASE_ADDRESS, END_ADDRESS, THUMB, BYTE, WORD
1111

1212

1313
class AddressInt(click.ParamType):
@@ -31,6 +31,7 @@ def convert(self, value, param, ctx):
3131

3232

3333
ADDRESS_INT = AddressInt()
34+
MODE_MAP = {'byte': BYTE, 'thumb': THUMB, 'word': WORD}
3435

3536

3637
@click.group(cls=DefaultGroup, default='disasm', default_if_no_args=True)
@@ -47,7 +48,7 @@ def main():
4748
'output path.')
4849
@click.option('-c', '--config', type=click.Path(exists=True, dir_okay=False, readable=True),
4950
help='Function configuration file.')
50-
@click.option('-co', '--config-out', 'config_out', type=click.Path(writable=True, dir_okay=False),
51+
@click.option('-co', '--config-out', type=click.Path(writable=True, dir_okay=False),
5152
help="Output configuration. If any functions are 'guessed' by Luvdis, they will appear here.")
5253
@click.option('-D', '--debug', is_flag=True, help='Turn on/off debugging behavior.')
5354
@click.option('--start', type=ADDRESS_INT, default=BASE_ADDRESS,
@@ -58,15 +59,18 @@ def main():
5859
help="Assembler macro file to '.include' in disassembly. If not specified, default macros are embedded.")
5960
@click.option('--guess/--no-guess', default=True,
6061
help='Turn on/off function guessing & discovery. Default is to perform guessing.')
61-
@click.option('--min-calls', 'min_calls', type=click.IntRange(1), default=2,
62+
@click.option('--min-calls', type=click.IntRange(1), default=2,
6263
help="Minimum number of calls to a function required in order to 'guess' it. Must be at least 1, "
6364
"defaults to 2.")
64-
@click.option('--min-length', 'min_length', type=click.IntRange(1), default=3,
65+
@click.option('--min-length', type=click.IntRange(1), default=3,
6566
help="Minimum valid instruction length required in order to 'guess' a function. Must be at least 1, "
6667
"defaults to 3.")
67-
def disasm(rom, output, config, config_out, debug, start, stop, macros, guess, min_calls, min_length, **kwargs):
68+
@click.option('--default-mode', type=click.Choice(('THUMB', 'BYTE', 'WORD'), case_sensitive=False), default='BYTE',
69+
help="Default disassembly mode when the nature of an address is unknown. Defaults to 'BYTE'.")
70+
def disasm(rom, output, config, config_out, debug, start, stop, macros, guess, min_calls, min_length, default_mode,
71+
**kw):
6872
""" Analyze and disassemble a GBA ROM. """
69-
for k, v in kwargs.items():
73+
for k, v in kw.items():
7074
print(k, v)
7175
set_debug(debug)
7276
functions = read_config(config) if config else None
@@ -76,7 +80,7 @@ def disasm(rom, output, config, config_out, debug, start, stop, macros, guess, m
7680
if output in (None, '-'):
7781
output = None
7882
eprint(f'No output file specified. Printing to stdout.')
79-
state.dump(rom, output, config_out)
83+
state.dump(rom, output, config_out, default_mode=MODE_MAP[default_mode.lower()])
8084

8185

8286
@main.command(name='info')

luvdis/analyze.py

+18-19
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@ def __init__(self, functions=None, min_calls=2, min_length=3, start=BASE_ADDRESS
382382
else:
383383
name = value
384384
self.unexpanded[addr] = name
385-
self.functions = {} # addr -> (name, end)
385+
self.functions = {} # addr -> (name, end_address)
386386
self.not_funcs = set()
387387
self.min_calls, self.min_length, self.start, self.stop = min_calls, min_length, start, stop
388388
self.macros = macros
@@ -468,7 +468,7 @@ def analyze_func(self, rom, addr, state=None):
468468
exit_behaved = False
469469
end = rom.size | 0x08000000 if ins is None else ins.address
470470
break
471-
elif ins.id == Opcode.ldr: # Mark target as WORD
471+
elif ins.id == Opcode.ldr: # Mark load target as WORD
472472
end = addr = ins.address+2
473473
target = ins.target
474474
if target < self.stop: # TODO: Is this necessary?
@@ -559,25 +559,27 @@ def label_for(self, addr):
559559
return name
560560
return f'_{addr:08X}'
561561

562-
def dump(self, rom, path=None, config_output=None):
563-
if config_output: # Optionally, write updated function list
562+
def dump(self, rom, path=None, config_output=None, default_mode=BYTE):
563+
if config_output: # Optionally write updated function list
564564
addr_map = {addr: (name, self.module_addrs.get(addr, None)) for addr, (name, _) in self.functions.items()}
565565
write_config(addr_map, config_output)
566566
# Setup initial module & file
567567
folder, module = os.path.split(path) if path else (None, None)
568-
if DEBUG and path: # Output function range info if debugging
568+
if DEBUG and path:
569569
import pickle
570+
# Output function range info if debugging
570571
with open(os.path.join(folder, 'funcs.pickle'), 'wb') as f:
571572
pickle.dump(self.debug_ranges, f)
573+
# Also output a linker script
572574
fl = open('luvdis.ld', 'w')
573575
f = None if path else sys.stdout
574576
# Setup start and end addresses
575577
addr = self.start
576-
if type(self.stop) is float: # End is the final address in the ROM
578+
if type(self.stop) is float: # End at the final address in the ROM
577579
end = rom.size | BASE_ADDRESS
578580
else:
579581
end = min(rom.size, self.stop & 0xffffff) | BASE_ADDRESS
580-
if addr not in self.module_addrs and module: # Set module of initial address to the path output
582+
if addr not in self.module_addrs and module: # Mark the very first address as belonging to the initial module
581583
self.module_addrs[addr] = module
582584
mode, flags, bytecount = BYTE, 0, 0
583585
# Initialize progress bar & messages
@@ -594,8 +596,8 @@ def warn(*args):
594596
old_mode = mode
595597

596598
# Switch output modes
597-
if addr_flags == 0 and flags != 0: # Switch to byte mode when address flags are zero
598-
mode = BYTE
599+
if addr_flags == 0 and flags != 0: # Switch to default mode when address flags are zero
600+
mode = default_mode # By default, BYTE mode
599601
elif addr_flags & FLAG_EXEC and not (flags & FLAG_EXEC): # Output code
600602
mode = THUMB
601603
elif addr_flags & FLAG_WORD and not (flags & FLAG_WORD) and not (addr_flags & FLAG_EXEC): # Output words
@@ -638,7 +640,7 @@ def warn(*args):
638640
# Switch module output
639641
if f is not sys.stdout and addr in self.module_addrs: # Address has module info
640642
new_module = self.module_addrs[addr]
641-
if new_module != module or f is None: # New/first module seen
643+
if new_module != module or f is None: # Entering new/first module
642644
module = new_module
643645
path = os.path.join(folder, module)
644646
eprint(f"{addr:08X}: module '{path}'")
@@ -651,12 +653,12 @@ def warn(*args):
651653
f = open(path, 'w', buffering=1)
652654
f.write(ASM_PRELUDE)
653655
f.write(f'.include "{self.macros}"\n' if self.macros else MACROS)
654-
bytecount = 0 # Reset byte bytecount
655-
if DEBUG: # Output link script if debugging
656+
bytecount = 0 # Reset bytecount
657+
if DEBUG: # Output linker script if debugging
656658
fl.write(f'{path[:-2]}.o(.text);\n')
657659

658660
# Emit code or data
659-
if mode == THUMB:
661+
if mode == THUMB: # THUMB code
660662
offset = ins.size
661663
if ins.id == Opcode.bl or ins.id in BRANCHES:
662664
target = ins.target
@@ -672,7 +674,7 @@ def warn(*args):
672674
emit = f'.2byte 0x{i:04X} @ {ins.mnemonic} _{target:08X}'
673675
elif ins.id == Opcode.bx:
674676
value = rom.read(addr, 2)
675-
# Assembler cannot emit bx with nonzero rd, see THUMB.5 TODO: Should these be illegal?
677+
# Assembler will not emit bx with nonzero rd, see THUMB.5 TODO: Should these be treated as illegal?
676678
emit = f'.inst 0x{value:04X}' if value & 3 != 0 else str(ins)
677679
elif ins.id == Opcode.ldr and isinstance(ins, Thumb6): # Convert PC-relative loads into labels
678680
target = ins.target
@@ -700,10 +702,7 @@ def warn(*args):
700702
value = self.label_for(value-1)
701703
else:
702704
value = f'0x{value:08X}'
703-
if label:
704-
emit = f'{label} .4byte {value}'
705-
else:
706-
emit = f'\t.4byte {value}'
705+
emit = f'{label} .4byte {value}' if label else f'\t.4byte {value}'
707706
if DEBUG:
708707
comment += f' @ {addr_flags}'
709708
f.write(f'{emit}{comment}\n')
@@ -729,7 +728,7 @@ def warn(*args):
729728
flags = addr_flags
730729
addr += offset
731730
bar.update(offset)
732-
# Close current module
731+
# Done with output; close file handles and cleanup
733732
if f is not sys.stdout and f:
734733
if bytecount:
735734
f.write('\n')

0 commit comments

Comments
 (0)