|
| 1 | +#!/usr/bin/env python3 |
| 2 | +# Written by W.J. van der Laan, provided under MIT license. |
| 3 | +# |
| 4 | +# Usage: ../do_build.py <hash> [<hash> ...] |
| 5 | +# Will produce <tgtdir>/bitcoind.<hash>.stripped (tgtdir defaults to /tmp/compare) for binary comparison |
| 6 | +import os,subprocess,sys,argparse,logging,shutil,re,hashlib |
| 7 | +from collections import defaultdict |
| 8 | + |
logger = logging.getLogger('do_build')
# To compare the resulting directories afterwards, run for example:
#   git diff -W --word-diff /tmp/compare/4b5b263 /tmp/compare/d1bc5bf

# WARNING WARNING WARNING
# DO NOT RUN this on a working tree that has any local additions: it will nuke
# all non-repository files, multiple times over.
# Ideally this would clone the git tree to a temporary directory first.
# Suffice to say, it doesn't.
# WARNING WARNING WARNING

# Extra arguments appended to the ./configure command line; they point the
# build at libevent headers and libraries under /opt/libevent.
CONFIGURE_EXTRA = [
    'EVENT_CFLAGS=-I/opt/libevent/include',
    'EVENT_LIBS=-L/opt/libevent/lib -levent',
    'EVENT_PTHREADS_CFLAGS=-I/opt/libevent/include',
    'EVENT_PTHREADS_LIBS=-L/opt/libevent/lib -levent_pthreads',
]
# Default value for --parallelism / -j
DEFAULT_PARALLELISM = 4

# No debugging information (not used by analysis at the moment, saves on I/O)
OPTFLAGS = [
    "-O0",
    "-g0",
]
# Some options from -O to reduce code size.
# -O or -Os cannot be used as they do some weird cross-contamination between
# unchanged functions in a compilation unit, so selectively enable the
# optimizations that don't interfere or cause excessive sensitivity to changes.
OPTFLAGS.extend([
    "-fcombine-stack-adjustments",
    "-fcompare-elim",
    "-fcprop-registers",
    "-fdefer-pop",
    "-fforward-propagate",
    "-fif-conversion",
    "-fif-conversion2",
    "-finline-functions-called-once",
    "-fshrink-wrap",
    "-fsplit-wide-types",
    "-ftree-bit-ccp",
    "-ftree-ccp",
    "-ftree-ch",
    "-ftree-copy-prop",
    "-ftree-copyrename",
    "-ftree-dce",
    "-ftree-dominator-opts",
    "-ftree-dse",
    "-ftree-fre",
    "-ftree-sink",
    "-ftree-slsr",
    "-ftree-sra",
    "-ftree-ter",
])
# -ffunction-sections/-fdata-sections put every element in its own section.
# This is essential.
OPTFLAGS.extend([
    '-ffunction-sections',
    '-fdata-sections',
])
# Fix the random seed
OPTFLAGS.append('-frandom-seed=notsorandom')
# Merging constants causes global interaction between sections/functions and
# was originally switched off. It was re-enabled because it doesn't matter:
# the numbered section names are annoying whether merged or unmerged.
OPTFLAGS.append('-fmerge-all-constants')
# -fipa-sra semi-randomly renames functions (or creates variants of functions
# with different names)
OPTFLAGS.append('-fno-ipa-sra')
# -freorder-functions moves functions to .unlikely / .hot sections
OPTFLAGS.append('-fno-reorder-functions')
# Candidates for disabling further interprocedural optimizations (not enabled):
# -fno-ipa-profile -fno-ipa-pure-const -fno-ipa-reference -fno-guess-branch-probability -fno-ipa-cp

CPPFLAGS = []
# Attempt to prevent __LINE__ from messing with things:
#CPPFLAGS+=["-D__LINE__=0","-D__DATE__=\"\""] #-D__COUNTER__=0"
# XXX unfortunately this approach does not work thanks to boost.

# objcopy: strip all symbols, debug info, and the hash header section
OBJCOPY_ARGS = ['-R.note.gnu.build-id', '-g', '-S']
# objdump arguments used for the disassembly analysis pass
OBJDUMP_ARGS = ['-C', '--no-show-raw-insn', '-d', '-r']

# Tool names; each can be overridden from the environment
GIT = os.environ.get('GIT', 'git')
MAKE = os.environ.get('MAKE', 'make')
OBJCOPY = os.environ.get('OBJCOPY', 'objcopy')
OBJDUMP = os.environ.get('OBJDUMP', 'objdump')
OBJEXT = os.environ.get('OBJEXT', '.o')  # object file extension

# Default output directory, and the location of the patches shipped next to
# this script
TGTDIR = '/tmp/compare'
PYDIR = os.path.dirname(os.path.abspath(__file__))
PATCHDIR = os.path.join(PYDIR, 'patches')
| 70 | + |
def init_logging():
    '''
    Configure the root logger to print colorized messages to stdout.

    Installs a single stream handler on the root logger (via
    logging.basicConfig) with the level set to DEBUG. Each standard level
    gets its own ANSI-colored format string. Like any basicConfig call this
    does nothing if the root logger already has handlers.
    '''
    # One format string per standard level.
    LOG_PREFMT = {
        logging.DEBUG: '\x1b[38;5;239m[%(name)-8s]\x1b[0m %(message)s\x1b[0m',
        logging.INFO: '\x1b[38;5;19m>\x1b[38;5;18m>\x1b[38;5;17m> \x1b[38;5;239m[%(name)-8s]\x1b[0m %(message)s\x1b[0m',
        logging.WARNING: '\x1b[38;5;228m>\x1b[38;5;227m>\x1b[38;5;226m> \x1b[38;5;239m[%(name)-8s]\x1b[38;5;226m %(message)s\x1b[0m',
        logging.ERROR: '\x1b[38;5;208m>\x1b[38;5;202m>\x1b[38;5;196m> \x1b[38;5;239m[%(name)-8s]\x1b[38;5;196m %(message)s\x1b[0m',
        logging.CRITICAL: '\x1b[48;5;196;38;5;16m>>> [%(name)-8s] %(message)s\x1b[0m',
    }

    class MyStreamHandler(logging.StreamHandler):
        '''Stream handler that picks its formatter based on the record level.'''
        def __init__(self, stream, formatters):
            logging.StreamHandler.__init__(self, stream)
            self.formatters = formatters
            # Ascending list of configured levels, used for the fallback lookup
            self.levels = sorted(formatters)

        def format(self, record):
            formatter = self.formatters.get(record.levelno)
            if formatter is None:
                # Non-standard level (for example a custom level 25): use the
                # format of the closest configured level at or below it, or
                # the lowest configured one, instead of failing with KeyError
                # and losing the message.
                chosen = self.levels[0]
                for level in self.levels:
                    if level <= record.levelno:
                        chosen = level
                formatter = self.formatters[chosen]
            return formatter.format(record)

    formatters = {level: logging.Formatter(fmtstr) for (level, fmtstr) in LOG_PREFMT.items()}
    handler = MyStreamHandler(sys.stdout, formatters)
    logging.basicConfig(level=logging.DEBUG, handlers=[handler])
| 92 | + |
def check_call(args):
    '''
    Run a command through subprocess.check_call.

    If the call raises, the command line is logged as an error and the
    original exception is re-raised unchanged.
    '''
    try:
        subprocess.check_call(args)
    except Exception:
        command_line = ' '.join(args)
        logger.error('Command failed: %s' % command_line)
        raise
| 100 | + |
def iterate_objs(srcdir):
    '''
    Iterate over all object files below srcdir.

    Yields the path of every file whose name ends in OBJEXT, relative to
    srcdir (files directly in srcdir are yielded as a bare file name).
    Directories and files are visited in sorted order so the sequence is the
    same on every run.
    '''
    for (root, dirs, files) in os.walk(srcdir):
        # Sorting in place makes os.walk descend in a stable order
        dirs.sort()
        # relpath copes with a trailing separator on srcdir; slicing off
        # len(srcdir)+1 characters would eat the first character of every
        # subdirectory name in that case.
        relroot = os.path.relpath(root, srcdir)
        if relroot == os.curdir:
            relroot = ''
        for filename in sorted(files):
            if filename.endswith(OBJEXT):
                yield os.path.join(relroot, filename)
| 110 | + |
def copy_o_files(srcdir,tgtdir):
    '''
    Copy every object file found under srcdir into tgtdir.

    The path of each file relative to srcdir is kept, and missing parent
    directories below tgtdir are created as needed.
    '''
    for relative_path in iterate_objs(srcdir):
        destination = os.path.join(tgtdir, relative_path)
        os.makedirs(os.path.dirname(destination), exist_ok=True)
        source = os.path.join(srcdir, relative_path)
        shutil.copy(source, destination)
| 117 | + |
# Header line that objdump prints before the disassembly of each section
_SECTION_HEADER_RE = re.compile(r'^Disassembly of section (.*):$')

def _split_sections(out):
    '''Group the lines of objdump output by the section they belong to.'''
    sections = defaultdict(list)
    # Lines before the first header are collected under the '' key
    current = ''
    for line in out.split('\n'):
        match = _SECTION_HEADER_RE.match(line)
        if match:
            current = match.group(1)
        if '.rodata' not in line: # filter out 'ebc: R_X86_64_32 .rodata+0x1944'
            sections[current].append(line)
    return sections

def objdump_all(srcdir,tgtdir):
    '''
    Object analysis pass using objdump.

    Runs OBJDUMP with OBJDUMP_ARGS on every object file below srcdir, splits
    the output at the 'Disassembly of section ...' headers and writes each
    named section to tgtdir/<sha1 of the section name>.dis. Lines mentioning
    '.rodata' are dropped, and so is the header part before the first section.

    The output name depends on the section name only, so a section name that
    occurs in several object files is written more than once and the file
    processed last wins.

    Raises RuntimeError if objdump exits with a non-zero status.
    '''
    for objname in iterate_objs(srcdir):
        objpath = os.path.join(srcdir, objname)
        # objdump needs no input, so don't hand it a stdin pipe
        p = subprocess.Popen([OBJDUMP] + OBJDUMP_ARGS + [objpath], stdin=subprocess.DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        (out,err) = p.communicate()
        if p.returncode != 0:
            # Report which file failed and what objdump said about it
            raise RuntimeError('objdump failed for %s (exit status %d): %s' %
                    (objpath, p.returncode, err.decode(errors='replace').strip()))

        # postprocess - break into sections separated by 'Disassembly of section...'
        sections = _split_sections(out.decode())

        for (section, lines) in sections.items():
            if not section:
                continue
            name = hashlib.sha1(section.encode()).hexdigest()
            outname = os.path.join(tgtdir, name + '.dis')
            os.makedirs(os.path.dirname(outname), exist_ok=True)
            with open(outname, 'w') as f:
                f.write('\n'.join(lines))

    # some TODO s, learning about the objdump output:
    # - demangle section names
    # - remove/make relative addresses
    # - sort/combine sections
    # - remove duplicate sections? (sounds like linker's work - can we do a partial link that preserves sections, such as for inlines?)
    # - resolve callq's relocations - these are ugly right now - integrate reloc result into instruction by substituting argument
    # - [- 17: R_X86_64_32S vtable for boost::exception_detail::bad_exception_+0x30-]
    #   (at the very least delete callq's arguments)
    # - for data (mov etc): fill in data? pointers change arbitrarily especially in combined string tables (.rodata.str1...)
    #   and these entries don't have names/symbols
    # - or could use a different disassembler completely, such as capstone. Parsing objdump output is a hack.
| 172 | + |
def parse_arguments():
    '''
    Parse the command line of the script.

    Returns the argparse namespace with one or more commit ids in
    `commitids`, plus `tgtdir`, `parallelism` and `executables`; the
    comma-separated --executables value is split into a list.
    '''
    parser = argparse.ArgumentParser(
        description='Build to compare binaries. Execute this from a repository directory.',
    )
    parser.add_argument('commitids', metavar='COMMITID', nargs='+')
    parser.add_argument(
        '--executables',
        default='src/bitcoind',
        help='Comma-separated list of executables to build, default is "src/bitcoind"',
    )
    parser.add_argument(
        '--tgtdir',
        default=TGTDIR,
        help='Target directory, default is "'+TGTDIR+'"',
    )
    parser.add_argument(
        '--parallelism', '-j',
        default=DEFAULT_PARALLELISM,
        type=int,
        help='Make parallelism, default is %s' % (DEFAULT_PARALLELISM),
    )
    parsed = parser.parse_args()
    # Turn "a,b,c" into ['a', 'b', 'c']
    parsed.executables = parsed.executables.split(',')
    return parsed
| 182 | + |
def _is_hex_commit_id(commit):
    '''Return True if commit is a non-empty string of hexadecimal digits only.'''
    return bool(commit) and all(c in '0123456789abcdefABCDEF' for c in commit)

def _build_commit(commit, args, make_args):
    '''Check out, build and analyze a single commit, storing results below args.tgtdir.'''
    logger.info("Building %s..." % commit)
    commitdir = os.path.join(args.tgtdir, commit)
    commitdir_obj = os.path.join(args.tgtdir, commit+'.o')

    try:
        os.makedirs(commitdir)
    except FileExistsError:
        logger.error("%s already exists, remove it to continue" % commitdir)
        sys.exit(1)
    # Destructive: throws away local changes and all untracked/ignored files
    check_call([GIT,'reset','--hard'])
    check_call([GIT,'clean','-f','-x','-d'])
    check_call([GIT,'checkout',commit])
    try:
        check_call([GIT,'apply', os.path.join(PATCHDIR,'stripbuildinfo.patch')])
    except subprocess.CalledProcessError:
        logger.error('Could not apply patch to strip build info. Probably it needs to be updated')
        sys.exit(1)

    check_call(['./autogen.sh'])
    logger.info('Running configure script')
    check_call(['./configure', '--disable-hardening', '--with-incompatible-bdb', '--without-cli', '--disable-tests', '--disable-ccache',
        'CPPFLAGS='+(' '.join(CPPFLAGS)),
        'CFLAGS='+(' '.join(OPTFLAGS)), 'CXXFLAGS='+(' '.join(OPTFLAGS)), 'LDFLAGS='+(' '.join(OPTFLAGS))] + CONFIGURE_EXTRA)

    for name in args.executables:
        logger.info('Building executable %s' % name)
        target_name = os.path.join(args.tgtdir, os.path.basename(name) + '.' + commit)
        check_call([MAKE] + make_args + [name])
        # Keep both the untouched binary and a stripped copy for comparison
        shutil.copy(name, target_name)
        check_call([OBJCOPY] + OBJCOPY_ARGS + [name, target_name + '.stripped'])

    logger.info('Copying object files...')
    copy_o_files('.', commitdir_obj)

    logger.info('Performing basic analysis pass...')
    objdump_all(commitdir_obj, commitdir)

def main():
    '''
    Script entry point: build every commit given on the command line.

    Must be run from inside the repository to build. For each commit the
    tree is reset, cleaned and checked out, the build-info patch is applied,
    the requested executables are built and copied (plain and stripped) to
    the target directory, and the object files are copied and disassembled.

    Exits with status 1 on invalid commit ids, an already existing commit
    directory, a patch that does not apply, or any other error (which is
    logged with its traceback).
    '''
    args = parse_arguments()
    init_logging()
    try:
        try:
            os.makedirs(args.tgtdir)
        except FileExistsError:
            logger.warning("%s already exists, remove it if you don't want to continue a current comparison session" % args.tgtdir)

        # Validate everything up front, before touching the working tree
        for commit in args.commitids:
            if not _is_hex_commit_id(commit):
                logger.error('%s is not a hexadecimal commit id. It\'s the only thing we know.' % commit)
                sys.exit(1)

        make_args = []
        if args.parallelism is not None:
            make_args += ['-j%i' % args.parallelism]

        for commit in args.commitids:
            _build_commit(commit, args, make_args)

        if len(args.commitids)>1:
            logger.info('Use this command to compare resulting directories:')
            # Point at the directory actually used, not the default one
            logger.info('$ git diff -W --word-diff %s %s' % (
                os.path.join(args.tgtdir, args.commitids[0]),
                os.path.join(args.tgtdir, args.commitids[1])))
    except Exception:
        logger.exception('Error:')
        # Report the failure to the caller instead of exiting with status 0
        sys.exit(1)

if __name__ == '__main__':
    main()
| 249 | + |
0 commit comments