#!/usr/bin/env python3 """ strip_asm.py - Cleanup ASM output for the specified file """ from argparse import ArgumentParser import sys import os import re def find_used_labels(asm): found = set() label_re = re.compile("\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)") for l in asm.splitlines(): m = label_re.match(l) if m: found.add('.L%s' % m.group(1)) return found def normalize_labels(asm): decls = set() label_decl = re.compile("^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)") for l in asm.splitlines(): m = label_decl.match(l) if m: decls.add(m.group(0)) if len(decls) == 0: return asm needs_dot = next(iter(decls))[0] != '.' if not needs_dot: return asm for ld in decls: asm = re.sub("(^|\s+)" + ld + "(?=:|\s)", '\\1.' + ld, asm) return asm def transform_labels(asm): asm = normalize_labels(asm) used_decls = find_used_labels(asm) new_asm = '' label_decl = re.compile("^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)") for l in asm.splitlines(): m = label_decl.match(l) if not m or m.group(0) in used_decls: new_asm += l new_asm += '\n' return new_asm def is_identifier(tk): if len(tk) == 0: return False first = tk[0] if not first.isalpha() and first != '_': return False for i in range(1, len(tk)): c = tk[i] if not c.isalnum() and c != '_': return False return True def process_identifiers(l): """ process_identifiers - process all identifiers and modify them to have consistent names across all platforms; specifically across ELF and MachO. For example, MachO inserts an additional understore at the beginning of names. This function removes that. """ parts = re.split(r'([a-zA-Z0-9_]+)', l) new_line = '' for tk in parts: if is_identifier(tk): if tk.startswith('__Z'): tk = tk[1:] elif tk.startswith('_') and len(tk) > 1 and \ tk[1].isalpha() and tk[1] != 'Z': tk = tk[1:] new_line += tk return new_line def process_asm(asm): """ Strip the ASM of unwanted directives and lines """ new_contents = '' asm = transform_labels(asm) # TODO: Add more things we want to remove discard_regexes = [ re.compile("\s+\..*$"), # directive re.compile("\s*#(NO_APP|APP)$"), #inline ASM re.compile("\s*#.*$"), # comment line re.compile("\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"), #global directive re.compile("\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"), ] keep_regexes = [ ] fn_label_def = re.compile("^[a-zA-Z_][a-zA-Z0-9_.]*:") for l in asm.splitlines(): # Remove Mach-O attribute l = l.replace('@GOTPCREL', '') add_line = True for reg in discard_regexes: if reg.match(l) is not None: add_line = False break for reg in keep_regexes: if reg.match(l) is not None: add_line = True break if add_line: if fn_label_def.match(l) and len(new_contents) != 0: new_contents += '\n' l = process_identifiers(l) new_contents += l new_contents += '\n' return new_contents def main(): parser = ArgumentParser( description='generate a stripped assembly file') parser.add_argument( 'input', metavar='input', type=str, nargs=1, help='An input assembly file') parser.add_argument( 'out', metavar='output', type=str, nargs=1, help='The output file') args, unknown_args = parser.parse_known_args() input = args.input[0] output = args.out[0] if not os.path.isfile(input): print(("ERROR: input file '%s' does not exist") % input) sys.exit(1) contents = None with open(input, 'r') as f: contents = f.read() new_contents = process_asm(contents) with open(output, 'w') as f: f.write(new_contents) if __name__ == '__main__': main() # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 # kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off; # kate: indent-mode python; remove-trailing-spaces modified;