strip_asm.py
4.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/env python
"""
strip_asm.py - Cleanup ASM output for the specified file
"""
from argparse import ArgumentParser
import sys
import os
import re
def find_used_labels(asm):
    """
    Collect the local labels that are the target of a jump instruction.

    A line counts as a use when, after optional leading whitespace, it
    starts with a mnemonic beginning with 'j' (jmp, jne, ...) followed by
    whitespace and a '.L'-prefixed label.

    Returns a set of label names, each including the leading '.L'.
    """
    # Raw string: '\s' and '\.' are regex escapes, not Python string escapes.
    label_re = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
    matches = (label_re.match(line) for line in asm.splitlines())
    return {'.L%s' % m.group(1) for m in matches if m}
def normalize_labels(asm):
    """
    Rewrite local labels spelled 'Lxxx' so that they are spelled '.Lxxx'.

    Label declarations are found at the start of a line ('Lxxx:' or
    '.Lxxx:'). Every declared label that lacks the leading dot is then
    prefixed with one wherever it appears as a whole token, i.e. preceded
    by start-of-input or whitespace and followed by ':', whitespace or
    end-of-input. Labels that already carry the dot are left alone.

    Returns the (possibly rewritten) assembly text; the input is returned
    unchanged when there is nothing to rename.
    """
    label_decl = re.compile(r"^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
    decls = set()
    for line in asm.splitlines():
        m = label_decl.match(line)
        if m:
            decls.add(m.group(0))

    # Decide per label instead of inspecting one arbitrary set element, so
    # the result does not depend on set iteration order.
    undotted = sorted(ld for ld in decls if not ld.startswith('.'))
    if not undotted:
        return asm

    for ld in undotted:
        # The lookahead also accepts end-of-input so a reference on a final
        # line without a trailing newline is still renamed.
        pattern = r"(^|\s+)" + re.escape(ld) + r"(?=:|\s|$)"
        asm = re.sub(pattern, r"\g<1>." + ld, asm)
    return asm
def transform_labels(asm):
    """
    Normalize local labels and drop declarations of labels nobody jumps to.

    The text is first passed through normalize_labels(); then every line
    that declares a '.L' label ('.Lxxx:' at the start of the line) is kept
    only if that label is in the set returned by find_used_labels(). All
    other lines are kept as they are.

    Returns the resulting text with every kept line terminated by '\\n'.
    """
    asm = normalize_labels(asm)
    used_decls = find_used_labels(asm)
    label_decl = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")

    kept = []
    for line in asm.splitlines():
        m = label_decl.match(line)
        if m is None or m.group(0) in used_decls:
            kept.append(line)
            kept.append('\n')
    # Join once instead of growing a string line by line.
    return ''.join(kept)
def is_identifier(tk):
    """
    Return True if `tk` looks like an identifier.

    An identifier is non-empty, starts with a letter or '_', and every
    following character is alphanumeric or '_'.
    """
    if not tk:
        return False
    first = tk[0]
    if not (first.isalpha() or first == '_'):
        return False
    return all(c.isalnum() or c == '_' for c in tk[1:])
def process_identifiers(l):
    """
    process_identifiers - process all identifiers and modify them to have
    consistent names across all platforms; specifically across ELF and MachO.
    For example, MachO inserts an additional underscore at the beginning of
    names. This function removes that.

    The line is split into runs of [a-zA-Z0-9_] and everything in between.
    For each run accepted by is_identifier():
      * a leading '__Z' loses its first underscore (becoming '_Z...');
      * otherwise a leading '_' followed by a letter other than 'Z' is
        dropped.
    Every other piece of the line is copied through unchanged.

    Returns the rewritten line.
    """
    pieces = []
    for tk in re.split(r'([a-zA-Z0-9_]+)', l):
        if is_identifier(tk):
            if tk.startswith('__Z'):
                tk = tk[1:]
            elif tk.startswith('_') and len(tk) > 1 and \
                    tk[1].isalpha() and tk[1] != 'Z':
                tk = tk[1:]
        pieces.append(tk)
    return ''.join(pieces)
def process_asm(asm):
    """
    Strip the ASM of unwanted directives and lines

    Steps, in order:
      1. Labels are rewritten/pruned by transform_labels().
      2. '@GOTPCREL' is removed from every line.
      3. A line is dropped when it matches any pattern in discard_regexes,
         unless it also matches a pattern in keep_regexes (currently empty).
      4. A blank line is inserted before each function label definition
         that is not the first thing in the output.
      5. Surviving lines go through process_identifiers().

    Returns the cleaned text with every emitted line terminated by '\\n'.
    """
    asm = transform_labels(asm)

    # TODO: Add more things we want to remove
    discard_regexes = [
        re.compile(r"\s+\..*$"),  # directive
        re.compile(r"\s*#(NO_APP|APP)$"),  # inline ASM
        re.compile(r"\s*#.*$"),  # comment line
        re.compile(r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"),  # global directive
        re.compile(r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"),
    ]
    # Patterns here override a discard match; none are defined yet.
    keep_regexes = [
    ]
    fn_label_def = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_.]*:")

    out = []
    for l in asm.splitlines():
        # Remove Mach-O attribute
        l = l.replace('@GOTPCREL', '')
        add_line = not any(reg.match(l) is not None for reg in discard_regexes)
        if any(reg.match(l) is not None for reg in keep_regexes):
            add_line = True
        if not add_line:
            continue
        # Separate functions with a blank line, but never start the output
        # with one.
        if fn_label_def.match(l) and out:
            out.append('\n')
        out.append(process_identifiers(l))
        out.append('\n')
    return ''.join(out)
def main():
    """
    Command-line entry point: read an assembly file, strip it with
    process_asm(), and write the result to the output file.

    Takes two positional arguments, the input path and the output path.
    Exits with status 1 if the input path is not an existing file.
    """
    parser = ArgumentParser(
        description='generate a stripped assembly file')
    parser.add_argument(
        'input', metavar='input', type=str, nargs=1,
        help='An input assembly file')
    parser.add_argument(
        'out', metavar='output', type=str, nargs=1,
        help='The output file')
    # parse_known_args() is kept so unrecognized extra arguments are still
    # tolerated; they are simply ignored.
    args, _ = parser.parse_known_args()
    # Avoid shadowing the builtin input().
    input_path = args.input[0]
    output_path = args.out[0]
    if not os.path.isfile(input_path):
        # Diagnostics belong on stderr, not mixed into stdout.
        sys.stderr.write(
            "ERROR: input file '%s' does not exist\n" % input_path)
        sys.exit(1)
    with open(input_path, 'r') as f:
        contents = f.read()
    new_contents = process_asm(contents)
    with open(output_path, 'w') as f:
        f.write(new_contents)


if __name__ == '__main__':
    main()
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;