llvm_checksum.py
6.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
#!/usr/bin/python
""" A small program to compute checksums of LLVM checkout.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import hashlib
import logging
import re
import sys
from argparse import ArgumentParser
from project_tree import *
# Matches an expanded svn keyword: "$Date" or "$LastChangedDate", followed by
# one or more characters other than "$", up to the closing "$". Group 1
# captures the keyword name. Because at least one character is required after
# the keyword, an unexpanded "$Date$" does not match.
SVN_DATES_REGEX = re.compile(r"\$(Date|LastChangedDate)[^\$]+\$")
def main():
    """Command-line entry point: print or verify checksums of an LLVM checkout.

    Without --check, the computed checksums are written to stdout and the
    process exits with status 0. With --check, the computed checksums are
    compared with the ones read from the reference file: on a match a short
    message is printed and the function returns normally; on a mismatch both
    sets of checksums are printed and the process exits with status 1.
    """
    arg_parser = ArgumentParser()
    arg_parser.add_argument(
        "-v", "--verbose", action="store_true", help="enable debug logging")
    arg_parser.add_argument(
        "-c",
        "--check",
        metavar="reference_file",
        help=("read checksums from reference_file and "
              "check they match checksums of llvm_path."))
    arg_parser.add_argument(
        "--partial",
        action="store_true",
        help=("ignore projects from reference_file "
              "that are not checked out in llvm_path."))
    arg_parser.add_argument(
        "--multi_dir",
        action="store_true",
        help=("indicates llvm_path contains llvm, checked out "
              "into multiple directories, as opposed to a "
              "typical single source tree checkout."))
    arg_parser.add_argument("llvm_path")
    options = arg_parser.parse_args()

    # The reference file is read up front, before any hashing work starts.
    reference_checksums = None
    if options.check is not None:
        with open(options.check, "r") as reference_file:
            reference_checksums = ReadLLVMChecksums(reference_file)

    if options.verbose:
        logging.basicConfig(level=logging.DEBUG)

    single_tree = not options.multi_dir
    llvm_projects = CreateLLVMProjects(single_tree)
    checksums = ComputeLLVMChecksums(options.llvm_path, llvm_projects)

    out = sys.stdout

    # No reference file given: just report what was computed.
    if reference_checksums is None:
        WriteLLVMChecksums(checksums, out)
        sys.exit(0)

    if ValidateChecksums(reference_checksums, checksums, options.partial):
        out.write("Checksums match.")
        return

    # Mismatch: show both sides so the difference can be inspected.
    out.write("Checksums differ.\nNew checksums:\n")
    WriteLLVMChecksums(checksums, out)
    out.write("Reference checksums:\n")
    WriteLLVMChecksums(reference_checksums, out)
    sys.exit(1)
def ComputeLLVMChecksums(root_path, projects):
    """Compute checksums for LLVM sources checked out using svn.

    Each file reported by WalkProjectFiles is hashed with SHA-256 after
    expanded svn date keywords have been collapsed. A dangling symlink is
    hashed by its link target instead of its contents. The checksum of a
    project is the SHA-256 over the (relative path, file digest) pairs of its
    files, taken in sorted path order.

    Works under both Python 2 and Python 3: everything fed to the hashers is
    converted to bytes first, and the keyword regex is applied as a bytes
    pattern because file contents are read in binary mode.

    Args:
      root_path: a directory of llvm checkout.
      projects: a list of LLVMProject instances, which describe checkout paths,
        relative to root_path.

    Returns:
      A dict mapping from project name to project checksum. Projects whose
      directory does not exist under root_path are skipped and therefore
      absent from the result.
    """
    hash_algo = hashlib.sha256

    # File contents are bytes, so the pattern must be bytes as well; applying
    # a text pattern to bytes raises TypeError on Python 3. The bytes pattern
    # is derived from the module-level constant to keep one source of truth.
    svn_dates_pattern = SVN_DATES_REGEX.pattern
    if not isinstance(svn_dates_pattern, bytes):
        svn_dates_pattern = svn_dates_pattern.encode("ascii")
    svn_dates_regex = re.compile(svn_dates_pattern)

    def to_bytes(value):
        # hashlib only accepts bytes. On Python 2 str already is bytes and is
        # returned untouched, so digests are unchanged there. On Python 3,
        # paths, symlink targets and hex digests are text; "surrogateescape"
        # restores the original bytes of undecodable file names.
        if isinstance(value, bytes):
            return value
        return value.encode("utf-8", "surrogateescape")

    def collapse_svn_substitutions(contents):
        # Replace svn substitutions for $Date$ and $LastChangedDate$.
        # Unfortunately, these are locale-specific.
        # NOTE(review): the replacement was historically written as the
        # non-raw literal "$\1$", i.e. "$" + byte 0x01 + "$", not a
        # back-reference to the keyword name. That looks unintended, but the
        # exact bytes are kept: changing them would change the digest of
        # every file containing such a keyword and invalidate stored
        # reference checksums.
        return svn_dates_regex.sub(b"$\x01$", contents)

    def read_and_collapse_svn_subsitutions(file_path):
        with open(file_path, "rb") as f:
            contents = f.read()
        new_contents = collapse_svn_substitutions(contents)
        if contents != new_contents:
            logging.debug("Replaced svn keyword substitutions in %s",
                          file_path)
            logging.debug("\n\tBefore\n%s\n\tAfter\n%s", contents,
                          new_contents)
        return new_contents

    project_checksums = dict()
    # Hash each project.
    for proj in projects:
        project_root = os.path.join(root_path, proj.relpath)
        if not os.path.exists(project_root):
            logging.info("Folder %s doesn't exist, skipping project %s",
                         proj.relpath, proj.name)
            continue

        files = list()

        def add_file_hash(file_path):
            # islink() and not exists() identifies a dangling symlink: there
            # is no content to read, so the link target is hashed instead.
            if os.path.islink(file_path) and not os.path.exists(file_path):
                content = to_bytes(os.readlink(file_path))
            else:
                content = read_and_collapse_svn_subsitutions(file_path)
            hasher = hash_algo()
            hasher.update(content)
            file_digest = hasher.hexdigest()
            logging.debug("Checksum %s for file %s", file_digest, file_path)
            files.append((file_path, file_digest))

        logging.info("Computing checksum for %s", proj.name)
        WalkProjectFiles(root_path, projects, proj, add_file_hash)

        # Compute final checksum. Sorting on the encoded path gives the same
        # byte-wise order on Python 3 that Python 2 produces for str paths.
        files.sort(key=lambda entry: to_bytes(entry[0]))
        hasher = hash_algo()
        for file_path, file_digest in files:
            # Hash the path relative to the project root so the result does
            # not depend on where the checkout lives.
            relative_path = os.path.relpath(file_path, project_root)
            hasher.update(to_bytes(relative_path))
            hasher.update(to_bytes(file_digest))
        project_checksums[proj.name] = hasher.hexdigest()
    return project_checksums
def WriteLLVMChecksums(checksums, f):
"""Writes checksums to a text file.
Args:
checksums: a dict mapping from project name to project checksum (result of
ComputeLLVMChecksums).
f: a file object to write into.
"""
for proj in sorted(checksums.keys()):
f.write("{} {}\n".format(checksums[proj], proj))
def ReadLLVMChecksums(f):
    """Reads checksums from a text file, produced by WriteLLVMChecksums.

    Each non-blank line must consist of exactly two whitespace-separated
    fields: the checksum followed by the project name. Blank and
    whitespace-only lines (for example a trailing empty line) are ignored.

    Args:
      f: a text file object to read from; only its readline() method is used.

    Returns:
      A dict, mapping from project name to project checksum.

    Raises:
      ValueError: if a non-blank line does not contain exactly two fields.
    """
    checksums = {}
    # readline() returns "" only at end of file, so it serves as the sentinel.
    for line in iter(f.readline, ""):
        fields = line.split()
        if not fields:
            # Tolerate blank lines instead of failing on tuple unpacking.
            continue
        if len(fields) != 2:
            raise ValueError(
                "Malformed checksum line (expected '<checksum> <project>'): "
                "%r" % (line,))
        checksum, proj = fields
        checksums[proj] = checksum
    return checksums
def ValidateChecksums(reference_checksums,
                      new_checksums,
                      allow_missing_projects=False):
    """Checks whether new_checksums agree with reference_checksums.

    Args:
      reference_checksums: a dict of reference checksums, mapping from a
        project name to a project checksum.
      new_checksums: a dict of checksums to be checked, mapping from a project
        name to a project checksum.
      allow_missing_projects:
        When True, reference_checksums may contain more projects than
        new_checksums. Projects missing from new_checksums are ignored.
        When False, new_checksums and reference_checksums must contain
        checksums for the same set of projects. If there is a project in
        reference_checksums, missing from new_checksums, ValidateChecksums
        will return False.

    Returns:
      True, if checksums match with regards to allow_missing_projects flag
      value. False, otherwise.
    """
    # Every new project must also be in the reference (checked below), so
    # equal sizes are enough to establish that both cover the same projects.
    same_size = len(new_checksums) == len(reference_checksums)
    if not allow_missing_projects and not same_size:
        return False

    # Each computed project needs a reference entry with an equal checksum.
    return all(
        proj in reference_checksums and reference_checksums[proj] == checksum
        for proj, checksum in new_checksums.items())
# Run the command-line interface only when executed as a script, not when the
# module is imported.
if __name__ == "__main__":
    main()