10
10
import math
11
11
import subprocess
12
12
import shutil
13
+ import hashlib
13
14
TATUM_ROOT = os .path .dirname (os .path .dirname (os .path .abspath (__file__ )))
14
15
15
16
@@ -165,13 +166,18 @@ def download_extract_test(args, test_name, test_url):
165
166
#A tar file of benchmark files
166
167
benchmark_tar = os .path .join (os .path .join (TATUM_ROOT , os .path .basename (test_url )))
167
168
168
- get_url (args , test_url , benchmark_tar )
169
+ new_tar = get_url (args , test_url , benchmark_tar )
169
170
170
171
test_files_dir = os .path .join (TATUM_ROOT , "test" )
171
172
172
- print ("Extracting test files to {}" .format (test_files_dir ))
173
- with tarfile .TarFile .open (benchmark_tar , mode = "r|*" ) as tar_file :
174
- tar_file .extractall (path = test_files_dir )
173
+ if new_tar or args .force :
174
+
175
+ print ("Extracting test files to {}" .format (test_files_dir ))
176
+ with tarfile .TarFile .open (benchmark_tar , mode = "r|*" ) as tar_file :
177
+ tar_file .extractall (path = test_files_dir )
178
+ else :
179
+ print ("Skipping file extraction" .format (test_files_dir ))
180
+
175
181
176
182
test_files += glob .glob ("{}/{}/*.tatum*" .format (test_files_dir , test_name ))
177
183
else :
@@ -183,14 +189,22 @@ def download_extract_test(args, test_name, test_url):
183
189
184
190
def get_url(args, url, filename):
    """
    Ensure `filename` holds the content referenced by `url`.

    If the file already exists (and --force is not set), its SHA256 hash is
    compared against the published checksum via check_hash_match(); the
    download is skipped when they match.  Otherwise the content is fetched
    with download_url() for real URLs, or copied from the local path.

    Returns:
        True if the file was (re-)fetched/copied, False if the existing
        local copy matched and was kept.
    """
    if not args.force and os.path.exists(filename):
        print("Found existing file {}, checking if hash matches".format(filename))
        file_matches = check_hash_match(args, url, filename)

        if file_matches:
            print("Existing file {} matches, skipping download".format(filename))
            return False
        else:
            print("Existing file {} contents differ, re-downloading".format(filename))

    if '://' in url:
        download_url(url, filename)
    else:
        # Bug fix: original read `shutl.copytree`, a typo that raised
        # NameError on the local-path branch.  The file imports `shutil`.
        shutil.copytree(url, filename)

    return True
207
+
194
208
def download_url (url , filename ):
195
209
"""
196
210
Downloads the specifed url to filename
@@ -210,5 +224,35 @@ def download_progress_callback(block_num, block_size, expected_size):
210
224
if block_num * block_size >= expected_size :
211
225
print ("" )
212
226
227
def check_hash_match(args, url, filename):
    """
    Compare the SHA256 of a local file against its published checksum.

    Fetches `url + ".sha256"` and compares the first whitespace-separated
    token of the response against the hex digest of `filename`.

    Args:
        args: parsed command-line arguments (unused here; kept for a
              uniform helper signature).
        url: URL of the downloaded artifact (checksum lives at url + ".sha256").
        filename: path of the local file to verify.

    Returns:
        True when the hashes match; False when they differ or the checksum
        could not be retrieved (treated as best-effort, not fatal).
    """
    checksum_url = url + ".sha256"
    try:
        web_hash = urllib.request.urlopen(checksum_url).read()
    except urllib.error.URLError as e:
        # Robustness fix: URLError is the superclass of HTTPError, so this
        # now also covers connection-level failures (DNS, refused, timeout)
        # that previously propagated and crashed the check.
        print("Failed to find expected SHA256 checksum at {} (reason '{}')".format(checksum_url, e))
        return False

    local_hash = hash_file(filename)

    # The remote checksum is raw bytes ("<hexdigest>  <name>\n"); compare
    # its first token against the encoded local hex digest.
    web_digest_bytes = web_hash.split()[0]
    local_digest_bytes = str.encode(local_hash)

    return web_digest_bytes == local_digest_bytes
244
+
245
def hash_file(filepath):
    """
    Return the hex SHA256 digest of the file at `filepath`.

    The file is read in fixed-size binary chunks so arbitrarily large
    files are hashed without loading them fully into memory.
    """
    chunk_size = 65536
    digest = hashlib.sha256()
    with open(filepath, "rb") as stream:
        # iter() with a sentinel yields chunks until read() returns b"".
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
256
+
213
257
if __name__ == "__main__" :
214
258
main ()
0 commit comments