|
6 | 6 | from __future__ import division |
7 | 7 | import os |
8 | 8 | import sys |
| 9 | +import stat |
9 | 10 | import numpy as np |
10 | 11 | from six import text_type, binary_type, integer_types |
11 | 12 | import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2 |
@@ -190,6 +191,29 @@ def _multi_thread_download(url, file_name, file_size, thread_count): |
190 | 191 |
|
191 | 192 | return file_name |
192 | 193 |
|
def is_within_directory(base_dir, target_path):
    """Return True when ``target_path`` resolves to ``base_dir`` or below it.

    Both arguments are resolved with ``os.path.realpath`` first, so ``..``
    segments and symbolic links that already exist on disk are taken into
    account before the comparison.

    Args:
        base_dir: Directory that is expected to contain ``target_path``.
        target_path: Path to test; it does not need to exist.

    Returns:
        bool: True if the resolved target equals the resolved base directory
        or is located underneath it, False otherwise.
    """
    base_dir = os.path.realpath(base_dir)
    target_path = os.path.realpath(target_path)
    try:
        # commonpath compares whole path components, so "/data/models_evil"
        # is not treated as being inside "/data/models", and a base that is
        # the filesystem root works (a "root + os.sep" prefix test yields
        # "//", which no resolved child path starts with).
        return os.path.commonpath([base_dir, target_path]) == base_dir
    except ValueError:
        # Raised when the two paths cannot share a prefix at all, e.g. they
        # live on different drives on Windows.
        return False
| 198 | + |
def check_tar_file(directory, tarf):
    """Validate every member of an opened tar archive before extraction.

    Args:
        directory: Destination directory the archive is about to be
            extracted into.
        tarf: Opened archive object exposing ``getmembers()`` (for example a
            ``tarfile.TarFile``).

    Raises:
        ValueError: If a member is a hard link, a symbolic link, a device
            node or FIFO, or if its path resolves outside ``directory``.
    """
    for member in tarf.getmembers():
        if member.islnk():
            raise ValueError(f"Hard link detected in archive: {member.name}.")
        if member.issym():
            raise ValueError(f"Symbolic link detected in archive: {member.name}.")
        # TarInfo.isdev() is true for character devices, block devices and
        # FIFOs; none of them belong in a downloaded data archive.
        if member.isdev():
            raise ValueError(f"Special file detected in archive: {member.name}.")
        # An absolute member name makes os.path.join discard ``directory``,
        # so it is caught by the containment check as well.
        member_path = os.path.join(directory, member.name)
        if not is_within_directory(directory, member_path):
            raise ValueError(f"Path traversal detected: {member.name}.")
| 208 | + |
def check_zip_file(directory, zipf):
    """Validate every entry of an opened zip archive before extraction.

    Args:
        directory: Destination directory the archive is about to be
            extracted into.
        zipf: Opened archive object exposing ``infolist()`` (for example a
            ``zipfile.ZipFile``).

    Raises:
        ValueError: If an entry is flagged as a symbolic link, or if its
            path resolves outside ``directory``.
    """
    for entry in zipf.infolist():
        entry_name = entry.filename
        # The upper 16 bits of external_attr are interpreted here as a
        # Unix-style file mode and tested for the symlink file type.
        unix_mode = entry.external_attr >> 16
        if stat.S_ISLNK(unix_mode):
            raise ValueError(f"Symbolic link detected in archive: {entry_name}.")
        if is_within_directory(directory, os.path.join(directory, entry_name)):
            continue
        raise ValueError(f"Path traversal detected: {entry_name}.")
193 | 217 |
|
194 | 218 | def download_file(url, directory='./', local_fname=None, force_write=False, auto_unzip=False, compre_type=''): |
195 | 219 | """Download the data from source url, unless it's already here. |
@@ -229,30 +253,23 @@ def download_file(url, directory='./', local_fname=None, force_write=False, auto |
229 | 253 | if ret.endswith(".tar.gz") or ret.endswith(".tgz"): |
230 | 254 | try: |
231 | 255 | import tarfile |
232 | | - tar = tarfile.open(ret) |
233 | | - for name in tar.getnames(): |
234 | | - if not (os.path.realpath(os.path.join(directory, name))+ os.sep).startswith(os.path.realpath(directory) + os.sep): |
235 | | - raise ValueError('The decompression path does not match the current path. For more info: https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall') |
236 | | - tar.extractall(directory) |
237 | | - tar.close() |
| 256 | + with tarfile.open(ret) as tarf: |
| 257 | + check_tar_file(directory, tarf) |
| 258 | + tarf.extractall(directory) |
238 | 259 | except ValueError: |
239 | 260 | raise |
240 | | - except: |
241 | | - print("Unzip file [{}] failed.".format(ret)) |
242 | | - |
| 261 | + except Exception as e: |
| 262 | + print(f"Failed to decompress file: {ret} - {e}") |
243 | 263 | elif ret.endswith('.zip'): |
244 | 264 | try: |
245 | 265 | import zipfile |
246 | | - zip_ref = zipfile.ZipFile(ret, 'r') |
247 | | - for name in zip_ref.namelist(): |
248 | | - if not (os.path.realpath(os.path.join(directory, name))+ os.sep).startswith(os.path.realpath(directory) + os.sep): |
249 | | - raise ValueError('The decompression path does not match the current path. For more info: https://docs.python.org/3/library/zipfile.html?highlight=zipfile#zipfile.ZipFile.extractall') |
250 | | - zip_ref.extractall(directory) |
251 | | - zip_ref.close() |
| 266 | + with zipfile.ZipFile(ret, 'r') as zipf: |
| 267 | + check_zip_file(directory, zipf) |
| 268 | + zipf.extractall(directory) |
252 | 269 | except ValueError: |
253 | 270 | raise |
254 | | - except: |
255 | | - print("Unzip file [{}] failed.".format(ret)) |
| 271 | + except Exception as e: |
| 272 | + print(f"Failed to decompress file: {ret} - {e}") |
256 | 273 | return ret |
257 | 274 | """ |
258 | 275 | r = requests.head(url) |
|
0 commit comments