Skip to content

Commit 3eb8db1

Browse files
authored
Add security checks for auto-decompression of downloaded archives: (#949)
1. Prevent path traversal by verifying that extracted file paths remain inside the target directory.
2. Reject tar files containing hard links or symbolic links.
3. Reject zip files containing symbolic links.
4. Use context managers for safe file handling.

Co-authored-by: HX Lin
1 parent 5cf01b2 commit 3eb8db1

File tree

1 file changed

+34
-17
lines changed

1 file changed

+34
-17
lines changed

mmdnn/conversion/common/utils.py

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from __future__ import division
77
import os
88
import sys
9+
import stat
910
import numpy as np
1011
from six import text_type, binary_type, integer_types
1112
import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2
@@ -190,6 +191,29 @@ def _multi_thread_download(url, file_name, file_size, thread_count):
190191

191192
return file_name
192193

194+
def is_within_directory(base_dir, target_path):
    """Return True if *target_path* resolves to *base_dir* or a path beneath it.

    Both arguments are canonicalized with ``os.path.realpath`` before they are
    compared, so ``..`` components and symbolic links that already exist on
    disk are resolved first. Neither path has to exist.

    Args:
        base_dir: Directory that is expected to contain the target.
        target_path: Path to test against ``base_dir``.

    Returns:
        bool: True when the resolved target equals the resolved base directory
        or lies inside it, False otherwise.
    """
    base_dir = os.path.realpath(base_dir)
    target_path = os.path.realpath(target_path)
    try:
        # Compare whole path components rather than string prefixes. Appending
        # os.sep and using startswith() breaks when base_dir is the filesystem
        # root ("/" + "/" == "//", which no resolved path starts with).
        return os.path.commonpath([base_dir, target_path]) == base_dir
    except ValueError:
        # commonpath() raises when the paths share no common root (for example
        # different drives on Windows); such a target is never inside base_dir.
        return False
198+
199+
def check_tar_file(directory, tarf):
    """Validate every member of an open tar archive before it is extracted.

    Args:
        directory: Directory the archive is about to be extracted into.
        tarf: An open ``tarfile.TarFile`` object.

    Raises:
        ValueError: If a member is a hard link, a symbolic link, a device node
            or FIFO, or if its name would resolve to a path outside
            ``directory``.
    """
    for member in tarf.getmembers():
        if member.islnk():
            raise ValueError(f"Hard link detected in archive: {member.name}.")
        if member.issym():
            raise ValueError(f"Symbolic link detected in archive: {member.name}.")
        # isdev() is true for character devices, block devices and FIFOs.
        # A downloaded model archive has no reason to contain them, and
        # extractall() would otherwise try to create them on disk.
        if member.isdev():
            raise ValueError(f"Special file detected in archive: {member.name}.")
        # An absolute member name makes os.path.join() discard `directory`,
        # so it is caught by the containment check as well.
        member_path = os.path.join(directory, member.name)
        if not is_within_directory(directory, member_path):
            raise ValueError(f"Path traversal detected: {member.name}.")
208+
209+
def check_zip_file(directory, zipf):
    """Validate every entry of an open zip archive before it is extracted.

    Args:
        directory: Directory the archive is about to be extracted into.
        zipf: An open ``zipfile.ZipFile`` object.

    Raises:
        ValueError: If an entry is marked as a symbolic link, or if its
            filename would resolve to a path outside ``directory``.
    """
    for entry in zipf.infolist():
        name = entry.filename
        # The high 16 bits of external_attr are interpreted here as
        # st_mode-style file type/permission bits.
        unix_mode = entry.external_attr >> 16
        if stat.S_ISLNK(unix_mode):
            raise ValueError(f"Symbolic link detected in archive: {name}.")
        if is_within_directory(directory, os.path.join(directory, name)):
            continue
        raise ValueError(f"Path traversal detected: {name}.")
193217

194218
def download_file(url, directory='./', local_fname=None, force_write=False, auto_unzip=False, compre_type=''):
195219
"""Download the data from source url, unless it's already here.
@@ -229,30 +253,23 @@ def download_file(url, directory='./', local_fname=None, force_write=False, auto
229253
if ret.endswith(".tar.gz") or ret.endswith(".tgz"):
230254
try:
231255
import tarfile
232-
tar = tarfile.open(ret)
233-
for name in tar.getnames():
234-
if not (os.path.realpath(os.path.join(directory, name))+ os.sep).startswith(os.path.realpath(directory) + os.sep):
235-
raise ValueError('The decompression path does not match the current path. For more info: https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall')
236-
tar.extractall(directory)
237-
tar.close()
256+
with tarfile.open(ret) as tarf:
257+
check_tar_file(directory, tarf)
258+
tarf.extractall(directory)
238259
except ValueError:
239260
raise
240-
except:
241-
print("Unzip file [{}] failed.".format(ret))
242-
261+
except Exception as e:
262+
print(f"Failed to decompress file: {ret} - {e}")
243263
elif ret.endswith('.zip'):
244264
try:
245265
import zipfile
246-
zip_ref = zipfile.ZipFile(ret, 'r')
247-
for name in zip_ref.namelist():
248-
if not (os.path.realpath(os.path.join(directory, name))+ os.sep).startswith(os.path.realpath(directory) + os.sep):
249-
raise ValueError('The decompression path does not match the current path. For more info: https://docs.python.org/3/library/zipfile.html?highlight=zipfile#zipfile.ZipFile.extractall')
250-
zip_ref.extractall(directory)
251-
zip_ref.close()
266+
with zipfile.ZipFile(ret, 'r') as zipf:
267+
check_zip_file(directory, zipf)
268+
zipf.extractall(directory)
252269
except ValueError:
253270
raise
254-
except:
255-
print("Unzip file [{}] failed.".format(ret))
271+
except Exception as e:
272+
print(f"Failed to decompress file: {ret} - {e}")
256273
return ret
257274
"""
258275
r = requests.head(url)

0 commit comments

Comments
 (0)