Skip to content

Commit 0e6e026

Browse files
author
Andrew Fasano
committed
Safe_tarfile: make paths safe during extraction
1 parent fc60755 commit 0e6e026

File tree

1 file changed

+57
-18
lines changed

1 file changed

+57
-18
lines changed

unblob/handlers/archive/_safe_tarfile.py

+57-18
Original file line numberDiff line numberDiff line change
@@ -76,24 +76,63 @@ def extract(self, tarinfo: tarfile.TarInfo, extract_root: Path): # noqa: C901
7676

7777
# prevent traversal attempts through links
7878
if tarinfo.islnk() or tarinfo.issym():
79-
if Path(tarinfo.linkname).is_absolute():
80-
self.record_problem(
81-
tarinfo,
82-
"Absolute path as link target.",
83-
"Converted to extraction relative path.",
84-
)
85-
tarinfo.linkname = f"{extract_root}/{tarinfo.linkname}"
86-
87-
if not is_safe_path(
88-
basedir=extract_root,
89-
path=extract_root / Path(tarinfo.name).parent / tarinfo.linkname,
90-
):
91-
self.record_problem(
92-
tarinfo,
93-
"Traversal attempt through link path.",
94-
"Skipped.",
95-
)
96-
return
79+
link_target = Path(tarinfo.linkname)
80+
81+
# Check if the link is absolute and make it relative to extract_root
82+
if link_target.is_absolute():
83+
# Strip leading '/' to make the path relative
84+
rel_target = link_target.relative_to('/')
85+
86+
if Path(tarinfo.linkname).is_absolute():
87+
self.record_problem(
88+
tarinfo,
89+
"Absolute path as link target.",
90+
"Converted to extraction relative path.",
91+
)
92+
else:
93+
# Directly use the relative link target. If it points to an unsafe path, we'll
94+
# check and fix below
95+
rel_target = link_target
96+
97+
# The symlink will point to our relative target (may be updated below if unsafe)
98+
tarinfo.linkname = rel_target
99+
100+
# Resolve the link target to an absolute path
101+
resolved_path = (extract_root / tarinfo.name).parent / rel_target
102+
103+
# If the resolved path points outside of extract_root, we need to fix it!
104+
if not is_safe_path(extract_root, resolved_path):
105+
logger.warning("Traversal attempt through link path.", src=tarinfo.name, dest=tarinfo.linkname, basedir=extract_root, resovled_path=resolved_path)
106+
107+
for drop_count in range(0, len(str(rel_target).split('/'))):
108+
new_path = (extract_root / tarinfo.name).parent / Path("/".join(["placeholder"] * drop_count)) / rel_target
109+
resolved_path = os.path.abspath(new_path)
110+
if str(resolved_path).startswith(str(extract_root)):
111+
break
112+
else:
113+
# We didn't hit the break, we couldn't resolve the path safely
114+
self.record_problem(
115+
tarinfo,
116+
"Traversal attempt through link path.",
117+
"Skipped.",
118+
)
119+
return
120+
121+
# Double check that it's safe now
122+
if not is_safe_path(extract_root, resolved_path):
123+
self.record_problem(
124+
tarinfo,
125+
"Traversal attempt through link path.",
126+
"Skipped.",
127+
)
128+
return
129+
130+
# Prepend placeholder directories before rel_target to get a valid path
131+
# within extract_root. This is the relative version of resolved_path.
132+
rel_target = Path("/".join(["placeholder"] * drop_count)) / rel_target
133+
tarinfo.linkname = rel_target
134+
135+
logger.debug("Creating symlink", points_to=resolved_path, name=tarinfo.name)
97136

98137
target_path = extract_root / tarinfo.name
99138
# directories are special: we can not set their metadata now + they might also be already existing

0 commit comments

Comments
 (0)