From 844e058721da051e2607501a10591d10c5881789 Mon Sep 17 00:00:00 2001 From: Krisztian Fekete <1246751+e3krisztian@users.noreply.github.com> Date: Fri, 16 Feb 2024 17:07:04 +0100 Subject: [PATCH] fixup! fix(tar) absolute symlink extraction --- unblob/handlers/archive/_safe_tarfile.py | 32 ++++++++++++++++-------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/unblob/handlers/archive/_safe_tarfile.py b/unblob/handlers/archive/_safe_tarfile.py index 7898758767..86203f454b 100644 --- a/unblob/handlers/archive/_safe_tarfile.py +++ b/unblob/handlers/archive/_safe_tarfile.py @@ -77,23 +77,33 @@ def extract(self, tarinfo: tarfile.TarInfo, extract_root: Path): # noqa: C901 # prevent traversal attempts through links if tarinfo.islnk() or tarinfo.issym(): if Path(tarinfo.linkname).is_absolute(): - self.record_problem( - tarinfo, - "Absolute path as link target.", - "Converted to extraction relative path.", - ) def calculate_linkname(): root = extract_root.resolve() path = (extract_root / tarinfo.name).resolve() - common_path = Path(os.path.commonpath([root, path])) - # normally root == common_path - # if it is not, the output will be bad - depth = max(0, len(path.parts) - len(common_path.parts) - 1) + + if path.parts[: len(root.parts)] != root.parts: + return None + + depth = max(0, len(path.parts) - len(root.parts) - 1) return ("/".join([".."] * depth) or ".") + tarinfo.linkname - tarinfo.linkname = calculate_linkname() - assert not Path(tarinfo.linkname).is_absolute() + relative_linkname = calculate_linkname() + if relative_linkname is None: + self.record_problem( + tarinfo, + "Absolute path conversion to extraction relative failed - would escape root.", + "Skipped.", + ) + return + + assert not Path(relative_linkname).is_absolute() + self.record_problem( + tarinfo, + "Absolute path as link target.", + "Converted to extraction relative path.", + ) + tarinfo.linkname = relative_linkname resolved_path = (extract_root / tarinfo.name).parent / tarinfo.linkname if not is_safe_path(basedir=extract_root, path=resolved_path):