diff --git a/unblob/handlers/archive/_safe_tarfile.py b/unblob/handlers/archive/_safe_tarfile.py index 7898758767..86203f454b 100644 --- a/unblob/handlers/archive/_safe_tarfile.py +++ b/unblob/handlers/archive/_safe_tarfile.py @@ -77,23 +77,33 @@ def extract(self, tarinfo: tarfile.TarInfo, extract_root: Path): # noqa: C901 # prevent traversal attempts through links if tarinfo.islnk() or tarinfo.issym(): if Path(tarinfo.linkname).is_absolute(): - self.record_problem( - tarinfo, - "Absolute path as link target.", - "Converted to extraction relative path.", - ) def calculate_linkname(): root = extract_root.resolve() path = (extract_root / tarinfo.name).resolve() - common_path = Path(os.path.commonpath([root, path])) - # normally root == common_path - # if it is not, the output will be bad - depth = max(0, len(path.parts) - len(common_path.parts) - 1) + + if path.parts[: len(root.parts)] != root.parts: + return None + + depth = max(0, len(path.parts) - len(root.parts) - 1) return ("/".join([".."] * depth) or ".") + tarinfo.linkname - tarinfo.linkname = calculate_linkname() - assert not Path(tarinfo.linkname).is_absolute() + relative_linkname = calculate_linkname() + if relative_linkname is None: + self.record_problem( + tarinfo, + "Absolute path conversion to extraction relative failed - would escape root.", + "Skipped.", + ) + return + + assert not Path(relative_linkname).is_absolute() + self.record_problem( + tarinfo, + "Absolute path as link target.", + "Converted to extraction relative path.", + ) + tarinfo.linkname = relative_linkname resolved_path = (extract_root / tarinfo.name).parent / tarinfo.linkname if not is_safe_path(basedir=extract_root, path=resolved_path):