@@ -76,24 +76,63 @@ def extract(self, tarinfo: tarfile.TarInfo, extract_root: Path): # noqa: C901
76
76
77
77
# prevent traversal attempts through links
78
78
if tarinfo .islnk () or tarinfo .issym ():
79
- if Path (tarinfo .linkname ).is_absolute ():
80
- self .record_problem (
81
- tarinfo ,
82
- "Absolute path as link target." ,
83
- "Converted to extraction relative path." ,
84
- )
85
- tarinfo .linkname = f"{ extract_root } /{ tarinfo .linkname } "
86
-
87
- if not is_safe_path (
88
- basedir = extract_root ,
89
- path = extract_root / Path (tarinfo .name ).parent / tarinfo .linkname ,
90
- ):
91
- self .record_problem (
92
- tarinfo ,
93
- "Traversal attempt through link path." ,
94
- "Skipped." ,
95
- )
96
- return
79
+ link_target = Path (tarinfo .linkname )
80
+
81
+ # Check if the link is absolute and make it relative to extract_root
82
+ if link_target .is_absolute ():
83
+ # Strip leading '/' to make the path relative
84
+ rel_target = link_target .relative_to ('/' )
85
+
86
+ if Path (tarinfo .linkname ).is_absolute ():
87
+ self .record_problem (
88
+ tarinfo ,
89
+ "Absolute path as link target." ,
90
+ "Converted to extraction relative path." ,
91
+ )
92
+ else :
93
+ # Directly use the relative link target. If it points to an unsafe path, we'll
94
+ # check and fix below
95
+ rel_target = link_target
96
+
97
+ # The symlink will point to our relative target (may be updated below if unsafe)
98
+ tarinfo .linkname = rel_target
99
+
100
+ # Resolve the link target to an absolute path
101
+ resolved_path = (extract_root / tarinfo .name ).parent / rel_target
102
+
103
+ # If the resolved path points outside of extract_root, we need to fix it!
104
+ if not is_safe_path (extract_root , resolved_path ):
105
+ logger .warning ("Traversal attempt through link path." , src = tarinfo .name , dest = tarinfo .linkname , basedir = extract_root , resovled_path = resolved_path )
106
+
107
+ for drop_count in range (0 , len (str (rel_target ).split ('/' ))):
108
+ new_path = (extract_root / tarinfo .name ).parent / Path ("/" .join (["placeholder" ] * drop_count )) / rel_target
109
+ resolved_path = os .path .abspath (new_path )
110
+ if str (resolved_path ).startswith (str (extract_root )):
111
+ break
112
+ else :
113
+ # We didn't hit the break, we couldn't resolve the path safely
114
+ self .record_problem (
115
+ tarinfo ,
116
+ "Traversal attempt through link path." ,
117
+ "Skipped." ,
118
+ )
119
+ return
120
+
121
+ # Double check that it's safe now
122
+ if not is_safe_path (extract_root , resolved_path ):
123
+ self .record_problem (
124
+ tarinfo ,
125
+ "Traversal attempt through link path." ,
126
+ "Skipped." ,
127
+ )
128
+ return
129
+
130
+ # Prepend placeholder directories before rel_target to get a valid path
131
+ # within extract_root. This is the relative version of resolved_path.
132
+ rel_target = Path ("/" .join (["placeholder" ] * drop_count )) / rel_target
133
+ tarinfo .linkname = rel_target
134
+
135
+ logger .debug ("Creating symlink" , points_to = resolved_path , name = tarinfo .name )
97
136
98
137
target_path = extract_root / tarinfo .name
99
138
# directories are special: we can not set their metadata now + they might also be already existing
0 commit comments