Skip to content

Commit

Permalink
pythonGH-126212: Fix removal of slashes in file URIs on Windows (pyth…
Browse files Browse the repository at this point in the history
…on#126214)

Adjust `urllib.request.pathname2url()` and `url2pathname()` so that they
don't remove slashes from Windows DOS drive paths and URLs. There was no
basis for this behaviour, and it conflicts with how UNC and POSIX paths are
handled.
  • Loading branch information
barneygale authored Nov 8, 2024
1 parent fa40922 commit 54c63a3
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 21 deletions.
25 changes: 6 additions & 19 deletions Lib/nturl2path.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,15 @@ def url2pathname(url):
# convert this to \\host\path\on\remote\host
# (notice halving of slashes at the start of the path)
url = url[2:]
components = url.split('/')
# make sure not to convert quoted slashes :-)
return urllib.parse.unquote('\\'.join(components))
return urllib.parse.unquote(url.replace('/', '\\'))
comp = url.split('|')
if len(comp) != 2 or comp[0][-1] not in string.ascii_letters:
error = 'Bad URL: ' + url
raise OSError(error)
drive = comp[0][-1].upper()
components = comp[1].split('/')
path = drive + ':'
for comp in components:
if comp:
path = path + '\\' + urllib.parse.unquote(comp)
# Issue #11474 - handing url such as |c/|
if path.endswith(':') and url.endswith('/'):
path += '\\'
return path
tail = urllib.parse.unquote(comp[1].replace('/', '\\'))
return drive + ':' + tail

def pathname2url(p):
"""OS-specific conversion from a file system path to a relative URL
Expand All @@ -60,17 +52,12 @@ def pathname2url(p):
raise OSError('Bad path: ' + p)
if not ':' in p:
# No drive specifier, just convert slashes and quote the name
components = p.split('\\')
return urllib.parse.quote('/'.join(components))
return urllib.parse.quote(p.replace('\\', '/'))
comp = p.split(':', maxsplit=2)
if len(comp) != 2 or len(comp[0]) > 1:
error = 'Bad path: ' + p
raise OSError(error)

drive = urllib.parse.quote(comp[0].upper())
components = comp[1].split('\\')
path = '///' + drive + ':'
for comp in components:
if comp:
path = path + '/' + urllib.parse.quote(comp)
return path
tail = urllib.parse.quote(comp[1].replace('\\', '/'))
return '///' + drive + ':' + tail
11 changes: 9 additions & 2 deletions Lib/test/test_urllib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1526,8 +1526,10 @@ def test_pathname2url_win(self):
self.assertEqual(fn('\\\\?\\C:\\dir'), '///C:/dir')
self.assertEqual(fn('\\\\?\\unc\\server\\share\\dir'), '//server/share/dir')
self.assertEqual(fn("C:"), '///C:')
self.assertEqual(fn("C:\\"), '///C:')
self.assertEqual(fn("C:\\"), '///C:/')
self.assertEqual(fn('C:\\a\\b.c'), '///C:/a/b.c')
self.assertEqual(fn('C:\\a\\b.c\\'), '///C:/a/b.c/')
self.assertEqual(fn('C:\\a\\\\b.c'), '///C:/a//b.c')
self.assertEqual(fn('C:\\a\\b%#c'), '///C:/a/b%25%23c')
self.assertEqual(fn('C:\\a\\b\xe9'), '///C:/a/b%C3%A9')
self.assertEqual(fn('C:\\foo\\bar\\spam.foo'), "///C:/foo/bar/spam.foo")
Expand Down Expand Up @@ -1563,13 +1565,15 @@ def test_url2pathname_win(self):
self.assertEqual(fn("///C|"), 'C:')
self.assertEqual(fn("///C:"), 'C:')
self.assertEqual(fn('///C:/'), 'C:\\')
self.assertEqual(fn('/C|//'), 'C:\\')
self.assertEqual(fn('/C|//'), 'C:\\\\')
self.assertEqual(fn('///C|/path'), 'C:\\path')
# No DOS drive
self.assertEqual(fn("///C/test/"), '\\\\\\C\\test\\')
self.assertEqual(fn("////C/test/"), '\\\\C\\test\\')
# DOS drive paths
self.assertEqual(fn('C:/path/to/file'), 'C:\\path\\to\\file')
self.assertEqual(fn('C:/path/to/file/'), 'C:\\path\\to\\file\\')
self.assertEqual(fn('C:/path/to//file'), 'C:\\path\\to\\\\file')
self.assertEqual(fn('C|/path/to/file'), 'C:\\path\\to\\file')
self.assertEqual(fn('/C|/path/to/file'), 'C:\\path\\to\\file')
self.assertEqual(fn('///C|/path/to/file'), 'C:\\path\\to\\file')
Expand All @@ -1583,6 +1587,9 @@ def test_url2pathname_win(self):
# Localhost paths
self.assertEqual(fn('//localhost/C:/path/to/file'), 'C:\\path\\to\\file')
self.assertEqual(fn('//localhost/C|/path/to/file'), 'C:\\path\\to\\file')
# Percent-encoded forward slashes are preserved for backwards compatibility
self.assertEqual(fn('C:/foo%2fbar'), 'C:\\foo/bar')
self.assertEqual(fn('//server/share/foo%2fbar'), '\\\\server\\share\\foo/bar')
# Round-tripping
paths = ['C:',
r'\\\C\test\\',
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix issue where :func:`urllib.request.pathname2url` and
:func:`~urllib.request.url2pathname` removed slashes from Windows DOS drive
paths and URLs.

0 comments on commit 54c63a3

Please sign in to comment.