Skip to content

Commit b5f9391

Browse files
authored
Fix: HTTPFileSystem isdir downloads the whole file issue (#1889)
1 parent b669a80 commit b5f9391

File tree

2 files changed

+23
-8
lines changed

2 files changed

+23
-8
lines changed

fsspec/implementations/http.py

Lines changed: 18 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -158,14 +158,24 @@ async def _ls_real(self, url, detail=True, **kwargs):
158158
session = await self.set_session()
159159
async with session.get(self.encode_url(url), **self.kwargs) as r:
160160
self._raise_not_found_for_status(r, url)
161-
try:
162-
text = await r.text()
163-
if self.simple_links:
164-
links = ex2.findall(text) + [u[2] for u in ex.findall(text)]
165-
else:
166-
links = [u[2] for u in ex.findall(text)]
167-
except UnicodeDecodeError:
168-
links = [] # binary, not HTML
161+
162+
if "Content-Type" in r.headers:
163+
mimetype = r.headers["Content-Type"].partition(";")[0]
164+
else:
165+
mimetype = None
166+
167+
if mimetype in ("text/html", None):
168+
try:
169+
text = await r.text(errors="ignore")
170+
if self.simple_links:
171+
links = ex2.findall(text) + [u[2] for u in ex.findall(text)]
172+
else:
173+
links = [u[2] for u in ex.findall(text)]
174+
except UnicodeDecodeError:
175+
links = [] # binary, not HTML
176+
else:
177+
links = []
178+
169179
out = set()
170180
parts = urlparse(url)
171181
for l in links:

fsspec/implementations/tests/test_http.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -139,6 +139,11 @@ def test_glob_return_subfolders(server):
139139

140140

141141
def test_isdir(server):
142+
h = fsspec.filesystem("http", headers={"give_mimetype": "true"})
143+
assert h.isdir(server.address + "/index/")
144+
assert not h.isdir(server.realfile)
145+
assert not h.isdir(server.address + "doesnotevenexist")
146+
142147
h = fsspec.filesystem("http")
143148
assert h.isdir(server.address + "/index/")
144149
assert not h.isdir(server.realfile)

0 commit comments

Comments
 (0)