Skip to content

Commit b842cf4

Browse files
authored
http file system directed to stream by an "Accept-Ranges": "none" response (#1631)
1 parent 1f61512 commit b842cf4

File tree

3 files changed

+31
-25
lines changed

3 files changed

+31
-25
lines changed

fsspec/implementations/http.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -358,9 +358,10 @@ def _open(
358358
kw = self.kwargs.copy()
359359
kw["asynchronous"] = self.asynchronous
360360
kw.update(kwargs)
361-
size = size or self.info(path, **kwargs)["size"]
361+
info = {}
362+
size = size or info.update(self.info(path, **kwargs)) or info["size"]
362363
session = sync(self.loop, self.set_session)
363-
if block_size and size:
364+
if block_size and size and info.get("partial", True):
364365
return HTTPFile(
365366
self,
366367
path,
@@ -520,9 +521,9 @@ async def _isdir(self, path):
520521

521522
class HTTPFile(AbstractBufferedFile):
522523
"""
523-
A file-like object pointing to a remove HTTP(S) resource
524+
A file-like object pointing to a remote HTTP(S) resource
524525
525-
Supports only reading, with read-ahead of a predermined block-size.
526+
Supports only reading, with read-ahead of a predetermined block-size.
526527
527528
In the case that the server does not supply the filesize, only reading of
528529
the complete file in one go is supported.
@@ -835,10 +836,6 @@ async def _file_info(url, session, size_policy="head", **kwargs):
835836
async with r:
836837
r.raise_for_status()
837838

838-
# TODO:
839-
# recognise lack of 'Accept-Ranges',
840-
# or 'Accept-Ranges': 'none' (not 'bytes')
841-
# to mean streaming only, no random access => return None
842839
if "Content-Length" in r.headers:
843840
# Some servers may choose to ignore Accept-Encoding and return
844841
# compressed content, in which case the returned size is unreliable.
@@ -853,6 +850,11 @@ async def _file_info(url, session, size_policy="head", **kwargs):
853850
if "Content-Type" in r.headers:
854851
info["mimetype"] = r.headers["Content-Type"].partition(";")[0]
855852

853+
if r.headers.get("Accept-Ranges") == "none":
854+
# Some servers may explicitly discourage partial content requests, but
855+
# the lack of "Accept-Ranges" does not always indicate they would fail
856+
info["partial"] = False
857+
856858
info["url"] = str(r.url)
857859

858860
for checksum_field in ["ETag", "Content-MD5", "Digest"]:

fsspec/implementations/tests/test_http.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -237,18 +237,22 @@ def test_random_access(server, headers):
237237
@pytest.mark.parametrize(
238238
"headers",
239239
[
240-
{"ignore_range": "true", "head_ok": "true", "head_give_length": "true"},
240+
# HTTPFile seeks, response headers lack size, assumed no range support
241+
{"head_ok": "true", "head_give_length": "true"},
242+
# HTTPFile seeks, response is not a range
241243
{"ignore_range": "true", "give_length": "true"},
242244
{"ignore_range": "true", "give_range": "true"},
245+
# HTTPStreamFile does not seek (past 0)
246+
{"accept_range": "none", "head_ok": "true", "give_length": "true"},
243247
],
244248
)
245249
def test_no_range_support(server, headers):
246250
h = fsspec.filesystem("http", headers=headers)
247251
url = server + "/index/realfile"
248252
with h.open(url, "rb") as f:
249253
# Random access is not possible if the server doesn't respect Range
250-
f.seek(5)
251254
with pytest.raises(ValueError):
255+
f.seek(5)
252256
f.read(10)
253257

254258
# Reading from the beginning should still work

fsspec/tests/conftest.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -135,10 +135,10 @@ def read_chunks(self):
135135
self.rfile.readline()
136136

137137
def do_HEAD(self):
138+
r_headers = {}
138139
if "head_not_auth" in self.headers:
139-
return self._respond(
140-
403, {"Content-Length": 123}, b"not authorized for HEAD request"
141-
)
140+
r_headers["Content-Length"] = 123
141+
return self._respond(403, r_headers, b"not authorized for HEAD request")
142142
elif "head_ok" not in self.headers:
143143
return self._respond(405)
144144

@@ -148,23 +148,23 @@ def do_HEAD(self):
148148
return self._respond(404)
149149

150150
if ("give_length" in self.headers) or ("head_give_length" in self.headers):
151-
response_headers = {"Content-Length": len(file_data)}
152151
if "zero_length" in self.headers:
153-
response_headers["Content-Length"] = 0
152+
r_headers["Content-Length"] = 0
154153
elif "gzip_encoding" in self.headers:
155154
file_data = gzip.compress(file_data)
156-
response_headers["Content-Encoding"] = "gzip"
157-
response_headers["Content-Length"] = len(file_data)
158-
159-
self._respond(200, response_headers)
155+
r_headers["Content-Encoding"] = "gzip"
156+
r_headers["Content-Length"] = len(file_data)
157+
else:
158+
r_headers["Content-Length"] = len(file_data)
160159
elif "give_range" in self.headers:
161-
self._respond(
162-
200, {"Content-Range": f"0-{len(file_data) - 1}/{len(file_data)}"}
163-
)
160+
r_headers["Content-Range"] = f"0-{len(file_data) - 1}/{len(file_data)}"
164161
elif "give_etag" in self.headers:
165-
self._respond(200, {"ETag": "xxx"})
166-
else:
167-
self._respond(200) # OK response, but no useful info
162+
r_headers["ETag"] = "xxx"
163+
164+
if self.headers.get("accept_range") == "none":
165+
r_headers["Accept-Ranges"] = "none"
166+
167+
self._respond(200, r_headers)
168168

169169

170170
@contextlib.contextmanager

0 commit comments

Comments
 (0)