Skip to content

Commit dd26d23

Browse files
committed
Updated _http_get_bytes_range to resolve "Sudden data error during training" (#766)
1 parent e192068 commit dd26d23

File tree

1 file changed

+20
-7
lines changed

1 file changed

+20
-7
lines changed

olmo/util.py

+20-7
Original file line number | Diff line number | Diff line change
@@ -704,14 +704,27 @@ def _http_file_size(scheme: str, host_name: str, path: str) -> int:
704704
def _http_get_bytes_range(scheme: str, host_name: str, path: str, bytes_start: int, num_bytes: int) -> bytes:
705705
import requests
706706

707-
response = requests.get(
708-
f"{scheme}://{host_name}/{path}", headers={"Range": f"bytes={bytes_start}-{bytes_start+num_bytes-1}"}
707+
max_retries = 5
708+
attempt = 0
709+
while attempt < max_retries:
710+
try:
711+
response = requests.get(
712+
f"{scheme}://{host_name}/{path}",
713+
headers={"Range": f"bytes={bytes_start}-{bytes_start+num_bytes-1}"},
714+
)
715+
result = response.content
716+
if len(result) == num_bytes:
717+
return result
718+
719+
log.warning(f"Expected {num_bytes} bytes, but got {len(result)}. Retrying...")
720+
721+
except requests.exceptions.RequestException as e:
722+
log.warning(f"Attempt {attempt+1}/{max_retries}. Network error: {e}. Retrying...")
723+
attempt += 1
724+
time.sleep(2**attempt)
725+
raise ValueError(
726+
f"Failed to download {num_bytes} bytes from {scheme}://{host_name}/{path} after {max_retries} attempts."
709727
)
710-
result = response.content
711-
assert (
712-
len(result) == num_bytes
713-
), f"expected {num_bytes} bytes, got {len(result)}" # Some web servers silently ignore range requests and send everything
714-
return result
715728

716729

717730
def save_hf_dataset_to_disk(

0 commit comments

Comments
 (0)