8
8
import os
9
9
import pathlib
10
10
import re
11
- import requests
12
11
import tempfile
13
12
import time
14
13
import urllib .error
15
- from urllib .parse import urlparse
16
14
from dataclasses import dataclass
17
15
from typing import TYPE_CHECKING , Optional
16
+ from urllib .parse import urlparse
18
17
19
18
import pandas as pd
20
19
import PIL .Image
21
20
import PIL .ImageFile
21
+ import requests
22
22
import torch
23
23
import torchvision
24
24
29
29
30
30
PIL .ImageFile .LOAD_TRUNCATED_IMAGES = True
31
31
32
+ # Sending a descriptive User-Agent header is polite and is required by some hosts
33
+ # see: https://foundation.wikimedia.org/wiki/Policy:User-Agent_policy
34
+ USER_AGENT = "AntennaInsectDataPlatform/1.0 (https://insectai.org)"
35
+
32
36
33
37
def get_device (device_str = None ) -> torch .device :
34
38
"""
@@ -51,7 +55,8 @@ def get_or_download_file(
51
55
Fetch a file from a URL or local path. If the path is a URL, download the file.
52
56
If the URL has already been downloaded, return the existing local path.
53
57
If the path is a local path, return the path.
54
- >>> filepath = get_or_download_file("https://example.uk/images/31-20230919033000-snapshot.jpg?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=451d406b7eb1113e1bb05c083ce51481%2F20240429%2F")
58
+ >>> filepath = get_or_download_file(
59
+ "https://example.uk/images/31-20230919033000-snapshot.jpg?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=451d406b7eb1113e1bb05c083ce51481%2F20240429%2F")
55
60
>>> filepath.name
56
61
'31-20230919033000-snapshot.jpg'
57
62
>>> filepath = get_or_download_file("/home/user/images/31-20230919033000-snapshot.jpg")
@@ -60,10 +65,10 @@ def get_or_download_file(
60
65
"""
61
66
if not path_or_url :
62
67
raise Exception ("Specify a URL or path to fetch file from." )
63
-
68
+
64
69
destination_dir = destination_dir or os .environ .get ("LOCAL_WEIGHTS_PATH" )
65
70
fname = pathlib .Path (urlparse (path_or_url ).path ).name
66
-
71
+
67
72
if destination_dir :
68
73
destination_dir = pathlib .Path (destination_dir )
69
74
if prefix :
@@ -78,22 +83,23 @@ def get_or_download_file(
78
83
raise Exception (
79
84
"No destination directory specified by LOCAL_WEIGHTS_PATH or app settings."
80
85
)
81
-
86
+
82
87
if local_filepath and local_filepath .exists ():
83
88
logger .info (f"Using existing { local_filepath } " )
84
89
return local_filepath
85
90
else :
86
91
logger .info (f"Downloading { path_or_url } to { local_filepath } " )
87
-
92
+
88
93
# Check if the path is a URL
89
- if path_or_url .startswith (('http://' , 'https://' )):
90
- response = requests .get (path_or_url , stream = True )
94
+ if path_or_url .startswith (("http://" , "https://" )):
95
+ headers = {"User-Agent" : USER_AGENT }
96
+ response = requests .get (path_or_url , stream = True , headers = headers )
91
97
response .raise_for_status () # Raise an exception for HTTP errors
92
-
93
- with open (local_filepath , 'wb' ) as f :
98
+
99
+ with open (local_filepath , "wb" ) as f :
94
100
for chunk in response .iter_content (chunk_size = 8192 ):
95
101
f .write (chunk )
96
-
102
+
97
103
logger .info (f"Downloaded to { local_filepath } " )
98
104
return local_filepath
99
105
else :
0 commit comments