
Commit f8bd6b4

Merge pull request #102 from kuefmz/config_via_auth
Update request path based on what was found in Archivo

2 parents 60bfb9c + 967ecba · commit f8bd6b4

File tree: 3 files changed, +51 −22 lines

.gitignore

Lines changed: 3 additions & 0 deletions

```diff
@@ -4,6 +4,9 @@ __pycache__/
 *$py.class
 *.pem
 
+ontologytimemachine/utils/archivo_ontologies_download.txt
+ontologytimemachine/utils/archivo_ontologies_hash.txt
+
 # C extensions
 *.so
 
```
ontologytimemachine/proxy_wrapper.py

Lines changed: 17 additions & 5 deletions
```diff
@@ -39,6 +39,10 @@ def get_request_host(self) -> str:
     def get_request_path(self) -> str:
         pass
 
+    @abstractmethod
+    def set_request_path(self, new_path) -> None:
+        pass
+
     @abstractmethod
     def get_request_headers(self) -> Dict[str, str]:
         pass
```
```diff
@@ -84,6 +88,10 @@ def get_request_host(self) -> str:
     def get_request_path(self) -> str:
         return self.request.path.decode("utf-8")
 
+    def set_request_path(self, new_path: str) -> None:
+        self.request.path = new_path.encode("utf-8")
+        logger.info(f"Request path set to: {new_path}")
+
     def get_request_headers(self) -> Dict[str, str]:
         headers: Dict[str, str] = {}
         for k, v in self.request.headers.items():
```
```diff
@@ -100,16 +108,20 @@ def set_request_accept_header(self, mime_type: str) -> None:
 
     def get_request_url_host_path(self) -> Tuple[str, str, str]:
         logger.info("Get ontology from request")
-        if (self.is_get_request or self.is_head_request) and not self.request.host:
+        if (
+            (self.is_get_request or self.is_head_request)
+            and not self.request.host
+            and not self.get_request_host()
+        ):
             for k, v in self.request.headers.items():
                 if v[0].decode("utf-8") == "Host":
                     host = v[1].decode("utf-8")
-            path = self.request.path.decode("utf-8")
+            path = self.get_request_path()
             url = f"https://{host}{path}"
         else:
-            host = self.request.host.decode("utf-8")
-            path = self.request.path.decode("utf-8")
-            url = str(self.request._url)
+            host = self.get_request_host()
+            path = self.get_request_path()
+            url = f"http://{host}{path}"
 
         logger.info(f"Ontology: {url}")
         return url, host, path
```
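A side effect worth calling out: `get_request_url_host_path` now rebuilds the URL from the current host and path on every call instead of echoing the cached `self.request._url`, which is what lets a later `set_request_path` change the URL the proxy acts on. A minimal sketch of that interaction, assuming a proxy.py-style request object with bytes `host`/`path` attributes (`FakeRequest`, `FakeWrapper`, and the fixed `http://` scheme are illustrative assumptions, not part of this commit):

```python
# Hypothetical stand-ins for the proxy.py request and its wrapper.
class FakeRequest:
    def __init__(self, host: bytes, path: bytes):
        self.host = host
        self.path = path

class FakeWrapper:
    def __init__(self, request: FakeRequest):
        self.request = request

    def get_request_host(self) -> str:
        return self.request.host.decode("utf-8")

    def get_request_path(self) -> str:
        return self.request.path.decode("utf-8")

    def set_request_path(self, new_path: str) -> None:
        # Same idea as the new setter: mutate the wrapped request in place.
        self.request.path = new_path.encode("utf-8")

    def get_request_url_host_path(self):
        # URL is derived from host + path, so it reflects any path rewrite.
        host = self.get_request_host()
        path = self.get_request_path()
        return f"http://{host}{path}", host, path

wrapper = FakeWrapper(FakeRequest(b"example.org", b"/ontology/term"))
wrapper.set_request_path("/ontology")  # e.g. after Archivo normalization
print(wrapper.get_request_url_host_path()[0])  # http://example.org/ontology
```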

ontologytimemachine/utils/proxy_logic.py

Lines changed: 31 additions & 17 deletions
```diff
@@ -109,6 +109,7 @@ def is_archivo_ontology_request(wrapped_request):
     if request_path.endswith("/"):
         request_path = request_path.rstrip("/")
     if (request_host, request_path) in ARCHIVO_PARSED_URLS:
+        wrapped_request.set_request_path(request_path)
         logger.info(f"Requested URL: {request_host+request_path} is in Archivo")
         return True
 
```
```diff
@@ -117,17 +118,27 @@ def is_archivo_ontology_request(wrapped_request):
     path_parts = request_path.split("/")
     new_path = "/".join(path_parts[:-1])
 
-    if ((request_host, new_path) in ARCHIVO_PARSED_URLS) or (
-        (request_host, new_path + "/") in ARCHIVO_PARSED_URLS
-    ):
-        logger.info(f"Requested URL: {request_host+request_path} is in Archivo")
+    if (request_host, new_path) in ARCHIVO_PARSED_URLS:
+        wrapped_request.set_request_path(new_path)
+        logger.info(f"Requested URL: {request_host+new_path} is in Archivo")
+        return True
+
+    new_path = new_path + "/"
+    if (request_host, new_path) in ARCHIVO_PARSED_URLS:
+        wrapped_request.set_request_path(new_path)
+        logger.info(f"Requested URL: {request_host+new_path} is in Archivo")
         return True
 
     new_path = "/".join(path_parts[:-2])
-    if ((request_host, new_path) in ARCHIVO_PARSED_URLS) or (
-        (request_host, new_path + "/") in ARCHIVO_PARSED_URLS
-    ):
-        logger.info(f"Requested URL: {request_host+request_path} is in Archivo")
+    if (request_host, new_path) in ARCHIVO_PARSED_URLS:
+        wrapped_request.set_request_path(new_path)
+        logger.info(f"Requested URL: {request_host+new_path} is in Archivo")
+        return True
+
+    new_path = new_path + "/"
+    if (request_host, new_path) in ARCHIVO_PARSED_URLS:
+        wrapped_request.set_request_path(new_path)
+        logger.info(f"Requested URL: {request_host+new_path} is in Archivo")
         return True
 
     logger.info(f"Requested URL: {request_host+request_path} is NOT in Archivo")
```
```diff
@@ -140,7 +151,7 @@ def request_ontology(url, headers, disableRemovingRedirects=False, timeout=5):
         response = requests.get(
             url=url, headers=headers, allow_redirects=allow_redirects, timeout=5
         )
-        logger.info("Successfully fetched original ontology")
+        logger.info("Successfully fetched ontology")
         return response
     except Exception as e:
         logger.error(f"Error fetching original ontology: {e}")
```
```diff
@@ -154,7 +165,6 @@ def proxy_logic(wrapped_request, config):
     set_onto_format_headers(wrapped_request, config)
 
     headers = wrapped_request.get_request_headers()
-    ontology, _, _ = wrapped_request.get_request_url_host_path()
 
     # if the requested format is not in Archivo and the ontoVersion is not original
     # we can stop because the archivo request will not go through
```
```diff
@@ -164,15 +174,16 @@ def proxy_logic(wrapped_request, config):
         return mock_response_500
 
     if config.ontoVersion == OntoVersion.ORIGINAL:
+        ontology, _, _ = wrapped_request.get_request_url_host_path()
         response = fetch_original(ontology, headers, config)
     elif config.ontoVersion == OntoVersion.ORIGINAL_FAILOVER_LIVE_LATEST:
         response = fetch_failover(
-            wrapped_request, ontology, headers, config.disableRemovingRedirects
+            wrapped_request, headers, config.disableRemovingRedirects
         )
     elif config.ontoVersion == OntoVersion.LATEST_ARCHIVED:
         response = fetch_latest_archived(wrapped_request, ontology, headers)
     elif config.ontoVersion == OntoVersion.LATEST_ARCHIVED:
-        response = fetch_timestamp_archived(wrapped_request, ontology, headers, config)
+        response = fetch_timestamp_archived(wrapped_request, headers, config)
     # Commenting the manifest related part because it is not supported in the current version
     # elif ontoVersion == 'dependencyManifest':
     #     response = fetch_dependency_manifest(ontology, headers, manifest)
```
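Hoisting `ontology, _, _ = wrapped_request.get_request_url_host_path()` out of `proxy_logic` and into the fetch helpers matters because `is_archivo_ontology_request`, called inside the archived-fetch helpers, may rewrite the request path; reading the URL up front would capture the unnormalized path. A tiny self-contained illustration of that ordering, using a plain dict as a hypothetical stand-in for the wrapped request:

```python
# Order matters: the Archivo check may rewrite the path, so the ontology URL
# must be read *after* it runs. The dict and url_of() are illustrative only.
request = {"host": "example.org", "path": "/ns/onto/term/x"}

def url_of(req) -> str:  # mirrors what get_request_url_host_path returns
    return f"http://{req['host']}{req['path']}"

stale = url_of(request)        # computed before normalization (old behavior)
request["path"] = "/ns/onto/"  # what a successful Archivo match does
fresh = url_of(request)        # what fetch_latest_archived now sees

print(stale)  # http://example.org/ns/onto/term/x
print(fresh)  # http://example.org/ns/onto/
```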
```diff
@@ -187,7 +198,8 @@ def fetch_original(ontology, headers, disableRemovingRedirects):
 
 
 # Failover mode
-def fetch_failover(wrapped_request, ontology, headers, disableRemovingRedirects):
+def fetch_failover(wrapped_request, headers, disableRemovingRedirects):
+    ontology, _, _ = wrapped_request.get_request_url_host_path()
     logger.info(f"Fetching original ontology with failover from URL: {ontology}")
     original_response = request_ontology(ontology, headers, disableRemovingRedirects)
     if original_response.status_code in passthrough_status_codes:
```
```diff
@@ -204,36 +216,38 @@ def fetch_failover(wrapped_request, ontology, headers, disableRemovingRedirects)
             return original_response
         else:
             logging.info(f"The returned type is not the same as the requested one")
-            return fetch_latest_archived(wrapped_request, ontology, headers)
+            return fetch_latest_archived(wrapped_request, headers)
     else:
         logger.info(
             f"The returend status code is not accepted: {original_response.status_code}"
         )
-        return fetch_latest_archived(wrapped_request, ontology, headers)
+        return fetch_latest_archived(wrapped_request, headers)
 
 
 # Fetch the lates version from archivo (no timestamp defined)
-def fetch_latest_archived(wrapped_request, ontology, headers):
+def fetch_latest_archived(wrapped_request, headers):
     if not is_archivo_ontology_request(wrapped_request):
         logger.info(
             "Data needs to be fetched from Archivo, but ontology is not available on Archivo."
         )
         return mock_response_404()
     logger.info("Fetch latest archived")
     format = get_format_from_accept_header(headers)
+    ontology, _, _ = wrapped_request.get_request_url_host_path()
     dbpedia_url = f"{archivo_api}?o={ontology}&f={format}"
     logger.info(f"Fetching from DBpedia Archivo API: {dbpedia_url}")
     return request_ontology(dbpedia_url, headers)
 
 
-def fetch_timestamp_archived(wrapped_request, ontology, headers, config):
+def fetch_timestamp_archived(wrapped_request, headers, config):
     if not is_archivo_ontology_request(wrapped_request):
         logger.info(
             "Data needs to be fetched from Archivo, but ontology is not available on Archivo."
         )
         return mock_response_404()
     logger.info("Fetch archivo timestamp")
     format = get_format_from_accept_header(headers)
+    ontology, _, _ = wrapped_request.get_request_url_host_path()
     dbpedia_url = f"{archivo_api}?o={ontology}&f={format}&v={config.timestamp}"
     logger.info(f"Fetching from DBpedia Archivo API: {dbpedia_url}")
     return request_ontology(dbpedia_url, headers)
```
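Both archived fetch paths end by querying the DBpedia Archivo API with the now-normalized ontology URI. A hedged sketch of the request they construct; the `archivo_api` value below is an assumption (the diff shows only the variable name), and `fetch_archived` is an illustrative helper, not the module's API:

```python
from typing import Optional

import requests

# Assumed endpoint; the commit only references the `archivo_api` variable.
archivo_api = "https://archivo.dbpedia.org/download"

def fetch_archived(ontology: str, fmt: str,
                   timestamp: Optional[str] = None) -> requests.Response:
    """Sketch of the URL fetch_latest_archived / fetch_timestamp_archived build."""
    url = f"{archivo_api}?o={ontology}&f={fmt}"
    if timestamp:  # fetch_timestamp_archived appends &v=<config.timestamp>
        url += f"&v={timestamp}"
    return requests.get(url, timeout=5)

# Latest archived version, Turtle serialization:
resp = fetch_archived("http://example.org/ns/onto/", "ttl")
print(resp.status_code, resp.headers.get("Content-Type"))
```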
