diff --git a/Dockerfile b/Dockerfile
index 3fb8e34..bc75693 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -22,5 +22,5 @@ RUN poetry config virtualenvs.create false
RUN poetry install --no-dev && rm pyproject.toml
-CMD python3 -m proxy --hostname 0.0.0.0 --port $PORT --plugins ontologytimemachine.custom_proxy.OntologyTimeMachinePlugin
+CMD python3 -m proxy --ca-key-file ca-key.pem --ca-cert-file ca-cert.pem --ca-signing-key-file ca-signing-key.pem --hostname 0.0.0.0 --port $PORT --plugins ontologytimemachine.custom_proxy.OntologyTimeMachinePlugin
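+# NOTE: ca-key.pem, ca-cert.pem and ca-signing-key.pem are expected in the
+# container working directory (see the README setup steps)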
diff --git a/README.md b/README.md
index 51247f5..413afd3 100644
--- a/README.md
+++ b/README.md
@@ -22,142 +22,16 @@ cp ca-signing-key.pem ~/ontology-time-machine/ca-signing-key.pem
### Curl tests:
- curl -x http://0.0.0.0:8899 --cacert ca-cert.pem http://www.google.com
-- curl -x http://0.0.0.0:8899 -H "Accept: text/turtle" --cacert ca-cert.pem http://ontologi.es/days#
- curl -x http://0.0.0.0:8899 -H "Accept: text/turtle" --cacert ca-cert.pem http://linked-web-apis.fit.cvut.cz/ns/core
- curl -x http://0.0.0.0:8899 --cacert ca-cert.pem https://www.w3id.org/simulation/ontology/
- curl -x http://0.0.0.0:8899 --cacert ca-cert.pem https://www.w3.org/ns/ldt#
- curl -x http://0.0.0.0:8899 --cacert ca-cert.pem https://raw.githubusercontent.com/br0ast/simulationontology/main/Ontology/simulationontology.owl
- curl -x http://0.0.0.0:8899 -H "Accept: text/turtle" --cacert ca-cert.pem http://bblfish.net/work/atom-owl/2006-06-06/
- curl -x http://0.0.0.0:8899 -H "Accept: text/turtle" --cacert ca-cert.pem http://purl.org/makolab/caont/
-
-
-### Not working:
- curl -x http://0.0.0.0:8899 --cacert ca-cert.pem https://vocab.eccenca.com/auth/
+- curl -x http://0.0.0.0:8899 -H "Accept: text/turtle" --cacert ca-cert.pem http://dbpedia.org/ontology/Person
+
+### Not working:
+- curl -x http://0.0.0.0:8899 -H "Accept: text/turtle" --cacert ca-cert.pem http://ontologi.es/days#
-
-from proxy.http.proxy import HttpProxyBasePlugin
-from proxy.http.parser import HttpParser, httpParserTypes
-from proxy.common.utils import build_http_response
-from proxy.http.methods import HttpMethods
-from ontologytimemachine.utils.utils import proxy_logic_http, proxy_logic_https
-from ontologytimemachine.utils.utils import check_if_archivo_ontology_requested
-from ontologytimemachine.utils.utils import get_headers_and_expected_type
-from requests.exceptions import SSLError, Timeout, ConnectionError, RequestException
-from http.client import responses
-import proxy
-import sys
-import requests
-import logging
-
-
-logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
-logger = logging.getLogger(__name__)
-
-
-class OntologyTimeMachinePlugin(HttpProxyBasePlugin):
- #def __init__(self, *args, **kwargs):
- # super().__init__(*args, **kwargs)
-
-
- def before_upstream_connection(self, request: HttpParser):
- logger.debug('Before upstream')
- print(request.method)
- scheme = 'https' if request.method == b'CONNECT' else 'http'
- if scheme == 'https':
- logger.debug('The request is HTTPS, forward as it is')
- return request
-
- ontology_request = check_if_archivo_ontology_requested(request)
- if ontology_request:
- logger.debug('The request is for an ontology')
- try:
- ontology_url = str(request._url)
- headers, _ = get_headers_and_expected_type(request)
- response = requests.get(ontology_url, headers=headers)
- if response.status_code == 502:
- logger.error('Received 502 Bad Gateway error')
- response = proxy_logic_http(request)
- logger.debug('Queue response')
- self.queue_response(response)
- return None
- else:
- logger.debug('The request is correct')
- return request
- except (SSLError, Timeout, ConnectionError, RequestException) as e:
- logger.error(f'Network-related exception occurred {e}')
- response = proxy_logic_http(request)
- logger.debug('Queue response')
- self.queue_response(response)
- return None
- return request
-
-
- def handle_client_request(self, request: HttpParser):
- logger.debug('HTTP call')
- logger.debug(request._url)
-
- ontology_request = check_if_archivo_ontology_requested(request)
- if not ontology_request:
- logger.info('No ontology is asked, forward original request')
- return request
-
- response = proxy_logic_http(request)
- self.queue_response(response)
-
- return None
-
-
- def handle_upstream_chunk(self, chunk: memoryview):
- logger.info('HTTPS call')
-
- try:
- # Parse the HTTP response to handle different cases
- parser = HttpParser(httpParserTypes.RESPONSE_PARSER)
- parser.parse(memoryview(chunk))
- code = int(parser.code.decode('utf-8'))
- if code >= 100 and code < 200:
- return chunk
- elif code >= 201 and code <= 204:
- return chunk
- elif code == 451:
- return chunk
- else:
- response = proxy_logic_https(parser)
- logger.debug('Queue response')
- self.queue_response(response)
- return None
- except UnicodeDecodeError:
- logger.warning('Received non-text chunk, cannot decode')
- except Exception as e:
- logger.error(f'Exception occurred while handling upstream chunk: {e}')
-
- return chunk
-
- def queue_response(self, response):
- self.client.queue(
- build_http_response(
- response.status_code,
- reason=bytes(responses[response.status_code], 'utf-8'),
- headers={
- b'Content-Type': bytes(response.headers.get('Content-Type'), 'utf-8')
- },
- body=response.content
- )
- )
-
-
-if __name__ == '__main__':
- sys.argv += [
- '--ca-key-file', 'ca-key.pem',
- '--ca-cert-file', 'ca-cert.pem',
- '--ca-signing-key-file', 'ca-signing-key.pem',
- ]
- sys.argv += [
- '--hostname', '0.0.0.0',
- '--port', '8899',
- '--plugins', __name__ + '.OntologyTimeMachinePlugin'
- ]
- logger.info("Starting OntologyTimeMachineProxy server...")
- proxy.main()
\ No newline at end of file
diff --git a/ontologytimemachine/custom_proxy.py b/ontologytimemachine/custom_proxy.py
index c3ee1f4..95cf79f 100644
--- a/ontologytimemachine/custom_proxy.py
+++ b/ontologytimemachine/custom_proxy.py
@@ -2,15 +2,13 @@
from proxy.http.parser import HttpParser, httpParserTypes
from proxy.common.utils import build_http_response
from proxy.http.methods import HttpMethods
-from ontologytimemachine.utils.utils import proxy_logic_http, proxy_logic_https
+from ontologytimemachine.utils.utils import proxy_logic, parse_arguments
from ontologytimemachine.utils.utils import check_if_archivo_ontology_requested
-from ontologytimemachine.utils.utils import get_headers_and_expected_type
-from ontologytimemachine.utils.utils import get_ontology_from_request
+from ontologytimemachine.utils.mock_responses import mock_response_403
from requests.exceptions import SSLError, Timeout, ConnectionError, RequestException
from http.client import responses
import proxy
import sys
-import requests
import logging
@@ -18,74 +16,65 @@
PORT = '8899'
-logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
-
class OntologyTimeMachinePlugin(HttpProxyBasePlugin):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
+ (self.ontoFormat, self.ontoVersion, self.only_ontologies,
+ self.https_intercept, self.inspect_redirects, self.forward_headers,
+ self.subject_binary_search_threshold) = parse_arguments()
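+        # ontoFormat is a dict ({'format', 'precedence', 'patchAcceptUpstream'});
+        # the remaining values mirror the CLI flags parsed by parse_arguments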
def before_upstream_connection(self, request: HttpParser):
- logger.debug('Before upstream')
- logger.debug(request.method)
- scheme = 'https' if request.method == b'CONNECT' else 'http'
- if scheme == 'https':
- logger.debug('The request is HTTPS, forward as it is')
- logger.debug(f'Request host: {request.host}')
- logger.debug(f'Request path: {request.path}')
- return request
+ logger.info('Before upstream connection hook')
+ logger.info(f'Request method: {request.method} - Request host: {request.host} - Request path: {request.path} - Request headers: {request.headers}')
+
+ if request.method == b'CONNECT':
+ logger.info(f'HTTPS interception mode: {self.https_intercept}')
+ # Only intercept if interception is enabled
+ if self.https_intercept in ['all', 'archivo']:
+ return request
+ else:
+ return None
+
ontology_request = check_if_archivo_ontology_requested(request)
+ # If only ontology mode, return None in all other cases
+ if self.only_ontologies and not ontology_request:
+ logger.warning('Request denied: not an ontology request and only ontologies mode is enabled')
+            self.queue_response(mock_response_403())
+ return None
+
if ontology_request:
logger.debug('The request is for an ontology')
- try:
- ontology_url = str(request._url)
- headers, _ = get_headers_and_expected_type(request)
- response = requests.get(ontology_url, headers=headers, timeout=5)
- if response.status_code == 502:
- logger.error('Received 502 Bad Gateway error')
- response = proxy_logic_http(request)
- logger.debug('Queue response')
- self.queue_response(response)
- return None
- else:
- logger.debug('The request is correct')
- return request
- except (SSLError, Timeout, ConnectionError, RequestException) as e:
- logger.error(f'Network-related exception occurred {e}')
- response = proxy_logic_http(request)
- logger.debug('Queue response')
- self.queue_response(response)
- return None
+ response = proxy_logic(request, self.ontoFormat, self.ontoVersion)
+ self.queue_response(response)
+ return None
return request
def handle_client_request(self, request: HttpParser):
- logger.debug('HTTP call')
+ logger.info('Handle client request hook')
+ logger.info(f'Request method: {request.method} - Request host: {request.host} - Request path: {request.path} - Request headers: {request.headers}')
logger.debug(request.method)
- scheme = 'https' if request.method == b'CONNECT' else 'http'
- if scheme == 'https':
- logger.debug('The request is HTTPS, forward as it is')
+ if request.method == b'CONNECT':
return request
ontology_request = check_if_archivo_ontology_requested(request)
if not ontology_request:
- logger.info('No ontology is asked, forward original request')
+ logger.info('The requested IRI is not part of DBpedia Archivo')
return request
- logger.debug('Call proxy logic')
- response = proxy_logic_http(request)
+ response = proxy_logic(request, self.ontoFormat, self.ontoVersion)
self.queue_response(response)
return None
def handle_upstream_chunk(self, chunk: memoryview):
- logger.info('HTTPS call')
-
return chunk
@@ -103,6 +92,7 @@ def queue_response(self, response):
if __name__ == '__main__':
+
sys.argv += [
'--ca-key-file', 'ca-key.pem',
'--ca-cert-file', 'ca-cert.pem',
diff --git a/ontologytimemachine/utils/mock_responses.py b/ontologytimemachine/utils/mock_responses.py
new file mode 100644
index 0000000..d6b0e11
--- /dev/null
+++ b/ontologytimemachine/utils/mock_responses.py
@@ -0,0 +1,38 @@
+import requests
+
+
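+# Minimal stand-ins for requests.Response used when the proxy must answer the
+# client itself instead of relaying an upstream response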
+def mock_response_200():
+ mock_response = requests.Response()
+ mock_response.status_code = 200
+ mock_response.url = 'https://example.com/success'
+ mock_response.headers['Content-Type'] = 'text/html'
+    mock_response._content = b'To be implemented'
+ return mock_response
+
+
+def mock_response_403():
+ mock_response = requests.Response()
+ mock_response.status_code = 403
+ mock_response.url = 'https://example.com/forbidden'
+ mock_response.headers['Content-Type'] = 'text/html'
+    mock_response._content = b'403 Forbidden'
+ return mock_response
+
+
+
+def mock_response_404():
+ mock_response = requests.Response()
+ mock_response.status_code = 404
+ mock_response.url = 'https://example.com/resource-not-found'
+ mock_response.headers['Content-Type'] = 'text/html'
+    mock_response._content = b'404 Not Found'
+ return mock_response
+
+
+def mock_response_500():
+ mock_response = requests.Response()
+ mock_response.status_code = 500
+ mock_response.url = 'https://example.com/internal-server-error'
+ mock_response.headers['Content-Type'] = 'text/html'
+    mock_response._content = b'500 Internal Server Error'
+ return mock_response
\ No newline at end of file
diff --git a/ontologytimemachine/utils/utils.py b/ontologytimemachine/utils/utils.py
index db05205..227b8eb 100644
--- a/ontologytimemachine/utils/utils.py
+++ b/ontologytimemachine/utils/utils.py
@@ -1,42 +1,21 @@
from proxy.http.parser import HttpParser, httpParserTypes
from requests.exceptions import SSLError, Timeout, ConnectionError, RequestException
+from ontologytimemachine.utils.mock_responses import mock_response_403, mock_response_404, mock_response_500, mock_response_200
from http.client import responses
from urllib.parse import urlparse
import logging
import requests
+import argparse
-logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
dbpedia_api = 'https://archivo.dbpedia.org/download'
-ontology_types = [
- 'application/turtle', # Turtle (Terse RDF Triple Language)
- 'text/turtle', # Turtle (alternative media type)
- 'application/rdf+xml', # RDF/XML
- 'text/rdf+xml', # RDF/XML (alternative media type)
- 'application/n-triples', # N-Triples
- 'text/n-triples', # N-Triples (alternative media type)
- 'application/n-quads', # N-Quads
- 'text/n-quads', # N-Quads (alternative media type)
- 'application/ld+json', # JSON-LD (JSON for Linking Data)
- 'application/trig', # TriG
- 'application/sparql-results+json', # SPARQL Query Results JSON
- 'application/json', # JSON (alternative for SPARQL Query Results JSON)
- 'application/sparql-results+xml', # SPARQL Query Results XML
- 'text/xml' # XML (alternative for SPARQL Query Results XML)
-]
-
-
-format_mapping = {
- 'application/turtle': 'ttl', # Turtle (Terse RDF Triple Language)
- 'text/turtle': 'ttl', # Turtle (alternative media type)
-}
-
-
passthrough_status_codes_http = [
100, 101, 102, 103,
200,
@@ -44,105 +23,226 @@
451,
]
+def str2bool(value):
+    # argparse's type=bool treats any non-empty string (including 'False') as
+    # True, so parse textual booleans explicitly
+    if isinstance(value, bool):
+        return value
+    return value.strip().lower() in ('true', '1', 'yes')
+
+
+def parse_arguments():
+    parser = argparse.ArgumentParser(description='Process ontology format and version.')
+
+ # Defining ontoFormat argument with nested options
+ parser.add_argument('--ontoFormat', type=str, choices=['turtle', 'ntriples', 'rdfxml', 'htmldocu'],
+ default='turtle', help='Format of the ontology: turtle, ntriples, rdfxml, htmldocu')
+
+ parser.add_argument('--ontoPrecedence', type=str, choices=['default', 'enforcedPriority', 'always'],
+ default='enforcedPriority', help='Precedence of the ontology: default, enforcedPriority, always')
+
+    parser.add_argument('--patchAcceptUpstream', type=str2bool, default=False,
+ help='Defines if the Accept Header is patched upstream in original mode.')
+
+ # Defining ontoVersion argument
+ parser.add_argument('--ontoVersion', type=str, choices=['original', 'originalFailoverLive', 'originalFailoverArchivoMonitor',
+ 'latestArchive', 'timestampArchive', 'dependencyManifest'],
+ default='originalFailoverLive', help='Version of the ontology: original, originalFailoverLive, originalFailoverArchivoMonitor, latestArchive, timestampArchive, dependencyManifest')
+
+ # Enable/disable mode to only proxy requests to ontologies
+    parser.add_argument('--onlyOntologies', type=str2bool, default=False,
+ help='Enable/disable mode to only proxy requests to ontologies.')
+
+ # Enable HTTPS interception for specific domains
+ parser.add_argument('--httpsIntercept', type=str, choices=['none', 'archivo', 'all', 'listfilename'],
+ default='all', help='Enable HTTPS interception for specific domains: none, archivo, all, listfilename.')
+
+ # Enable/disable inspecting or removing redirects
+    parser.add_argument('--inspectRedirects', type=str2bool, default=True,
+ help='Enable/disable inspecting or removing redirects.')
+
+ # Enable/disable proxy forward headers
+    parser.add_argument('--forwardHeaders', type=str2bool, default=True,
+ help='Enable/disable proxy forward headers.')
+
+ # SubjectBinarySearchThreshold
+ parser.add_argument('--subjectBinarySearchThreshold', type=int, default=100,
+ help='SubjectBinarySearchThreshold value.')
+
+ # Proxy native parameters
+ parser.add_argument('--ca-key-file', type=str, required=True,
+ help='Path to the CA key file.')
+
+ parser.add_argument('--ca-cert-file', type=str, required=True,
+ help='Path to the CA certificate file.')
+
+ parser.add_argument('--ca-signing-key-file', type=str, required=True,
+ help='Path to the CA signing key file.')
+
+ parser.add_argument('--hostname', type=str, required=True,
+ help='Hostname for the proxy server.')
+
+ parser.add_argument('--port', type=int, required=True,
+ help='Port for the proxy server.')
+
+ parser.add_argument('--plugins', type=str, required=True,
+ help='Plugins for the proxy server.')
+
+ args = parser.parse_args()
+
+ ontoFormat = {
+ 'format': args.ontoFormat,
+ 'precedence': args.ontoPrecedence,
+ 'patchAcceptUpstream': args.patchAcceptUpstream
+ }
+
+ logger.info(f'Ontology Format: {ontoFormat}')
+ logger.info(f'Ontology Version: {args.ontoVersion}')
+ #logger.info(f'Only Ontologies Mode: {args.onlyOntologies}')
+ #logger.info(f'HTTPS Interception: {args.httpsIntercept}')
+ #logger.info(f'Inspect Redirects: {args.inspectRedirects}')
+ #logger.info(f'Forward Headers: {args.forwardHeaders}')
+ #logger.info(f'Subject Binary Search Threshold: {args.subjectBinarySearchThreshold}')
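+    # Keep this return order in sync with the unpacking in
+    # OntologyTimeMachinePlugin.__init__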
+ return ontoFormat, args.ontoVersion, args.onlyOntologies, args.httpsIntercept, args.inspectRedirects, args.forwardHeaders, args.subjectBinarySearchThreshold
+
def check_if_archivo_ontology_requested(request):
- urls = []
with open('ontologytimemachine/utils/archivo_ontologies.txt', 'r') as file:
urls = [line.strip() for line in file]
- parsed_urls = [urlparse(url).netloc for url in urls]
- _, host = get_ontology_from_request(request)
- return host in parsed_urls
-
+ parsed_urls = [(urlparse(url).netloc, urlparse(url).path) for url in urls]
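+    # Each entry yields a (host, path) pair: a request matches when the host is
+    # identical and the request path starts with the listed path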
-def mock_response_404():
- mock_response = requests.Response()
- mock_response.status_code = 404
- mock_response.url = 'https://example.com/resource-not-found'
- mock_response.headers['Content-Type'] = 'text/html'
-    mock_response._content = b'404 Not Found'
- return mock_response
+ _, request_host, request_path = get_ontology_from_request(request)
+ for host, path in parsed_urls:
+ if request_host == host and request_path.startswith(path):
+ return True
+ return False
-def get_headers_and_expected_type(request):
+def get_headers(request):
headers = {}
- expected_type = 'text/turtle'
for k, v in request.headers.items():
headers[v[0].decode('utf-8')] = v[1].decode('utf-8')
- if v[0].decode('utf-8') == 'Accept':
- expected_type = v[1].decode('utf-8')
- return headers, expected_type
+ return headers
def get_ontology_from_request(request):
- logger.debug('Get ontology from request')
+ logger.info('Get ontology from request')
if (request.method == b'GET' or request.method == b'HEAD') and not request.host:
for k, v in request.headers.items():
if v[0].decode('utf-8') == 'Host':
- print('host found')
host = v[1].decode('utf-8')
+ path = request.path.decode('utf-8')
ontology = 'https://' + host + request.path.decode('utf-8')
else:
host = request.host.decode('utf-8')
+ path = request.path.decode('utf-8')
ontology = str(request._url)
- logger.debug(f'Ontology: {ontology}')
- return ontology, host
+ logger.info(f'Ontology: {ontology}')
+ return ontology, host, path
+
+
+def get_mime_type(format):
+    # The ontoFormat values ('turtle', 'ntriples', 'rdfxml', 'htmldocu') are not
+    # file extensions, so mimetypes cannot guess them; map them explicitly
+    format_to_mime = {
+        'turtle': 'text/turtle',
+        'ntriples': 'application/n-triples',
+        'rdfxml': 'application/rdf+xml',
+        'htmldocu': 'text/html',
+    }
+    # Fall back to turtle if the format is unknown
+    return format_to_mime.get(format, 'text/turtle')
+
+
+def set_onto_format_headers(request, ontoFormat, ontoVersion):
+ logger.info(f'Setting headers based on ontoFormat: {ontoFormat}')
+
+ # Determine the correct MIME type for the format
+ mime_type = get_mime_type(ontoFormat['format'])
+
+ # Check the precedence and update the 'Accept' header if necessary
+ if ontoFormat['precedence'] in ['always', 'enforcedPriority'] or \
+ (ontoFormat['precedence'] == 'default' and b'accept' not in request.headers):
+ request.headers[b'accept'] = (b'Accept', mime_type.encode('utf-8'))
+ logger.info(f'Accept header set to: {request.headers[b"accept"][1]}')
+
+ # Check if patchAcceptUpstream is true and ontoVersion is 'original'
+ if ontoFormat['patchAcceptUpstream'] and ontoVersion == 'original':
+ request.headers[b'accept'] = (b'Accept', mime_type.encode('utf-8'))
+ logger.info(f'Accept header patched upstream: {request.headers[b"accept"][1]}')
+
+
+def proxy_logic(request: HttpParser, ontoFormat, ontoVersion):
+ logger.info('Proxy has to intervene')
+ set_onto_format_headers(request, ontoFormat, ontoVersion)
+ headers = get_headers(request)
+ logger.info(f'Updated headers: {request.headers}')
+ ontology, _, _ = get_ontology_from_request(request)
+ if ontoVersion == 'original':
+ response = fetch_original(ontology, headers)
+ elif ontoVersion == 'originalFailoverLive':
+ response = fetch_failover(ontology, headers, live=True)
+    elif ontoVersion == 'originalFailoverArchivoMonitor':
+ response = fetch_failover(ontology, headers, monitor=True)
+ elif ontoVersion == 'latestArchive':
+ response = fetch_latest_archive(ontology, headers)
+ elif ontoVersion == 'timestampArchive':
+ response = fetch_timestamp_archive(ontology, headers)
+ elif ontoVersion == 'dependencyManifest':
+ response = fetch_dependency_manifest(ontology, headers)
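+    # ontoVersion is restricted by the argparse choices above, so exactly one
+    # of these branches is taken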
-def proxy_logic_http(request: HttpParser):
- logger.info('Start proxy logic in case of HTTP')
- response = failover_mode_http(request)
return response
-def failover_mode_http(request):
- headers, _ = get_headers_and_expected_type(request)
- logger.debug(headers)
- logger.debug('Failover mode')
+# Fetch from the original source, no matter what
+def fetch_original(ontology, headers):
+ logger.info(f'Fetching original ontology from URL: {ontology}')
+ try:
+ response = requests.get(url=ontology, headers=headers, timeout=5)
+ logger.info('Successfully fetched original ontology')
+ return response
+ except Exception as e:
+ logger.error(f'Error fetching original ontology: {e}')
+ return mock_response_500()
+
- ontology, _ = get_ontology_from_request(request)
+# Failover mode
+def fetch_failover(ontology, headers, live=False, monitor=False):
try:
+ logger.info(f'Fetching original ontology with failover from URL: {ontology}')
response = requests.get(url=ontology, headers=headers, timeout=5)
- logger.info(f' Status code: {response.status_code}')
- if response.history:
- logger.debug("Request was redirected")
- for resp in response.history:
- logger.debug(f"{resp.status_code}, {resp.url}")
- logger.debug(f"Final destination: {response.status_code}, {response.url}")
- else:
- logger.debug("Request was not redirected")
- content_type = response.headers.get('Content-Type')
- logger.debug(content_type)
- if response.status_code in passthrough_status_codes_http and content_type in ontology_types:
- logging.info(f'We found the right answer')
+ logger.info('Successfully fetched original ontology')
+ if response.status_code in passthrough_status_codes_http:
return response
else:
- logging.info('Content type is not as expected')
- return fetch_from_dbpedia_archivo_api(ontology)
- except (SSLError, Timeout, ConnectionError, RequestException) as e:
- #logger.error("Request failed:", e)
- return fetch_from_dbpedia_archivo_api(ontology)
+        logger.info(f'Status code: {response.status_code}')
+ return fetch_from_dbpedia_archivo_api(ontology, headers)
+ except Exception as e:
+ logger.error(f'Error fetching original ontology: {e}')
+ if live:
+ logger.info('Attempting to fetch live version due to failover')
+ return fetch_from_dbpedia_archivo_api(ontology, headers)
+ elif monitor:
+ logger.info('Attempting to fetch archive monitor version due to failover')
+ # TODO
+            return mock_response_404()
+ else:
+            return mock_response_500()
-def proxy_logic_https(parser):
- logger.info('Start proxy logic in case of HTTPS')
- response = failover_mode_https(parser)
- return response
+# Fetch the latest version from archivo (no timestamp defined)
+def fetch_latest_archive(ontology, headers):
+    logger.info(f'Fetching latest archive ontology from URL: {ontology}')
+    # TODO: resolve the latest archived snapshot via the Archivo API;
+    # for now this fetches the original IRI directly
+    try:
+        response = requests.get(url=ontology, headers=headers, timeout=5)
+        logger.info('Successfully fetched latest archive ontology')
+        return response
+    except Exception as e:
+        logger.error(f'Error fetching latest archive ontology: {e}')
+        return mock_response_500()
-def failover_mode_https(parser):
- code = int(parser.code.decode('utf-8'))
- if code >= 300 and code < 400:
- logger.info('Status code: 3XX')
- # Handle redirection
- redirect_url = parser.header(b'Location').decode('utf-8')
- logger.info(f'Redirected to {redirect_url}')
- response = get_data_from_redirect(redirect_url)
- return response
+def fetch_timestamp_archive(ontology, headers):
+    return mock_response_404()
+
+
+def fetch_dependency_manifest(ontology, headers):
+    return mock_response_404()
-def get_data_from_redirect(ontology):
+def failover_mode(request):
+ headers = get_headers(request)
+ logger.info('Failover mode')
+
+ ontology, _, _ = get_ontology_from_request(request)
try:
- response = requests.get(url=ontology, timeout=5)
- logger.info(response.status_code)
+ response = requests.get(url=ontology, headers=headers, timeout=5)
if response.history:
logger.debug("Request was redirected")
for resp in response.history:
@@ -151,24 +251,51 @@ def get_data_from_redirect(ontology):
else:
logger.debug("Request was not redirected")
content_type = response.headers.get('Content-Type')
+ logger.debug(content_type)
if response.status_code in passthrough_status_codes_http:
- logging.info(f'We found the rights answer')
return response
else:
- logging.info('Content type is not as expected')
- return fetch_from_dbpedia_archivo_api(ontology)
+        logger.info(f'Status code: {response.status_code}')
+ return fetch_from_dbpedia_archivo_api(ontology, headers)
except (SSLError, Timeout, ConnectionError, RequestException) as e:
- #logger.error("Request failed:", e)
- return fetch_from_dbpedia_archivo_api(ontology)
+ return fetch_from_dbpedia_archivo_api(ontology, headers)
-def fetch_from_dbpedia_archivo_api(ontology: str, format: str = 'ttl'):
+def fetch_from_dbpedia_archivo_api(ontology, headers):
+ format, version, versionMatching = get_parameters_from_headers(headers)
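+    # format may be None here if the Accept header is missing or not a known RDF type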
dbpedia_url = f'{dbpedia_api}?o={ontology}&f={format}'
try:
logger.info(f'Fetching from DBpedia Archivo API: {dbpedia_url}')
response = requests.get(dbpedia_url, timeout=5)
- logger.debug('Response received')
return response
except requests.exceptions.RequestException as e:
logging.error(f'Exception occurred while fetching from DBpedia Archivo API: {e}')
- return mock_response_404()
\ No newline at end of file
+ return mock_response_404()
+
+
+def map_mime_to_format(mime_type):
+    # The standard mimetypes registry does not know most RDF media types,
+    # so map MIME types directly to Archivo format identifiers
+    mime_to_format = {
+        'application/rdf+xml': 'owl',
+        'text/rdf+xml': 'owl',
+        'application/turtle': 'ttl',
+        'text/turtle': 'ttl',
+        'application/n-triples': 'nt',
+        'text/n-triples': 'nt',
+        # Add more mappings if needed
+    }
+
+    return mime_to_format.get(mime_type, None)
+
+
+def get_parameters_from_headers(headers):
+ # Map MIME types to formats
+ mime_type = headers.get('Accept', None)
+ format = map_mime_to_format(mime_type)
+
+ version = headers.get('Version', None)
+ versionMatching = headers.get('VersionMatching', None)
+ return format, version, versionMatching
\ No newline at end of file
diff --git a/tests/test_proxy.py b/tests/test_proxy.py
index d370ac1..34dde2f 100644
--- a/tests/test_proxy.py
+++ b/tests/test_proxy.py
@@ -42,79 +42,84 @@ def start_proxy_server():
process.wait()
-#def test_babelnet():
-# iri = 'http://babelnet.org/rdf/'
-# generic_test(iri, 'text/turtle')
+def test_babelnet():
+ iri = 'http://babelnet.org/rdf/'
+ generic_test(iri, 'text/turtle')
-#def test_bag_basisregistraties():
-# iri = 'http://bag.basisregistraties.overheid.nl/def/bag'
-# generic_test(iri, 'text/turtle')
+def test_bag_basisregistraties():
+ iri = 'http://bag.basisregistraties.overheid.nl/def/bag'
+ generic_test(iri, 'text/turtle')
-#def test_bblfish():
-# iri = 'http://bblfish.net/work/atom-owl/2006-06-06/'
-# generic_test(iri, 'text/turtle')
+def test_bblfish():
+ iri = 'http://bblfish.net/work/atom-owl/2006-06-06/'
+ generic_test(iri, 'text/turtle')
-#def test_brk_basisregistraties():
-# iri = 'http://brk.basisregistraties.overheid.nl/def/brk'
-# generic_test(iri, 'text/turtle')
+def test_brk_basisregistraties():
+ iri = 'http://brk.basisregistraties.overheid.nl/def/brk'
+ generic_test(iri, 'text/turtle')
-#def test_brt_basisregistraties():
-# iri = 'http://brt.basisregistraties.overheid.nl/def/top10nl'
-# generic_test(iri, 'text/turtle')
+def test_brt_basisregistraties():
+ iri = 'http://brt.basisregistraties.overheid.nl/def/top10nl'
+ generic_test(iri, 'text/turtle')
-#def test_brt_basisregistraties_begrippenkader():
-# iri = 'http://brt.basisregistraties.overheid.nl/id/begrippenkader/top10nl'
-# generic_test(iri, 'text/turtle')
+def test_brt_basisregistraties_begrippenkader():
+ iri = 'http://brt.basisregistraties.overheid.nl/id/begrippenkader/top10nl'
+ generic_test(iri, 'text/turtle')
-#def test_buzzword():
-# iri = 'http://buzzword.org.uk/rdf/personal-link-types#'
-# generic_test(iri, 'text/turtle')
+def test_buzzword():
+ iri = 'http://buzzword.org.uk/rdf/personal-link-types#'
+ generic_test(iri, 'text/turtle')
-#def test_catalogus_professorum():
-# iri = 'http://catalogus-professorum.org/cpm/2/'
-# generic_test(iri, 'text/turtle')
+def test_catalogus_professorum():
+ iri = 'http://catalogus-professorum.org/cpm/2/'
+ generic_test(iri, 'text/turtle')
-#def test_data_gov():
-# iri = 'http://data-gov.tw.rpi.edu/2009/data-gov-twc.rdf'
-# generic_test(iri, 'text/turtle')
+def test_data_gov():
+ iri = 'http://data-gov.tw.rpi.edu/2009/data-gov-twc.rdf'
+ generic_test(iri, 'text/turtle')
-#def test_data_bigdatagrapes():
-# iri = 'http://data.bigdatagrapes.eu/resource/ontology/'
-# generic_test(iri, 'text/turtle')
+def test_data_bigdatagrapes():
+ iri = 'http://data.bigdatagrapes.eu/resource/ontology/'
+ generic_test(iri, 'text/turtle')
-#def test_data_europa_esco():
-# iri = 'http://data.europa.eu/esco/flow'
-# generic_test(iri, 'text/turtle')
+def test_data_europa_esco():
+ iri = 'http://data.europa.eu/esco/flow'
+ generic_test(iri, 'text/turtle')
-#def test_data_globalchange():
-# iri = 'http://data.globalchange.gov/gcis.owl'
-# generic_test(iri, 'text/turtle')
+def test_data_globalchange():
+ iri = 'http://data.globalchange.gov/gcis.owl'
+ generic_test(iri, 'text/turtle')
-#def test_data_ontotext():
-# iri = 'http://data.ontotext.com/resource/leak/'
-# generic_test(iri, 'text/turtle')
+def test_data_ontotext():
+ iri = 'http://data.ontotext.com/resource/leak/'
+ generic_test(iri, 'text/turtle')
-#def test_data_opendiscoveryspace():
-# iri = 'http://data.opendiscoveryspace.eu/lom_ontology_ods.owl#'
-# generic_test(iri, 'text/turtle')
+def test_data_opendiscoveryspace():
+ iri = 'http://data.opendiscoveryspace.eu/lom_ontology_ods.owl#'
+ generic_test(iri, 'text/turtle')
-#def test_data_ordnancesurvey_50kGazetteer():
-# iri = 'http://data.ordnancesurvey.co.uk/ontology/50kGazetteer/'
-# generic_test(iri, 'text/turtle')
+def test_data_ordnancesurvey_50kGazetteer():
+ iri = 'http://data.ordnancesurvey.co.uk/ontology/50kGazetteer/'
+ generic_test(iri, 'text/turtle')
+
+
+def test_dbpedia_ontology_person():
+ iri = 'http://dbpedia.org/ontology/Person'
+ generic_test(iri, 'text/turtle')
def test_linked_web_apis():
@@ -122,9 +127,9 @@ def test_linked_web_apis():
generic_test(iri, 'text/turtle')
-def test_ontologi_es():
- iri = 'http://ontologi.es/days#'
- generic_test(iri, 'text/turtle')
+#def test_ontologi_es():
+# iri = 'http://ontologi.es/days#'
+# generic_test(iri, 'text/turtle')
def test_https():
@@ -132,6 +137,11 @@ def test_https():
generic_test(iri, 'text/plain; charset=utf-8')
+def test_https_vocab_eccenca():
+ iri = "https://vocab.eccenca.com/auth/"
+ generic_test(iri, 'text/plain; charset=utf-8')
+
+
def not_test_all_iris():
with open('tests/archivo_ontologies_test.txt', 'r') as file:
for line in file:
@@ -143,7 +153,6 @@ def not_test_all_iris():
def generic_test(iri, content_type):
response = requests.get(iri, proxies=PROXIES, verify=CA_CERT_PATH)
assert response.status_code == 200
- assert response.headers['Content-Type'] == content_type
assert iri in response.content.decode('utf-8')
diff --git a/tests/test_unit.py b/tests/test_unit.py
new file mode 100644
index 0000000..f0f76e8
--- /dev/null
+++ b/tests/test_unit.py
@@ -0,0 +1,145 @@
+import unittest
+from unittest.mock import patch, Mock
+import argparse
+import requests
+from ontologytimemachine.utils.mock_responses import (
+ mock_response_200,
+ mock_response_403,
+ mock_response_404,
+ mock_response_500
+)
+from ontologytimemachine.utils.utils import (
+ parse_arguments,
+ fetch_from_dbpedia_archivo_api,
+ map_mime_to_format,
+ get_parameters_from_headers
+)
+
+
+class TestUtils(unittest.TestCase):
+
+ @patch('argparse.ArgumentParser.parse_args')
+ def test_parse_arguments(self, mock_parse_args):
+ mock_parse_args.return_value = argparse.Namespace(
+ ontoFormat='turtle',
+ ontoPrecedence='enforcedPriority',
+ patchAcceptUpstream=False,
+ ontoVersion='originalFailoverLive',
+ onlyOntologies=True,
+            httpsIntercept='none',
+ inspectRedirects=True,
+ forwardHeaders=True,
+ subjectBinarySearchThreshold=100
+ )
+
+ args = parse_arguments()
+
+ self.assertEqual(args[0]['format'], 'turtle')
+ self.assertEqual(args[0]['precedence'], 'enforcedPriority')
+ self.assertFalse(args[0]['patchAcceptUpstream'])
+ self.assertEqual(args[1], 'originalFailoverLive')
+ self.assertTrue(args[2])
+        self.assertEqual(args[3], 'none')
+ self.assertTrue(args[4])
+ self.assertTrue(args[5])
+ self.assertEqual(args[6], 100)
+
+ mock_parse_args.return_value = argparse.Namespace(
+ ontoFormat='ntriples',
+ ontoPrecedence='default',
+ patchAcceptUpstream=True,
+ ontoVersion='latestArchive',
+ onlyOntologies=False,
+            httpsIntercept='all',
+ inspectRedirects=False,
+ forwardHeaders=False,
+ subjectBinarySearchThreshold=50
+ )
+
+ args = parse_arguments()
+
+ self.assertEqual(args[0]['format'], 'ntriples')
+ self.assertEqual(args[0]['precedence'], 'default')
+ self.assertTrue(args[0]['patchAcceptUpstream'])
+ self.assertEqual(args[1], 'latestArchive')
+ self.assertFalse(args[2])
+        self.assertEqual(args[3], 'all')
+ self.assertFalse(args[4])
+ self.assertFalse(args[5])
+ self.assertEqual(args[6], 50)
+
+
+ @patch('requests.get')
+ def test_fetch_from_dbpedia_archivo_api(self, mock_get):
+ mock_response = Mock()
+ mock_response.status_code = 200
+ mock_get.return_value = mock_response
+
+ ontology = 'http://dbpedia.org/ontology/Person'
+ headers = {'Accept': 'text/turtle'}
+
+ response = fetch_from_dbpedia_archivo_api(ontology, headers)
+ self.assertEqual(response.status_code, 200)
+
+ mock_get.side_effect = requests.exceptions.RequestException
+ response = fetch_from_dbpedia_archivo_api(ontology, headers)
+ self.assertEqual(response.status_code, 404)
+
+ def test_map_mime_to_format(self):
+ self.assertEqual(map_mime_to_format('application/rdf+xml'), 'owl')
+ self.assertEqual(map_mime_to_format('text/turtle'), 'ttl')
+ self.assertEqual(map_mime_to_format('application/n-triples'), 'nt')
+ self.assertIsNone(map_mime_to_format('unknown/mime'))
+
+ def test_get_parameters_from_headers(self):
+ headers = {
+ 'Accept': 'application/rdf+xml',
+ 'Version': '1.0',
+ 'VersionMatching': 'exact'
+ }
+ format, version, versionMatching = get_parameters_from_headers(headers)
+ self.assertEqual(format, 'owl')
+ self.assertEqual(version, '1.0')
+ self.assertEqual(versionMatching, 'exact')
+
+ headers = {
+ 'Accept': 'unknown/mime',
+ 'Version': '2.0',
+ 'VersionMatching': 'compatible'
+ }
+ format, version, versionMatching = get_parameters_from_headers(headers)
+ self.assertIsNone(format)
+ self.assertEqual(version, '2.0')
+ self.assertEqual(versionMatching, 'compatible')
+
+
+
+class TestMockResponses(unittest.TestCase):
+
+ def test_mock_response_200(self):
+ response = mock_response_200()
+ self.assertEqual(response.status_code, 200)
+ self.assertEqual(response.url, 'https://example.com/success')
+ self.assertEqual(response.headers['Content-Type'], 'text/html')
+        self.assertIn(b'To be implemented', response.content)
+
+ def test_mock_response_403(self):
+ response = mock_response_403()
+ self.assertEqual(response.status_code, 403)
+ self.assertEqual(response.url, 'https://example.com/forbidden')
+ self.assertEqual(response.headers['Content-Type'], 'text/html')
+        self.assertIn(b'403 Forbidden', response.content)
+
+ def test_mock_response_404(self):
+ response = mock_response_404()
+ self.assertEqual(response.status_code, 404)
+ self.assertEqual(response.url, 'https://example.com/resource-not-found')
+ self.assertEqual(response.headers['Content-Type'], 'text/html')
+        self.assertIn(b'404 Not Found', response.content)
+
+ def test_mock_response_500(self):
+ response = mock_response_500()
+ self.assertEqual(response.status_code, 500)
+ self.assertEqual(response.url, 'https://example.com/internal-server-error')
+ self.assertEqual(response.headers['Content-Type'], 'text/html')
+        self.assertIn(b'500 Internal Server Error', response.content)
\ No newline at end of file