Skip to content

Commit

Permalink
Merge pull request #26 from kuefmz/main
Browse files Browse the repository at this point in the history
Fixing Dockerfile and refactoring
  • Loading branch information
JJ-Author authored Aug 3, 2024
2 parents b881d73 + 3903e7e commit 0225a34
Show file tree
Hide file tree
Showing 7 changed files with 499 additions and 316 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,5 @@ RUN poetry config virtualenvs.create false
RUN poetry install --no-dev && rm pyproject.toml


CMD python3 -m proxy --hostname 0.0.0.0 --port $PORT --plugins ontologytimemachine.custom_proxy.OntologyTimeMachinePlugin
CMD python3 -m proxy --ca-key-file ca-key.pem --ca-cert-file ca-cert.pem --ca-signing-key-file ca-signing-key.pem --hostname 0.0.0.0 --port $PORT --plugins ontologytimemachine.custom_proxy.OntologyTimeMachinePlugin

132 changes: 3 additions & 129 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,142 +22,16 @@ cp ca-signing-key.pem ~/ontology-time-machine/ca-signing-key.pem

### Curl tests:
- curl -x http://0.0.0.0:8899 --cacert ca-cert.pem http://www.google.com
- curl -x http://0.0.0.0:8899 -H "Accept: text/turtle" --cacert ca-cert.pem http://ontologi.es/days#
- curl -x http://0.0.0.0:8899 -H "Accept: text/turtle" --cacert ca-cert.pem http://linked-web-apis.fit.cvut.cz/ns/core
- curl -x http://0.0.0.0:8899 --cacert ca-cert.pem https://www.w3id.org/simulation/ontology/
- curl -x http://0.0.0.0:8899 --cacert ca-cert.pem https://www.w3.org/ns/ldt#
- curl -x http://0.0.0.0:8899 --cacert ca-cert.pem https://raw.githubusercontent.com/br0ast/simulationontology/main/Ontology/simulationontology.owl
- curl -x http://0.0.0.0:8899 -H "Accept: text/turtle" --cacert ca-cert.pem http://bblfish.net/work/atom-owl/2006-06-06/
- curl -x http://0.0.0.0:8899 -H "Accept: text/turtle" --cacert ca-cert.pem http://purl.org/makolab/caont/


### Not working:
- curl -x http://0.0.0.0:8899 --cacert ca-cert.pem https://vocab.eccenca.com/auth/
- curl -x http://0.0.0.0:8899 -H "Accept: text/turtle" --cacert ca-cert.pem http://dbpedia.org/ontology/Person


### Not working:
- curl -x http://0.0.0.0:8899 -H "Accept: text/turtle" --cacert ca-cert.pem http://ontologi.es/days#


from proxy.http.proxy import HttpProxyBasePlugin
from proxy.http.parser import HttpParser, httpParserTypes
from proxy.common.utils import build_http_response
from proxy.http.methods import HttpMethods
from ontologytimemachine.utils.utils import proxy_logic_http, proxy_logic_https
from ontologytimemachine.utils.utils import check_if_archivo_ontology_requested
from ontologytimemachine.utils.utils import get_headers_and_expected_type
from requests.exceptions import SSLError, Timeout, ConnectionError, RequestException
from http.client import responses
import proxy
import sys
import requests
import logging


# Configure the root logger at import time: DEBUG level, "timestamp - level - message" lines.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger used by the plugin hooks in this file.
logger = logging.getLogger(__name__)


class OntologyTimeMachinePlugin(HttpProxyBasePlugin):
    """proxy.py plugin that serves ontology requests through the proxy logic.

    Requests that ``check_if_archivo_ontology_requested`` does not recognise
    are returned unchanged from every hook.
    """

    def before_upstream_connection(self, request: HttpParser):
        """Decide whether the upstream connection should be opened.

        Returns the request unchanged for ``CONNECT`` requests, for
        non-ontology requests, and for ontology requests whose probe did not
        answer 502. Returns ``None`` after queueing a response produced by
        ``proxy_logic_http`` when the probe answers 502 or raises a
        ``requests`` exception.
        """
        logger.debug('Before upstream')
        logger.debug(request.method)
        if request.method == b'CONNECT':
            logger.debug('The request is HTTPS, forward as it is')
            return request

        if not check_if_archivo_ontology_requested(request):
            return request

        logger.debug('The request is for an ontology')
        try:
            ontology_url = str(request._url)
            headers, _ = get_headers_and_expected_type(request)
            # Bounded wait: without a timeout an unresponsive origin would
            # block this hook indefinitely.
            response = requests.get(ontology_url, headers=headers, timeout=5)
            if response.status_code == 502:
                logger.error('Received 502 Bad Gateway error')
                response = proxy_logic_http(request)
                logger.debug('Queue response')
                self.queue_response(response)
                return None
            logger.debug('The request is correct')
            return request
        except (SSLError, Timeout, ConnectionError, RequestException) as e:
            logger.error(f'Network-related exception occurred {e}')
            response = proxy_logic_http(request)
            logger.debug('Queue response')
            self.queue_response(response)
            return None

    def handle_client_request(self, request: HttpParser):
        """Answer ontology requests directly; pass everything else through.

        Returns the request when no ontology is asked for; otherwise queues
        the ``proxy_logic_http`` response to the client and returns ``None``.
        """
        logger.debug('HTTP call')
        logger.debug(request._url)

        if not check_if_archivo_ontology_requested(request):
            logger.info('No ontology is asked, forward original request')
            return request

        self.queue_response(proxy_logic_http(request))
        return None

    def handle_upstream_chunk(self, chunk: memoryview):
        """Inspect an upstream chunk and replace it for selected status codes.

        The chunk is parsed as an HTTP response. Codes 1xx, 201-204 and 451
        are passed through unchanged; any other code queues the response from
        ``proxy_logic_https`` and returns ``None``. If parsing fails the
        original chunk is returned.
        """
        logger.info('HTTPS call')

        try:
            # Parse the HTTP response to handle different cases
            parser = HttpParser(httpParserTypes.RESPONSE_PARSER)
            parser.parse(memoryview(chunk))
            code = int(parser.code.decode('utf-8'))
            # NOTE(review): 200 is not in any pass-through range, so a plain
            # 200 is handed to proxy_logic_https - confirm this is intended.
            if 100 <= code < 200 or 201 <= code <= 204 or code == 451:
                return chunk
            response = proxy_logic_https(parser)
            logger.debug('Queue response')
            self.queue_response(response)
            return None
        except UnicodeDecodeError:
            logger.warning('Received non-text chunk, cannot decode')
        except Exception as e:
            logger.error(f'Exception occurred while handling upstream chunk: {e}')
        return chunk

    def queue_response(self, response):
        """Serialise ``response`` and queue it on the client connection.

        ``response`` must expose ``status_code``, ``headers`` and ``content``.
        The Content-Type header is forwarded only when present, and status
        codes missing from ``http.client.responses`` get the reason
        ``Unknown``.
        """
        status = response.status_code
        headers = {}
        content_type = response.headers.get('Content-Type')
        if content_type is not None:
            # bytes(None, 'utf-8') raises TypeError, so skip a missing header.
            headers[b'Content-Type'] = bytes(content_type, 'utf-8')
        self.client.queue(
            build_http_response(
                status,
                reason=bytes(responses.get(status, 'Unknown'), 'utf-8'),
                headers=headers,
                body=response.content,
            )
        )


if __name__ == '__main__':
    # Append the CA file flags, the listen address and this module's plugin
    # to the command line before handing control to proxy.py.
    cli_args = [
        '--ca-key-file', 'ca-key.pem',
        '--ca-cert-file', 'ca-cert.pem',
        '--ca-signing-key-file', 'ca-signing-key.pem',
        '--hostname', '0.0.0.0',
        '--port', '8899',
        '--plugins', __name__ + '.OntologyTimeMachinePlugin',
    ]
    sys.argv.extend(cli_args)
    logger.info("Starting OntologyTimeMachineProxy server...")
    proxy.main()
74 changes: 32 additions & 42 deletions ontologytimemachine/custom_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,90 +2,79 @@
from proxy.http.parser import HttpParser, httpParserTypes
from proxy.common.utils import build_http_response
from proxy.http.methods import HttpMethods
from ontologytimemachine.utils.utils import proxy_logic_http, proxy_logic_https
from ontologytimemachine.utils.utils import proxy_logic, parse_arguments
from ontologytimemachine.utils.utils import check_if_archivo_ontology_requested
from ontologytimemachine.utils.utils import get_headers_and_expected_type
from ontologytimemachine.utils.utils import get_ontology_from_request
from ontologytimemachine.utils.mock_responses import mock_response_403
from requests.exceptions import SSLError, Timeout, ConnectionError, RequestException
from http.client import responses
import proxy
import sys
import requests
import logging


IP = '0.0.0.0'
PORT = '8899'


logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


class OntologyTimeMachinePlugin(HttpProxyBasePlugin):
def __init__(self, *args, **kwargs):
    """Initialise the base plugin, then store the values from ``parse_arguments``.

    ``parse_arguments()`` is unpacked into seven attributes, in order:
    ``ontoFormat``, ``ontoVersion``, ``only_ontologies``, ``https_intercept``,
    ``inspect_redirects``, ``forward_headers`` and
    ``subject_binary_search_threshold``.
    """
    super().__init__(*args, **kwargs)
    settings = parse_arguments()
    (
        self.ontoFormat,
        self.ontoVersion,
        self.only_ontologies,
        self.https_intercept,
        self.inspect_redirects,
        self.forward_headers,
        self.subject_binary_search_threshold,
    ) = settings


def before_upstream_connection(self, request: HttpParser):
    """Decide whether proxy.py should open the upstream connection.

    Returns the request for ``CONNECT`` requests when ``https_intercept`` is
    ``'all'`` or ``'archivo'``, and for non-ontology requests when
    ``only_ontologies`` is off. Returns ``None`` in every other case, after
    queueing either a 403 mock response (non-ontology request in
    only-ontologies mode) or the ``proxy_logic`` response (ontology request).
    """
    logger.info('Before upstream connection hook')
    logger.info(f'Request method: {request.method} - Request host: {request.host} - Request path: {request.path} - Request headers: {request.headers}')

    if request.method == b'CONNECT':
        logger.info(f'HTTPS interception mode: {self.https_intercept}')
        # Only intercept if interception is enabled
        # NOTE(review): proxy.py documents a None return from this hook as
        # "do not establish the upstream connection" - confirm that is the
        # intended outcome for CONNECT requests that are not intercepted.
        if self.https_intercept in ('all', 'archivo'):
            return request
        return None

    ontology_request = check_if_archivo_ontology_requested(request)
    # If only ontology mode, return None in all other cases
    if self.only_ontologies and not ontology_request:
        logger.warning('Request denied: not an ontology request and only ontologies mode is enabled')
        # mock_response_403 is a factory: call it so queue_response receives
        # a response object rather than the function itself.
        self.queue_response(mock_response_403())
        return None

    if ontology_request:
        logger.debug('The request is for an ontology')
        response = proxy_logic(request, self.ontoFormat, self.ontoVersion)
        self.queue_response(response)
        return None
    return request


def handle_client_request(self, request: HttpParser):
    """Answer ontology requests directly; pass everything else through.

    Returns the request unchanged for ``CONNECT`` requests and for requests
    that ``check_if_archivo_ontology_requested`` does not recognise.
    Otherwise queues the ``proxy_logic`` response to the client and returns
    ``None``.
    """
    logger.info('Handle client request hook')
    logger.info(f'Request method: {request.method} - Request host: {request.host} - Request path: {request.path} - Request headers: {request.headers}')

    logger.debug(request.method)
    if request.method == b'CONNECT':
        return request

    ontology_request = check_if_archivo_ontology_requested(request)
    if not ontology_request:
        logger.info('The requested IRI is not part of DBpedia Archivo')
        return request

    logger.debug('Call proxy logic')
    response = proxy_logic(request, self.ontoFormat, self.ontoVersion)
    self.queue_response(response)

    return None


def handle_upstream_chunk(self, chunk: memoryview):
    """Log the call and hand the upstream chunk back unmodified."""
    logger.info('HTTPS call')
    return chunk


Expand All @@ -103,6 +92,7 @@ def queue_response(self, response):


if __name__ == '__main__':

sys.argv += [
'--ca-key-file', 'ca-key.pem',
'--ca-cert-file', 'ca-cert.pem',
Expand Down
38 changes: 38 additions & 0 deletions ontologytimemachine/utils/mock_responses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import requests


def _build_mock_response(status_code, url, body):
    """Return a ``requests.Response`` pre-filled with an HTML payload.

    Args:
        status_code: HTTP status code stored on the response.
        url: URL recorded on the response.
        body: Raw bytes exposed as the response content.
    """
    mock_response = requests.Response()
    mock_response.status_code = status_code
    mock_response.url = url
    mock_response.headers['Content-Type'] = 'text/html'
    # ``_content`` backs ``Response.content``; setting it directly makes the
    # body readable without performing a request.
    mock_response._content = body
    return mock_response


def mock_response_200():
    """Return a fresh 200 response with a placeholder HTML body."""
    return _build_mock_response(
        200,
        'https://example.com/success',
        b'<html><body><h1>To be implemented</h1></body></html>',
    )


def mock_response_403():
    """Return a fresh 403 Forbidden response with an HTML body."""
    return _build_mock_response(
        403,
        'https://example.com/forbidden',
        b'<html><body><h1>403 Forbidden</h1></body></html>',
    )


def mock_response_404():
    """Return a fresh 404 Not Found response with an HTML body."""
    return _build_mock_response(
        404,
        'https://example.com/resource-not-found',
        b'<html><body><h1>404 Not Found</h1></body></html>',
    )


def mock_response_500():
    """Return a fresh 500 Internal Server Error response with an HTML body."""
    return _build_mock_response(
        500,
        'https://example.com/internal-server-error',
        b'<html><body><h1>500 Internal Server Error</h1></body></html>',
    )
Loading

0 comments on commit 0225a34

Please sign in to comment.