diff --git a/harmony/adapter.py b/harmony/adapter.py index 3295176..d26efa9 100644 --- a/harmony/adapter.py +++ b/harmony/adapter.py @@ -21,6 +21,7 @@ from pystac import Catalog, Item, Asset, read_file from harmony.exceptions import CanceledException +from harmony.http import request_context from harmony.logging import build_logger from harmony.message import Temporal from harmony.util import touch_health_check_file @@ -70,6 +71,10 @@ def __init__(self, message, catalog=None, config=None): warn('Invoking adapter.BaseHarmonyAdapter without a STAC catalog is deprecated', DeprecationWarning, stacklevel=2) + # set the request ID in the global context so we can use it in other places + request_id = message.requestId if hasattr(message, 'requestId') else None + request_context['request_id'] = request_id + self.message = message self.catalog = catalog self.config = config diff --git a/harmony/http.py b/harmony/http.py index 29a67c5..4ae10e5 100644 --- a/harmony/http.py +++ b/harmony/http.py @@ -13,7 +13,7 @@ from functools import lru_cache import json from time import sleep -from urllib.parse import urlparse +from urllib.parse import urlparse, parse_qs, urlencode, urlunparse import datetime import sys import os @@ -35,6 +35,10 @@ MAX_RETRY_DELAY_SECS = 90 +# `request_context` is used to provide information about the request to functions like `download` +# without adding extra function arguments +request_context = {} + def get_retry_delay(retry_num: int, max_delay: int = MAX_RETRY_DELAY_SECS) -> int: """The number of seconds to sleep before retrying. 
def _add_api_request_uuid(url):
    """Return `url` with the current request ID attached as a query parameter.

    The request ID is read from the module-level `request_context` dict under
    the 'request_id' key and written to the 'A-api-request-uuid' query
    parameter, replacing any value that parameter already had.

    Parameters
    ----------
    url : str
        The URL that is about to be downloaded.

    Returns
    -------
    str
        `url` unchanged when no request ID is set or when the scheme is not
        http/https; otherwise the URL rebuilt with the extra query parameter.
    """
    request_id = request_context.get('request_id')
    if request_id is None:
        return url

    parsed_url = urlparse(url)

    # Only http/https URLs get the parameter; any other scheme (e.g. s3://)
    # is handed back exactly as it was received.
    if parsed_url.scheme not in ('http', 'https'):
        return url

    # keep_blank_values=True matters: by default parse_qs discards parameters
    # whose value is empty, which would silently strip e.g. `?flag=` from the
    # caller's URL when it is rebuilt below.
    query_params = parse_qs(parsed_url.query, keep_blank_values=True)

    # Add or overwrite the 'A-api-request-uuid' parameter. It is stored as a
    # one-element list to match the list values produced by parse_qs.
    query_params['A-api-request-uuid'] = [request_id]

    # doseq=True expands each list value into repeated `key=value` pairs.
    query_string = urlencode(query_params, doseq=True)

    # Rebuild the URL, swapping in only the new query string.
    return urlunparse(parsed_url._replace(query=query_string))
boto_cfg, client, get_version): fake_lib_version = '0.1.0' - get_version.return_value = fake_lib_version + get_version.return_value = fake_lib_version cfg = config_fixture() boto_cfg_instance = MagicMock() boto_cfg.return_value = boto_cfg_instance @@ -33,7 +35,7 @@ def test_s3_download_sets_minimal_user_agent_on_boto_client(self, boto_cfg, clie @patch('harmony.aws.Config') def test_s3_download_sets_harmony_user_agent_on_boto_client(self, boto_cfg, client, get_version): fake_lib_version = '0.1.0' - get_version.return_value = fake_lib_version + get_version.return_value = fake_lib_version harmony_user_agt = 'harmony/3.3.3 (harmony-test)' cfg = config_fixture(user_agent=harmony_user_agt) boto_cfg_instance = MagicMock() @@ -49,7 +51,7 @@ def test_s3_download_sets_harmony_user_agent_on_boto_client(self, boto_cfg, clie def test_s3_download_sets_app_name_on_boto_client(self, boto_cfg, client, get_version): app_name = 'gdal-subsetter' fake_lib_version = '0.1.0' - get_version.return_value = fake_lib_version + get_version.return_value = fake_lib_version cfg = config_fixture(app_name=app_name) boto_cfg_instance = MagicMock() boto_cfg.return_value = boto_cfg_instance @@ -58,6 +60,73 @@ def test_s3_download_sets_app_name_on_boto_client(self, boto_cfg, client, get_ve boto_cfg.assert_called_with(user_agent_extra=f'harmony (unknown version) harmony-service-lib/{fake_lib_version} ({app_name})') client.assert_called_with(service_name='s3', config=boto_cfg_instance, region_name=ANY) + @patch('harmony.util.get_version') + @patch('harmony.aws.download') + @patch('harmony.aws.Config') + def test_s3_download_does_not_set_api_request_uuid(self, boto_cfg, aws_download, get_version): + request_context['request_id'] = 'abc123' + app_name = 'gdal-subsetter' + fake_lib_version = '0.1.0' + get_version.return_value = fake_lib_version + cfg = config_fixture(app_name=app_name) + boto_cfg_instance = MagicMock() + boto_cfg.return_value = boto_cfg_instance + with patch('builtins.open', mock_open()): + 
util.download('s3://example/file.txt', 'tmp', access_token='', cfg=cfg) + aws_download.assert_called_with(ANY, 's3://example/file.txt', ANY, ANY ) + + @patch('harmony.util.get_version') + @patch.object(Session, 'get') + def test_http_download_sets_api_request_uuid(self, get, get_version): + request_context['request_id'] = 'abc123' + app_name = 'gdal-subsetter' + fake_lib_version = '0.1.0' + get_version.return_value = fake_lib_version + cfg = config_fixture(app_name=app_name) + with patch('builtins.open', mock_open()): + util.download('http://example/file.txt', 'tmp', access_token='', cfg=cfg) + get.assert_called_with('http://example/file.txt?A-api-request-uuid=abc123', headers={'user-agent': f'harmony (unknown version) harmony-service-lib/{fake_lib_version} (gdal-subsetter)'}, timeout=60, stream=True) + + @patch('harmony.util.get_version') + @patch.object(Session, 'get') + def test_https_download_sets_api_request_uuid(self, get, get_version): + request_context['request_id'] = 'abc123' + app_name = 'gdal-subsetter' + fake_lib_version = '0.1.0' + get_version.return_value = fake_lib_version + cfg = config_fixture(app_name=app_name) + with patch('builtins.open', mock_open()): + util.download('https://example/file.txt', 'tmp', access_token='', cfg=cfg) + get.assert_called_with('https://example/file.txt?A-api-request-uuid=abc123', headers={'user-agent': f'harmony (unknown version) harmony-service-lib/{fake_lib_version} (gdal-subsetter)'}, timeout=60, stream=True) + + @patch('harmony.util.get_version') + @patch.object(Session, 'post') + def test_http_download_with_post_sets_api_request_uuid(self, post, get_version): + request_context['request_id'] = 'abc123' + app_name = 'gdal-subsetter' + fake_lib_version = '0.1.0' + get_version.return_value = fake_lib_version + data = { 'foo': 'bar' } + cfg = config_fixture(app_name=app_name) + with patch('builtins.open', mock_open()): + util.download('http://example/file.txt', 'tmp', access_token='', data=data, cfg=cfg) + 
post.assert_called_with('http://example/file.txt?A-api-request-uuid=abc123', headers={'user-agent': f'harmony (unknown version) harmony-service-lib/{fake_lib_version} (gdal-subsetter)', 'Content-Type': 'application/x-www-form-urlencoded'}, data = { 'foo': 'bar' }, timeout=60, stream=True) + + + @patch('harmony.util.get_version') + @patch.object(Session, 'post') + def test_https_download_with_post_sets_api_request_uuid(self, post, get_version): + request_context['request_id'] = 'abc123' + app_name = 'gdal-subsetter' + fake_lib_version = '0.1.0' + get_version.return_value = fake_lib_version + data = { 'foo': 'bar' } + cfg = config_fixture(app_name=app_name) + with patch('builtins.open', mock_open()): + util.download('https://example/file.txt', 'tmp', access_token='', data=data, cfg=cfg) + post.assert_called_with('https://example/file.txt?A-api-request-uuid=abc123', headers={'user-agent': f'harmony (unknown version) harmony-service-lib/{fake_lib_version} (gdal-subsetter)', 'Content-Type': 'application/x-www-form-urlencoded'}, data = { 'foo': 'bar' }, timeout=60, stream=True) + + class TestStage(unittest.TestCase): def setUp(self): self.config = util.config(validate=False)