Skip to content

Commit

Permalink
Merge pull request #40 from nasa/harmony-1719
Browse files Browse the repository at this point in the history
HARMONY-1719: Add request-id to download requests to support Cloud Metrics
  • Loading branch information
indiejames authored Apr 16, 2024
2 parents aafe2a0 + 6cd141c commit 825b9f7
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 4 deletions.
5 changes: 5 additions & 0 deletions harmony/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from pystac import Catalog, Item, Asset, read_file

from harmony.exceptions import CanceledException
from harmony.http import request_context
from harmony.logging import build_logger
from harmony.message import Temporal
from harmony.util import touch_health_check_file
Expand Down Expand Up @@ -70,6 +71,10 @@ def __init__(self, message, catalog=None, config=None):
warn('Invoking adapter.BaseHarmonyAdapter without a STAC catalog is deprecated',
DeprecationWarning, stacklevel=2)

# set the request ID in the global context so we can use it in other places
request_id = message.requestId if hasattr(message, 'requestId') else None
request_context['request_id'] = request_id

self.message = message
self.catalog = catalog
self.config = config
Expand Down
39 changes: 38 additions & 1 deletion harmony/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from functools import lru_cache
import json
from time import sleep
from urllib.parse import urlparse
from urllib.parse import urlparse, parse_qs, urlencode, urlunparse
import datetime
import sys
import os
Expand All @@ -35,6 +35,10 @@

MAX_RETRY_DELAY_SECS = 90

# `request_context` is used to provide information about the request to functions like `download`
# without adding extra function arguments. `_add_api_request_uuid` reads the 'request_id' key
# from it; a missing key or a value of None means no request ID is attached to download URLs.
# NOTE(review): this is a module-level dict, so its contents are shared by every caller in the
# process - presumably only one request is handled per process at a time; confirm.
request_context = {}


def get_retry_delay(retry_num: int, max_delay: int = MAX_RETRY_DELAY_SECS) -> int:
"""The number of seconds to sleep before retrying. Exponential backoff starting
Expand Down Expand Up @@ -132,6 +136,37 @@ def _earthdata_session():
return EarthdataSession()


def _add_api_request_uuid(url):
    """Return `url` with the current request ID attached as a query parameter.

    The ID is read from the module-level `request_context` dict under the key
    'request_id' and written to the 'A-api-request-uuid' query parameter,
    replacing any value already present under that name. All other query
    parameters, including ones with empty values, are carried over.

    Parameters
    ----------
    url : str
        The URL that is about to be downloaded.

    Returns
    -------
    str
        `url` unchanged when no request ID is set or when the scheme is not
        http/https; otherwise the URL rebuilt with the extra query parameter.
    """
    request_id = request_context.get('request_id')
    if request_id is None:
        return url

    parsed_url = urlparse(url)

    # Only http/https URLs get the parameter; anything else (e.g. s3://) is
    # returned untouched.
    if parsed_url.scheme not in ('http', 'https'):
        return url

    # keep_blank_values=True is required: by default parse_qs silently drops
    # parameters whose value is empty (e.g. `?foo=&bar=1` would lose `foo`).
    query_params = parse_qs(parsed_url.query, keep_blank_values=True)

    # Add or overwrite the request ID parameter
    query_params['A-api-request-uuid'] = request_id

    # doseq=True expands the list values produced by parse_qs back into
    # repeated `key=value` pairs.
    query_string = urlencode(query_params, doseq=True)

    return urlunparse(
        (parsed_url.scheme, parsed_url.netloc, parsed_url.path,
         parsed_url.params, query_string, parsed_url.fragment)
    )


def _download(
config, url: str,
access_token: str,
Expand Down Expand Up @@ -359,6 +394,8 @@ def download(config, url: str, access_token: str, data, destination_file,

response = None
logger = build_logger(config)
# Add the request ID to the download url so it can be used by Cloud Metrics
url = _add_api_request_uuid(url)
start_time = datetime.datetime.now()
logger.info(f'timing.download.start {url}')

Expand Down
75 changes: 72 additions & 3 deletions tests/test_util.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import pathlib
from requests import Session
import unittest
from unittest.mock import patch, MagicMock, mock_open, ANY
from urllib.error import HTTPError

from harmony import aws
from harmony import util
from harmony.http import request_context
from harmony.message import Variable
from tests.test_cli import MockAdapter, cli_test
from tests.util import mock_receive, config_fixture
Expand All @@ -19,7 +21,7 @@ def setUp(self):
@patch('harmony.aws.Config')
def test_s3_download_sets_minimal_user_agent_on_boto_client(self, boto_cfg, client, get_version):
fake_lib_version = '0.1.0'
get_version.return_value = fake_lib_version
get_version.return_value = fake_lib_version
cfg = config_fixture()
boto_cfg_instance = MagicMock()
boto_cfg.return_value = boto_cfg_instance
Expand All @@ -33,7 +35,7 @@ def test_s3_download_sets_minimal_user_agent_on_boto_client(self, boto_cfg, clie
@patch('harmony.aws.Config')
def test_s3_download_sets_harmony_user_agent_on_boto_client(self, boto_cfg, client, get_version):
fake_lib_version = '0.1.0'
get_version.return_value = fake_lib_version
get_version.return_value = fake_lib_version
harmony_user_agt = 'harmony/3.3.3 (harmony-test)'
cfg = config_fixture(user_agent=harmony_user_agt)
boto_cfg_instance = MagicMock()
Expand All @@ -49,7 +51,7 @@ def test_s3_download_sets_harmony_user_agent_on_boto_client(self, boto_cfg, clie
def test_s3_download_sets_app_name_on_boto_client(self, boto_cfg, client, get_version):
app_name = 'gdal-subsetter'
fake_lib_version = '0.1.0'
get_version.return_value = fake_lib_version
get_version.return_value = fake_lib_version
cfg = config_fixture(app_name=app_name)
boto_cfg_instance = MagicMock()
boto_cfg.return_value = boto_cfg_instance
Expand All @@ -58,6 +60,73 @@ def test_s3_download_sets_app_name_on_boto_client(self, boto_cfg, client, get_ve
boto_cfg.assert_called_with(user_agent_extra=f'harmony (unknown version) harmony-service-lib/{fake_lib_version} ({app_name})')
client.assert_called_with(service_name='s3', config=boto_cfg_instance, region_name=ANY)

@patch.dict(request_context, {'request_id': 'abc123'})
@patch('harmony.util.get_version')
@patch('harmony.aws.download')
@patch('harmony.aws.Config')
def test_s3_download_does_not_set_api_request_uuid(self, boto_cfg, aws_download, get_version):
    """An s3:// URL is handed to `aws.download` without the request-id query parameter.

    `patch.dict` sets the request ID only for the duration of this test and
    restores `request_context` afterwards, so the shared module-level dict
    does not leak state into other tests.
    """
    app_name = 'gdal-subsetter'
    fake_lib_version = '0.1.0'
    get_version.return_value = fake_lib_version
    cfg = config_fixture(app_name=app_name)
    boto_cfg_instance = MagicMock()
    boto_cfg.return_value = boto_cfg_instance
    with patch('builtins.open', mock_open()):
        util.download('s3://example/file.txt', 'tmp', access_token='', cfg=cfg)
    # The URL must be passed through exactly as given
    aws_download.assert_called_with(ANY, 's3://example/file.txt', ANY, ANY)

@patch.dict(request_context, {'request_id': 'abc123'})
@patch('harmony.util.get_version')
@patch.object(Session, 'get')
def test_http_download_sets_api_request_uuid(self, get, get_version):
    """An http:// GET download has the request ID appended as `A-api-request-uuid`.

    `patch.dict` sets the request ID only for the duration of this test and
    restores `request_context` afterwards, so the shared module-level dict
    does not leak state into other tests.
    """
    app_name = 'gdal-subsetter'
    fake_lib_version = '0.1.0'
    get_version.return_value = fake_lib_version
    cfg = config_fixture(app_name=app_name)
    with patch('builtins.open', mock_open()):
        util.download('http://example/file.txt', 'tmp', access_token='', cfg=cfg)
    get.assert_called_with(
        'http://example/file.txt?A-api-request-uuid=abc123',
        headers={'user-agent': f'harmony (unknown version) harmony-service-lib/{fake_lib_version} (gdal-subsetter)'},
        timeout=60,
        stream=True,
    )

@patch.dict(request_context, {'request_id': 'abc123'})
@patch('harmony.util.get_version')
@patch.object(Session, 'get')
def test_https_download_sets_api_request_uuid(self, get, get_version):
    """An https:// GET download has the request ID appended as `A-api-request-uuid`.

    `patch.dict` sets the request ID only for the duration of this test and
    restores `request_context` afterwards, so the shared module-level dict
    does not leak state into other tests.
    """
    app_name = 'gdal-subsetter'
    fake_lib_version = '0.1.0'
    get_version.return_value = fake_lib_version
    cfg = config_fixture(app_name=app_name)
    with patch('builtins.open', mock_open()):
        util.download('https://example/file.txt', 'tmp', access_token='', cfg=cfg)
    get.assert_called_with(
        'https://example/file.txt?A-api-request-uuid=abc123',
        headers={'user-agent': f'harmony (unknown version) harmony-service-lib/{fake_lib_version} (gdal-subsetter)'},
        timeout=60,
        stream=True,
    )

@patch.dict(request_context, {'request_id': 'abc123'})
@patch('harmony.util.get_version')
@patch.object(Session, 'post')
def test_http_download_with_post_sets_api_request_uuid(self, post, get_version):
    """An http:// POST download has the request ID appended as `A-api-request-uuid`.

    `patch.dict` sets the request ID only for the duration of this test and
    restores `request_context` afterwards, so the shared module-level dict
    does not leak state into other tests.
    """
    app_name = 'gdal-subsetter'
    fake_lib_version = '0.1.0'
    get_version.return_value = fake_lib_version
    data = {'foo': 'bar'}
    cfg = config_fixture(app_name=app_name)
    with patch('builtins.open', mock_open()):
        util.download('http://example/file.txt', 'tmp', access_token='', data=data, cfg=cfg)
    post.assert_called_with(
        'http://example/file.txt?A-api-request-uuid=abc123',
        headers={
            'user-agent': f'harmony (unknown version) harmony-service-lib/{fake_lib_version} (gdal-subsetter)',
            'Content-Type': 'application/x-www-form-urlencoded',
        },
        data={'foo': 'bar'},
        timeout=60,
        stream=True,
    )


@patch.dict(request_context, {'request_id': 'abc123'})
@patch('harmony.util.get_version')
@patch.object(Session, 'post')
def test_https_download_with_post_sets_api_request_uuid(self, post, get_version):
    """An https:// POST download has the request ID appended as `A-api-request-uuid`.

    `patch.dict` sets the request ID only for the duration of this test and
    restores `request_context` afterwards, so the shared module-level dict
    does not leak state into other tests.
    """
    app_name = 'gdal-subsetter'
    fake_lib_version = '0.1.0'
    get_version.return_value = fake_lib_version
    data = {'foo': 'bar'}
    cfg = config_fixture(app_name=app_name)
    with patch('builtins.open', mock_open()):
        util.download('https://example/file.txt', 'tmp', access_token='', data=data, cfg=cfg)
    post.assert_called_with(
        'https://example/file.txt?A-api-request-uuid=abc123',
        headers={
            'user-agent': f'harmony (unknown version) harmony-service-lib/{fake_lib_version} (gdal-subsetter)',
            'Content-Type': 'application/x-www-form-urlencoded',
        },
        data={'foo': 'bar'},
        timeout=60,
        stream=True,
    )


class TestStage(unittest.TestCase):
def setUp(self):
self.config = util.config(validate=False)
Expand Down

0 comments on commit 825b9f7

Please sign in to comment.