From da5ed3d58c8fa38f04f5970cd69b4b94fc6810e5 Mon Sep 17 00:00:00 2001 From: Tom Kralidis Date: Thu, 22 Aug 2024 10:22:12 -0400 Subject: [PATCH] add security property to WNM for access controlled data (#643) (#739) * add security property to WNM for access controlled data (#643) * add security to WCMP2 output --- .github/workflows/tests-docker.yml | 3 +- docs/source/reference/quickstart.rst | 4 +- docs/source/user/data-ingest.rst | 4 +- .../cd-surface-weather-observations.yml | 4 +- tests/integration/test_workflow.py | 68 +++++++++++++++---- wis2box-management/wis2box/auth.py | 39 ++++++++++- .../wis2box/metadata/discovery.py | 9 +++ wis2box-management/wis2box/pubsub/message.py | 28 +++++++- 8 files changed, 138 insertions(+), 21 deletions(-) diff --git a/.github/workflows/tests-docker.yml b/.github/workflows/tests-docker.yml index 58cec091..8b0e1d49 100644 --- a/.github/workflows/tests-docker.yml +++ b/.github/workflows/tests-docker.yml @@ -113,7 +113,7 @@ jobs: python3 wis2box-ctl.py execute wis2box data ingest -mdi $DISCOVERY_METADATA_ID -p $TEST_DATA - name: add Congo synop data (synop2bufr) 🇨🇩 env: - TOPIC_HIERARCHY: origin/a/wis2/cd-brazza_met_centre/data/core/weather/surface-based-observations/synop + TOPIC_HIERARCHY: origin/a/wis2/cd-brazza_met_centre/data/recommended/weather/surface-based-observations/synop TERRITORY: COD DISCOVERY_METADATA: /data/wis2box/metadata/discovery/cd-surface-weather-observations.yml DISCOVERY_METADATA_ID: urn:wmo:md:cd-brazza_met_centre:surface-weather-observations @@ -124,6 +124,7 @@ jobs: python3 wis2box-ctl.py execute wis2box metadata station publish-collection --path $STATION_METADATA --topic-hierarchy $TOPIC_HIERARCHY curl -s http://localhost/oapi/collections/discovery-metadata/items/$DISCOVERY_METADATA_ID --output /tmp/$DISCOVERY_METADATA_ID check-jsonschema --schemafile /tmp/wcmp2-bundled.json /tmp/$DISCOVERY_METADATA_ID + python3 wis2box-ctl.py execute wis2box auth add-token --metadata-id $DISCOVERY_METADATA_ID -p token123 -y python3 wis2box-ctl.py execute wis2box data ingest -mdi $DISCOVERY_METADATA_ID -p $TEST_DATA - name: add example ship data (bufr2bufr) WMO env: diff --git a/docs/source/reference/quickstart.rst b/docs/source/reference/quickstart.rst index a3b22b51..996b0532 100644 --- a/docs/source/reference/quickstart.rst +++ b/docs/source/reference/quickstart.rst @@ -66,7 +66,7 @@ Load initial stations: wis2box metadata station publish-collection --path /data/wis2box/metadata/station/italy.csv --topic-hierarchy it-roma_met_centre.data.core.weather.surface-based-observations.synop wis2box metadata station publish-collection --path /data/wis2box/metadata/station/algeria.csv --topic-hierarchy dz-alger_met_centre.data.core.weather.surface-based-observations.synop wis2box metadata station publish-collection --path /data/wis2box/metadata/station/romania.csv --topic-hierarchy ro-rnimh.data.core.weather.surface-based-observations.synop - wis2box metadata station publish-collection --path /data/wis2box/metadata/station/congo.csv --topic-hierarchy cd-brazza_met_centre.data.core.weather.surface-based-observations.synop + wis2box metadata station publish-collection --path /data/wis2box/metadata/station/congo.csv --topic-hierarchy cd-brazza_met_centre.data.recommended.weather.surface-based-observations.synop wis2box metadata station publish-collection --path /data/wis2box/metadata/station/wmo-test-ship.csv --topic-hierarchy int-wmo-test.data.core.weather.surface-based-observations.ship wis2box metadata station publish-collection --path /data/wis2box/metadata/station/wmo-test-buoy.csv --topic-hierarchy int-wmo-test.data.core.weather.surface-based-observations.buoy wis2box metadata station publish-collection --path /data/wis2box/metadata/station/wmo-test-wind-profiler.csv --topic-hierarchy int-wmo-test.data.core.weather.surface-based-observations.wind_profiler @@ -79,7 +79,7 @@ Ingest data using the data ingest command to push data to the ``wis2box-incoming wis2box data ingest --topic-hierarchy it-roma_met_centre.data.core.weather.surface-based-observations.synop --path $WIS2BOX_DATADIR/observations/italy wis2box data ingest --topic-hierarchy dz-alger_met_centre.data.core.weather.surface-based-observations.synop --path $WIS2BOX_DATADIR/observations/algeria wis2box data ingest --topic-hierarchy ro-rnimh.data.core.weather.surface-based-observations.synop --path $WIS2BOX_DATADIR/observations/romania - wis2box data ingest --topic-hierarchy cd-brazza_met_centre.data.core.weather.surface-based-observations.synop --path $WIS2BOX_DATADIR/observations/congo + wis2box data ingest --topic-hierarchy cd-brazza_met_centre.data.recommended.weather.surface-based-observations.synop --path $WIS2BOX_DATADIR/observations/congo wis2box data ingest --topic-hierarchy int-wmo-test.data.core.weather.surface-based-observations.ship --path $WIS2BOX_DATADIR/observations/wmo/ship wis2box data ingest --topic-hierarchy int-wmo-test.data.core.weather.surface-based-observations.buoy --path $WIS2BOX_DATADIR/observations/wmo/buoy wis2box data ingest --topic-hierarchy int-wmo-test.data.core.weather.surface-based-observations.wind_profiler --path $WIS2BOX_DATADIR/observations/wmo/wind_profiler diff --git a/docs/source/user/data-ingest.rst b/docs/source/user/data-ingest.rst index 5652fd27..5fb44e34 100644 --- a/docs/source/user/data-ingest.rst +++ b/docs/source/user/data-ingest.rst @@ -90,8 +90,8 @@ Select 'browse' on the ``wis2box-incoming`` bucket and select 'Choose or create For example using a filepath matching the topic hierarchy: - * Topic Hierarchy: ``origin/a/wis2/cd-brazza_met_centre/data/core/weather/surface-based-observations/synop`` - * upload data in the path containing: ``cd-brazza_met_centre/data/core/weather/surface-based-observations/synop`` + * Topic Hierarchy: ``origin/a/wis2/cd-brazza_met_centre/data/recommended/weather/surface-based-observations/synop`` + * upload data in the path containing: ``cd-brazza_met_centre/data/recommended/weather/surface-based-observations/synop`` The error message ``Path validation error: Could not match http://minio:9000/wis2box-incoming/... to dataset, ...`` indicates that a file was stored in a directory that could not be matched to a dataset. diff --git a/tests/data/metadata/discovery/cd-surface-weather-observations.yml b/tests/data/metadata/discovery/cd-surface-weather-observations.yml index 76dcda28..87980e5a 100644 --- a/tests/data/metadata/discovery/cd-surface-weather-observations.yml +++ b/tests/data/metadata/discovery/cd-surface-weather-observations.yml @@ -1,6 +1,6 @@ wis2box: retention: P180D - topic_hierarchy: cd-brazza_met_centre/data/core/weather/surface-based-observations/synop + topic_hierarchy: cd-brazza_met_centre/data/recommended/weather/surface-based-observations/synop country: cog centre_id: cd-brazza_met_centre data_mappings: @@ -49,7 +49,7 @@ identification: end: null resolution: P1H url: https://example.org/malawi-surface-weather-observations - wmo_data_policy: core + wmo_data_policy: recommended contact: host: diff --git a/tests/integration/test_workflow.py b/tests/integration/test_workflow.py index 5d7d02df..2aa29766 100644 --- a/tests/integration/test_workflow.py +++ b/tests/integration/test_workflow.py @@ -53,7 +53,7 @@ def test_wis2downloader(): 'origin/a/wis2/dz-alger_met_centre/data/core/weather/surface-based-observations/synop': 28, # noqa 'origin/a/wis2/cn-cma/data/core/weather/prediction/forecast/medium-range/probabilistic/global': 10, # noqa 'origin/a/wis2/ro-rnimh/data/core/weather/surface-based-observations/synop': 49, # noqa - 'origin/a/wis2/cd-brazza_met_centre/data/core/weather/surface-based-observations/synop': 14, # noqa + 'origin/a/wis2/cd-brazza_met_centre/data/recommended/weather/surface-based-observations/synop': 0, # noqa 'origin/a/wis2/int-wmo-test/data/core/weather/surface-based-observations/buoy': 2, # noqa 'origin/a/wis2/int-wmo-test/data/core/weather/surface-based-observations/wind_profiler': 1, # noqa 'origin/a/wis2/int-wmo-test/data/core/weather/surface-based-observations/ship': 5, # noqa @@ -196,6 +196,17 @@ def test_metadata_discovery_publish(): r = r.json() assert r['conformsTo'][0] == 'http://wis.wmo.int/spec/wcmp/2/conf/core' + id_ = 'urn:wmo:md:cd-brazza_met_centre:surface-weather-observations' + r = SESSION.get(f'{API_URL}/collections/discovery-metadata/items/{id_}').json() # noqa + + assert 'has_auth' in r['wis2box'] + assert r['wis2box']['has_auth'] + + for link in r['links']: + if link['rel'] == 'collection' and link['title'] == id_: + assert link['security']['default']['type'] == 'http' + assert link['security']['default']['scheme'] == 'bearer' + def test_data_ingest(): """Test data ingest/process publish""" @@ -305,8 +316,8 @@ def test_message_api(): # should match sum of counts above assert r['numberMatched'] == sum(counts.values()) - # we want to find a particular message with data ID - target_data_id = "cd-brazza_met_centre:surface-weather-observations/WIGOS_0-20000-0-64406_20230803T090000" # noqa + # we want to find a particular message with data ID for core data + target_data_id = 'mw-mw_met_centre:surface-weather-observations/WIGOS_0-454-2-AWSLOBI_20211111T125500' # noqa msg = None for feature in r['features']: @@ -322,18 +333,14 @@ def test_message_api(): assert msg['geometry'] is not None props = msg['properties'] - assert props['datetime'] == '2023-08-03T09:00:00Z' - assert props['wigos_station_identifier'] == '0-20000-0-64406' + assert props['datetime'] == '2021-11-11T12:55:00Z' + assert props['wigos_station_identifier'] == '0-454-2-AWSLOBI' assert props['integrity']['method'] == 'sha512' - assert not props['data_id'].startswith('wis2') - assert not props['data_id'].startswith('origin/a/wis2') - assert props['data_id'].startswith('cd') - assert props['content']['size'] == 253 + assert not props['data_id'].startswith(('wis2', 'origin/a/wis2')) + assert props['data_id'].startswith('mw') + assert props['content']['size'] == 247 assert props['content']['encoding'] == 'base64' assert props['content']['value'] is not None - assert 'gts' in props - assert props['gts']['ttaaii'] == 'SICG20' - assert props['gts']['cccc'] == 'FCBB' link_rel = msg['links'][0] @@ -346,3 +353,40 @@ def test_message_api(): assert str(r.headers['Content-Length']) == str(link_rel['length']) assert b'BUFR' in r.content + + # we want to find a particular message with data ID for recommended data + url = f'{API_URL}/collections/messages/items?sortby=-datetime&q=cd-brazza_met_centre' # noqa + r = SESSION.get(url).json() + + target_data_id = "cd-brazza_met_centre:surface-weather-observations/WIGOS_0-20000-0-64406_20230803T090000" # noqa + + msg = None + for feature in r['features']: + if feature['properties']['data_id'] == target_data_id: + msg = feature + break + + assert msg is not None + + is_valid, _ = validate_message(msg) + assert is_valid + + assert msg['geometry'] is not None + + props = msg['properties'] + assert props['datetime'] == '2023-08-03T09:00:00Z' + assert props['wigos_station_identifier'] == '0-20000-0-64406' + assert props['integrity']['method'] == 'sha512' + assert not props['data_id'].startswith('wis2') + assert not props['data_id'].startswith('origin/a/wis2') + assert props['data_id'].startswith('cd') + assert 'content' not in props + assert 'gts' in props + assert props['gts']['ttaaii'] == 'SICG20' + assert props['gts']['cccc'] == 'FCBB' + + link_rel = msg['links'][0] + + assert link_rel['type'] == 'application/x-bufr' + assert link_rel['security']['default']['type'] == 'http' + assert link_rel['security']['default']['scheme'] == 'bearer' diff --git a/wis2box-management/wis2box/auth.py b/wis2box-management/wis2box/auth.py index 0728e875..6d4f4696 100644 --- a/wis2box-management/wis2box/auth.py +++ b/wis2box-management/wis2box/auth.py @@ -24,9 +24,12 @@ import requests from secrets import token_hex +from owslib.ogcapi.records import Records + from wis2box import cli_helpers +from wis2box.api import upsert_collection_item from wis2box.data_mappings import get_data_mappings -from wis2box.env import AUTH_URL +from wis2box.env import AUTH_URL, DOCKER_API_URL LOGGER = logging.getLogger(__name__) @@ -170,6 +173,27 @@ def add_token(ctx, metadata_id, path, yes, token): if create_token(path, token): click.echo('Token successfully created') + if metadata_id is not None: + click.echo('Adding access control object to discovery metadata') + + oar = Records(DOCKER_API_URL) + + record = oar.collection_item('discovery-metadata', metadata_id) + record['wis2box']['has_auth'] = True + + for link in record['links']: + if link['rel'] == 'collection' and link['title'] == metadata_id: + LOGGER.debug('Adding security object to link') + link['security'] = { + 'default': { + 'type': 'http', + 'scheme': 'bearer', + 'description': 'Please contact the data provider for access' # noqa + } + } + + upsert_collection_item('discovery-metadata', record) + @click.command() @click.pass_context @@ -192,6 +216,19 @@ def remove_token(ctx, metadata_id, path, token): if delete_token(path, token): click.echo('Token successfully deleted') + if metadata_id is not None: + click.echo('Removing access control object to discovery metadata') + + oar = Records(DOCKER_API_URL) + + record = oar.collection_item('discovery-metadata', metadata_id) + record['wis2box'].pop('has_auth', None) + for link in record['links']: + if 'security' in link: + link.pop('security', None) + + upsert_collection_item('discovery-metadata', record) + auth.add_command(add_token) auth.add_command(remove_token) diff --git a/wis2box-management/wis2box/metadata/discovery.py b/wis2box-management/wis2box/metadata/discovery.py index b394d6e1..1e40085e 100644 --- a/wis2box-management/wis2box/metadata/discovery.py +++ b/wis2box-management/wis2box/metadata/discovery.py @@ -266,6 +266,15 @@ def publish_discovery_metadata(metadata: Union[dict, str]): LOGGER.error(msg) raise RuntimeError(msg) + oar = Records(DOCKER_API_URL) + try: + LOGGER.debug('Checking if record / auth enabled') + r = oar.collection_item('discovery-metadata', record['id']).json() + if r['wis2box'].get('has_auth', False): + record['wis2box']['has_auth'] = True + except Exception: + LOGGER.debug('No auth defined') + LOGGER.debug('Publishing to API') upsert_collection_item('discovery-metadata', record) diff --git a/wis2box-management/wis2box/pubsub/message.py b/wis2box-management/wis2box/pubsub/message.py index 14e3d3ae..758f44b5 100644 --- a/wis2box-management/wis2box/pubsub/message.py +++ b/wis2box-management/wis2box/pubsub/message.py @@ -28,9 +28,11 @@ from pathlib import Path import uuid +from owslib.ogcapi.records import Records + from wis2box import __version__ from wis2box.util import json_serial -from wis2box.env import STORAGE_PUBLIC, URL, STORAGE_SOURCE +from wis2box.env import DOCKER_API_URL, STORAGE_PUBLIC, URL, STORAGE_SOURCE from wis2box.storage import get_data LOGGER = logging.getLogger(__name__) @@ -210,6 +212,30 @@ def __init__(self, identifier: str, metadata_id: str, filepath: str, } self.message['links'].append(link) + LOGGER.debug(f'Checking for access control (metadata id: {metadata_id})') # noqa + try: + oar = Records(DOCKER_API_URL) + record = oar.collection_item('discovery-metadata', metadata_id) + + if record['wis2box'].get('has_auth'): + LOGGER.debug('Updating message with access control') + + for link in self.message['links']: + if link['href'] == public_file_url: + LOGGER.debug('Adding security object to link') + link['security'] = { + 'default': { + 'type': 'http', + 'scheme': 'bearer', + 'description': 'Please contact the data provider for access' # noqa + } + } + + LOGGER.debug('Removing inline content') + self.message['properties'].pop('content', None) + except Exception as err: + LOGGER.debug(f'Cannot locate metadata record: {err}') + def gcm() -> dict: """