From 88dd8b6d6a1d2d90bc9005cd5eb5070032375fbe Mon Sep 17 00:00:00 2001 From: "Matthew.Iannucci" Date: Wed, 22 May 2024 16:10:55 -0400 Subject: [PATCH 1/9] First pass at redis cache in reference reads --- Dockerfile | 2 +- docker-compose.yml | 18 ++++- redis/redis.conf | 5 ++ requirements.txt | 2 + xreds/dataset_provider.py | 19 +++-- xreds/dependencies/__init__.py | 1 + xreds/dependencies/redis.py | 8 ++ xreds/redis.py | 16 ++++ xreds/utils.py | 139 +++++++++++++++++++++------------ 9 files changed, 150 insertions(+), 60 deletions(-) create mode 100644 redis/redis.conf create mode 100644 xreds/dependencies/__init__.py create mode 100644 xreds/dependencies/redis.py create mode 100644 xreds/redis.py diff --git a/Dockerfile b/Dockerfile index 5a34139..c35728d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -68,4 +68,4 @@ ARG ROOT_PATH=/xreds/ ENV ROOT_PATH ${ROOT_PATH} # Run the webserver -CMD ["sh", "-c", "gunicorn --workers=1 --worker-class=uvicorn.workers.UvicornWorker --log-level=debug --bind=0.0.0.0:${PORT} app:app"] +CMD ["sh", "-c", "gunicorn --workers=4 --worker-class=uvicorn.workers.UvicornWorker --log-level=debug --bind=0.0.0.0:${PORT} app:app"] diff --git a/docker-compose.yml b/docker-compose.yml index c4af359..51ebd19 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,14 +1,28 @@ version: '3' services: + redis: + container_name: redis + image: redis:7-alpine + volumes: + - ./redis/redis.conf:/usr/local/etc/redis/redis.conf + restart: on-failure + ports: + - "6380:6380" + command: redis-server /usr/local/etc/redis/redis.conf xreds: - image: xreds:latest - platform: linux/amd64 + container_name: xreds + build: . volumes: - "./datasets:/opt/xreds/datasets" + platform: linux/amd64 ports: - "8090:8090" + depends_on: + - redis environment: - PORT=8090 - datasets_mapping_file=/opt/xreds/datasets/datasets.json - EXPORT_THRESHOLD=600 + - REDIS_HOST=redis + - REDIS_PORT=6380 diff --git a/redis/redis.conf b/redis/redis.conf new file mode 100644 index 0000000..dec0717 --- /dev/null +++ b/redis/redis.conf @@ -0,0 +1,5 @@ +port 6380 +protected-mode no + +# Save to disk every 60 seconds if at least 1 key has changed +save 60 1 diff --git a/requirements.txt b/requirements.txt index 71fd645..7f19551 100644 --- a/requirements.txt +++ b/requirements.txt @@ -38,3 +38,5 @@ xpublish-wms@git+https://github.com/xpublish-community/xpublish-wms@9574a71405e4 xpublish-edr@git+https://github.com/xpublish-community/xpublish-edr@019e53acd2e0ad5a1d909d1acfe9863f2e90e51b opendap-protocol<1.2.0 xarray-subset-grid@git+https://github.com/asascience-open/xarray-subset-grid@81ce464b6357e7353deaaf350ad1be22295d238e +redis-fsspec-cache@git+https://github.com/mpiannucci/redis-fsspec-cache.git +redis==5.0.4 diff --git a/xreds/dataset_provider.py b/xreds/dataset_provider.py index e90a8cc..a14f9b5 100644 --- a/xreds/dataset_provider.py +++ b/xreds/dataset_provider.py @@ -1,18 +1,19 @@ -import yaml import datetime import fsspec +from redis import Redis import xarray as xr +import yaml from pluggy import PluginManager - from xpublish import Plugin, hookimpl +from xreds.config import settings from xreds.dataset_extension import DATASET_EXTENSION_PLUGIN_NAMESPACE +from xreds.dependencies.redis import get_redis +from xreds.extensions import VDatumTransformationExtension from xreds.logging import logger -from xreds.config import settings from xreds.utils import load_dataset -from xreds.extensions import VDatumTransformationExtension - +from xreds.redis import pool as redis_pool dataset_extension_manager = 
PluginManager(DATASET_EXTENSION_PLUGIN_NAMESPACE) dataset_extension_manager.register(VDatumTransformationExtension, name="vdatum") @@ -32,8 +33,8 @@ def __init__(self, **kwargs): fs = fsspec.filesystem("file") with fs.open(settings.datasets_mapping_file, "r") as f: - #load config using yaml, which can load json or yaml - #because yaml is a superset of json + # load config using yaml, which can load json or yaml + # because yaml is a superset of json self.dataset_mapping = yaml.safe_load(f) @hookimpl @@ -44,6 +45,8 @@ def get_datasets(self): def get_dataset(self, dataset_id: str) -> xr.Dataset: cache_key = f"dataset-{dataset_id}" + redis_cache = Redis(connection_pool=redis_pool) + cached_ds = self.datasets.get(cache_key, None) if cached_ds: if (datetime.datetime.now() - cached_ds["date"]).seconds < (10 * 60): @@ -56,7 +59,7 @@ def get_dataset(self, dataset_id: str) -> xr.Dataset: logger.info(f"No dataset found in cache for {dataset_id}, loading...") dataset_spec = self.dataset_mapping[dataset_id] - ds = load_dataset(dataset_spec) + ds = load_dataset(dataset_spec, redis_cache=None) if ds is None: raise ValueError(f"Dataset {dataset_id} not found") diff --git a/xreds/dependencies/__init__.py b/xreds/dependencies/__init__.py new file mode 100644 index 0000000..15c796c --- /dev/null +++ b/xreds/dependencies/__init__.py @@ -0,0 +1 @@ +# module diff --git a/xreds/dependencies/redis.py b/xreds/dependencies/redis.py new file mode 100644 index 0000000..14092c6 --- /dev/null +++ b/xreds/dependencies/redis.py @@ -0,0 +1,8 @@ +from fastapi import Depends +import redis + +from xreds.redis import pool as redis_pool + + +def get_redis(): + return redis.Redis(connection_pool=redis_pool) diff --git a/xreds/redis.py b/xreds/redis.py new file mode 100644 index 0000000..5745fa5 --- /dev/null +++ b/xreds/redis.py @@ -0,0 +1,16 @@ +import os + +import redis + +from xreds.logging import logger + + +def create_redis(redis_host: str, redis_port: int): + logger.warning(f"Creating redis connection pool for {redis_host}:{redis_port}") + return redis.ConnectionPool( + host=redis_host, + port=redis_port, + db=0 + ) + +pool = create_redis(os.getenv("REDIS_HOST", "localhost"), int(os.getenv("REDIS_PORT", "6379"))) diff --git a/xreds/utils.py b/xreds/utils.py index e91636e..033d25f 100644 --- a/xreds/utils.py +++ b/xreds/utils.py @@ -1,74 +1,115 @@ +from typing import Optional + import fsspec +import ujson import xarray as xr +from redis_fsspec_cache.reference import RedisCachingReferenceFileSystem +from redis import Redis from xreds.logging import logger def infer_dataset_type(dataset_path: str) -> str: - if dataset_path.endswith('.nc'): - return 'netcdf' - elif dataset_path.endswith('.grib2'): - return 'grib2' - elif dataset_path.endswith('.nc.zarr') or dataset_path.endswith('json'): - return 'kerchunk' - elif dataset_path.endswith('.zarr') : - return 'zarr' + if dataset_path.endswith(".nc"): + return "netcdf" + elif dataset_path.endswith(".grib2"): + return "grib2" + elif dataset_path.endswith(".nc.zarr") or dataset_path.endswith("json"): + return "kerchunk" + elif dataset_path.endswith(".zarr"): + return "zarr" - return 'unknown' + return "unknown" -def load_dataset(dataset_spec: dict) -> xr.Dataset | None: +def load_dataset( + dataset_spec: dict, redis_cache: Optional[Redis] = None +) -> xr.Dataset | None: """Load a dataset from a path""" ds = None - dataset_path = dataset_spec['path'] + dataset_path = dataset_spec["path"] dataset_type = dataset_spec.get("type", None) if not dataset_type: dataset_type = 
infer_dataset_type(dataset_path) logger.info(f"Inferred dataset type {dataset_type} for {dataset_path}") - if dataset_type == 'unknown': + if dataset_type == "unknown": logger.error(f"Could not infer dataset type for {dataset_path}") return None - chunks = dataset_spec.get('chunks', None) - drop_variables = dataset_spec.get('drop_variables', None) - additional_coords = dataset_spec.get('additional_coords', None) - additional_attrs = dataset_spec.get('additional_attrs', None) - key = dataset_spec.get('key', None) - secret = dataset_spec.get('secret', None) + chunks = dataset_spec.get("chunks", None) + drop_variables = dataset_spec.get("drop_variables", None) + additional_coords = dataset_spec.get("additional_coords", None) + additional_attrs = dataset_spec.get("additional_attrs", None) + key = dataset_spec.get("key", None) + secret = dataset_spec.get("secret", None) - if dataset_type == 'netcdf': - ds = xr.open_dataset(dataset_path, engine='netcdf4', chunks=chunks, drop_variables=drop_variables) + if dataset_type == "netcdf": + ds = xr.open_dataset( + dataset_path, engine="netcdf4", chunks=chunks, drop_variables=drop_variables + ) if additional_coords is not None: ds = ds.set_coords(additional_coords) - elif dataset_type == 'grib2': - ds = xr.open_dataset(dataset_path, engine='cfgrib') - elif dataset_type == 'kerchunk': + elif dataset_type == "grib2": + ds = xr.open_dataset(dataset_path, engine="cfgrib") + elif dataset_type == "kerchunk": if key is not None: - options = {'anon': False, 'key': key, 'secret': secret} + options = {"anon": False, "key": key, "secret": secret} + else: + options = {"anon": True} + + if redis_cache is not None: + logger.warning("USING REDIS CACHE") + # reference_url = f"rediscache::{dataset_path}" + # with fsspec.open( + # reference_url, + # mode="r", + # rediscache={"redis": redis_cache, "expiry": 60}, + # s3={"anon": True}, + # ) as f: + # raw_refs = f.read() + # refs = ujson.loads(raw_refs) + fs = RedisCachingReferenceFileSystem( + redis=redis_cache, + expiry_time=180, + fo=dataset_path, + target_protocol="s3", + target_options=options, + remote_protocol="s3", + remote_options=options, + ) else: - options = {'anon': True} - fs = fsspec.filesystem( - "filecache", - expiry_time=10 * 60, # TODO: Make this driven by config per dataset, for now default to 10 minutes - target_protocol='reference', - target_options={ - 'fo': dataset_path, - 'target_protocol': 's3', - 'target_options': options, - 'remote_protocol': 's3', - 'remote_options': options, - }) + fs = fsspec.filesystem( + "filecache", + expiry_time=10 + * 60, # TODO: Make this driven by config per dataset, for now default to 10 minutes + target_protocol="reference", + target_options={ + "fo": dataset_path, + "target_protocol": "s3", + "target_options": options, + "remote_protocol": "s3", + "remote_options": options, + }, + ) m = fs.get_mapper("") - ds = xr.open_dataset(m, engine="zarr", backend_kwargs=dict(consolidated=False), chunks=chunks, drop_variables=drop_variables) + ds = xr.open_dataset( + m, + engine="zarr", + backend_kwargs=dict(consolidated=False), + chunks=chunks, + drop_variables=drop_variables, + ) try: - if ds.cf.coords['longitude'].dims[0] == 'longitude': - ds = ds.assign_coords(longitude=(((ds.longitude + 180) % 360) - 180)).sortby('longitude') + if ds.cf.coords["longitude"].dims[0] == "longitude": + ds = ds.assign_coords( + longitude=(((ds.longitude + 180) % 360) - 180) + ).sortby("longitude") # TODO: Yeah this should not be assumed... 
but for regular grids we will viz with rioxarray so for now we will assume ds = ds.rio.write_crs(4326) except Exception as e: - logger.warning(f'Could not reindex longitude: {e}') + logger.warning(f"Could not reindex longitude: {e}") pass - elif dataset_type == 'zarr': + elif dataset_type == "zarr": # TODO: Enable S3 support # mapper = fsspec.get_mapper(dataset_location) ds = xr.open_zarr(dataset_path, consolidated=True) @@ -79,18 +120,18 @@ def load_dataset(dataset_spec: dict) -> xr.Dataset | None: # Add additional attributes to the dataset if provided if additional_attrs is not None: ds.attrs.update(additional_attrs) - + # Check if we have a time dimension and if it is not indexed, index it try: - time_dim = ds.cf['time'].dims[0] + time_dim = ds.cf["time"].dims[0] if not ds.indexes.get(time_dim, None): - time_coord = ds.cf['time'].name - logger.info(f'Indexing time dimension {time_dim} as {time_coord}') + time_coord = ds.cf["time"].name + logger.info(f"Indexing time dimension {time_dim} as {time_coord}") ds = ds.set_index({time_dim: time_coord}) - if 'standard_name' not in ds[time_dim].attrs: - ds[time_dim].attrs['standard_name'] = 'time' + if "standard_name" not in ds[time_dim].attrs: + ds[time_dim].attrs["standard_name"] = "time" except Exception as e: - logger.warning(f'Could not index time dimension: {e}') + logger.warning(f"Could not index time dimension: {e}") pass - return ds \ No newline at end of file + return ds From e2754d815f151323d20530d0ffa2583303507a3c Mon Sep 17 00:00:00 2001 From: "Matthew.Iannucci" Date: Thu, 23 May 2024 10:16:20 -0400 Subject: [PATCH 2/9] Turn off cache --- Dockerfile | 2 +- xreds/dataset_provider.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index c35728d..5a34139 100644 --- a/Dockerfile +++ b/Dockerfile @@ -68,4 +68,4 @@ ARG ROOT_PATH=/xreds/ ENV ROOT_PATH ${ROOT_PATH} # Run the webserver -CMD ["sh", "-c", "gunicorn --workers=4 --worker-class=uvicorn.workers.UvicornWorker --log-level=debug --bind=0.0.0.0:${PORT} app:app"] +CMD ["sh", "-c", "gunicorn --workers=1 --worker-class=uvicorn.workers.UvicornWorker --log-level=debug --bind=0.0.0.0:${PORT} app:app"] diff --git a/xreds/dataset_provider.py b/xreds/dataset_provider.py index a14f9b5..5075b53 100644 --- a/xreds/dataset_provider.py +++ b/xreds/dataset_provider.py @@ -77,7 +77,7 @@ def get_dataset(self, dataset_id: str) -> xr.Dataset: logger.info(f"Applying extension {ext_name} to dataset {dataset_id}") ds = extension().transform_dataset(ds=ds, config=ext_config) - self.datasets[cache_key] = {"dataset": ds, "date": datetime.datetime.now()} + # self.datasets[cache_key] = {"dataset": ds, "date": datetime.datetime.now()} if cache_key in self.datasets: logger.info(f"Loaded and cached dataset for {dataset_id}") From 24de011b4a98e1d527bd6b1f2c336b700fd8da1a Mon Sep 17 00:00:00 2001 From: "Matthew.Iannucci" Date: Thu, 23 May 2024 10:48:38 -0400 Subject: [PATCH 3/9] Fix requirements, get cache working --- requirements.txt | 2 +- xreds/dataset_provider.py | 2 +- xreds/utils.py | 17 ++++++++--------- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/requirements.txt b/requirements.txt index 7f19551..a69769b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -38,5 +38,5 @@ xpublish-wms@git+https://github.com/xpublish-community/xpublish-wms@9574a71405e4 xpublish-edr@git+https://github.com/xpublish-community/xpublish-edr@019e53acd2e0ad5a1d909d1acfe9863f2e90e51b opendap-protocol<1.2.0 
 xarray-subset-grid@git+https://github.com/asascience-open/xarray-subset-grid@81ce464b6357e7353deaaf350ad1be22295d238e
-redis-fsspec-cache@git+https://github.com/mpiannucci/redis-fsspec-cache.git
+redis-fsspec-cache@git+https://github.com/mpiannucci/redis-fsspec-cache.git@bc65aa0177ee35d17bfa67816579bc645f1e241c
 redis==5.0.4
diff --git a/xreds/dataset_provider.py b/xreds/dataset_provider.py
index 5075b53..16e773a 100644
--- a/xreds/dataset_provider.py
+++ b/xreds/dataset_provider.py
@@ -59,7 +59,7 @@ def get_dataset(self, dataset_id: str) -> xr.Dataset:
         logger.info(f"No dataset found in cache for {dataset_id}, loading...")
 
         dataset_spec = self.dataset_mapping[dataset_id]
-        ds = load_dataset(dataset_spec, redis_cache=None)
+        ds = load_dataset(dataset_spec, redis_cache=redis_cache)
         if ds is None:
             raise ValueError(f"Dataset {dataset_id} not found")
 
diff --git a/xreds/utils.py b/xreds/utils.py
index 033d25f..5532212 100644
--- a/xreds/utils.py
+++ b/xreds/utils.py
@@ -59,15 +59,14 @@ def load_dataset(
 
         if redis_cache is not None:
             logger.warning("USING REDIS CACHE")
-            # reference_url = f"rediscache::{dataset_path}"
-            # with fsspec.open(
-            #     reference_url,
-            #     mode="r",
-            #     rediscache={"redis": redis_cache, "expiry": 60},
-            #     s3={"anon": True},
-            # ) as f:
-            #     raw_refs = f.read()
-            #     refs = ujson.loads(raw_refs)
+            reference_url = f"rediscache::{dataset_path}"
+            with fsspec.open(
+                reference_url,
+                mode="rb",
+                rediscache={"redis": redis_cache, "expiry": 3 * 60},
+                s3={"anon": True},
+            ) as f:
+                refs = ujson.load(f)
             fs = RedisCachingReferenceFileSystem(
                 redis=redis_cache,
                 expiry_time=180,

From 5e32bade01bb0f98cd721111d82def565431c0da Mon Sep 17 00:00:00 2001
From: "Matthew.Iannucci"
Date: Thu, 23 May 2024 14:00:22 -0400
Subject: [PATCH 4/9] Add lots more documentation, do settings correctly

---
 Dockerfile                |  9 +++++---
 README.md                 | 47 +++++++++++++++++++++++++++++++++------
 app.py                    |  7 +++---
 deploy.yaml               |  4 ++--
 docker-compose.nginx.yml  |  2 +-
 docker-compose.redis.yml  | 29 ++++++++++++++++++++++++
 docker-compose.yml        | 18 +++------------
 nginx/nginx.conf          |  2 +-
 xreds/config.py           | 27 +++++++++++++++++++---
 xreds/dataset_provider.py |  6 ++---
 xreds/redis.py            | 17 ++++++++++++--
 11 files changed, 128 insertions(+), 40 deletions(-)
 create mode 100644 docker-compose.redis.yml

diff --git a/Dockerfile b/Dockerfile
index 5a34139..bdc928c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -15,7 +15,7 @@ COPY viewer/index.html ./index.html
 COPY viewer/public ./public
 COPY viewer/src ./src
 
-ARG ROOT_PATH=/xreds/
+ARG ROOT_PATH
 ENV VITE_XREDS_BASE_URL=${ROOT_PATH}
 
 RUN npm run build
@@ -64,8 +64,11 @@ COPY --from=0 /opt/viewer/dist ./viewer/dist
 # Set the port to run the server on
 ENV PORT 8090
 
-ARG ROOT_PATH=/xreds/
+ARG ROOT_PATH
 ENV ROOT_PATH ${ROOT_PATH}
 
+ARG WORKERS=1
+ENV WORKERS ${WORKERS}
+
 # Run the webserver
-CMD ["sh", "-c", "gunicorn --workers=1 --worker-class=uvicorn.workers.UvicornWorker --log-level=debug --bind=0.0.0.0:${PORT} app:app"]
+CMD ["sh", "-c", "gunicorn --workers=${WORKERS} --worker-class=uvicorn.workers.UvicornWorker --log-level=debug --bind=0.0.0.0:${PORT} app:app"]
diff --git a/README.md b/README.md
index 2d7b65c..abd7cfd 100644
--- a/README.md
+++ b/README.md
@@ -38,17 +38,17 @@ Build the react app
 
 ```bash
 cd viewer/
-yarn install
-yarn build
+npm install
+npm run build
 ```
 
 Run the following in the activated `virtualenv`:
 
 ```bash
-datasets_mapping_file=./test.json python app.py
+DATASETS_MAPPING_FILE=./test.json python app.py
 ```
 
-Where `datasets_mapping_file` is the path to the dataset key value store specified in the previous section. You can now navigate to http://localhost:8090/docs to see the supported operations
+Where `DATASETS_MAPPING_FILE` is the path to the dataset key value store as described [here](./README.md#specifying-datasets). You can now navigate to `http://localhost:8090/docs` to see the supported operations.
 
 ## Running With Docker
 
@@ -60,16 +60,37 @@ The docker container for the app can be built with:
 ```bash
 docker build -t xreds:latest .
 ```
-Once built, it requires a few things to be run: The 8090 port to be exposed, and a volume for the datasets to live in, and the environment variable pointing to the dateset json file.
+There are also build arguments available when building the docker image:
+
+- `ROOT_PATH`: The root path the app will be served from. Defaults to `/xreds/`.
+- `WORKERS`: The number of gunicorn workers to run. Defaults to `1`.
+
+Once built, it requires a few things to run: the `8090` port to be exposed, a volume for the datasets to live in, and the environment variable pointing to the dataset json file.
 
 ```bash
-docker run -p 8090:8090 -e "datasets_mapping_file=/path/to/datasets.json" -v "/path/to/datasets:/opt/xreds/datasets" xreds:latest
+docker run -p 8090:8090 -e "DATASETS_MAPPING_FILE=/path/to/datasets.json" -v "/path/to/datasets:/opt/xreds/datasets" xreds:latest
 ```
 
 ### Running with `docker compose`
 
+There are a few `docker compose` examples to get started with:
+
+#### Vanilla
+
+```bash
+docker compose up -d
+```
+
+#### With Redis
+
 ```bash
-docker compose --platform=linux/amd64 up -d
+docker compose -f docker-compose.redis.yml up -d
+```
+
+#### With NGINX Proxy
+
+```bash
+docker compose -f docker-compose.nginx.yml up -d
 ```
 
 ## Specifying Datasets
@@ -110,6 +131,18 @@ gfswave_global:
 
 Currently `zarr`, `netcdf`, and [`kerchunk`](https://github.com/fsspec/kerchunk) dataset types are supported. This information should be saved to a file and specified when running.
 
+## Configuration Options
+
+The following environment variables can be set to configure the app:
+
+- `DATASETS_MAPPING_FILE`: The fsspec compatible path to the dataset key value store as described [here](./README.md#specifying-datasets)
+- `PORT`: The port the app should run on. Defaults to `8090`
+- `ROOT_PATH`: The root path the app will be served from. Defaults to `` to be served from the root.
+- `EXPORT_THRESHOLD`: The maximum file size to allow to be exported. Defaults to `500 MB`
+- `USE_REDIS_CACHE`: Whether to use a redis cache for the app. Defaults to `False`
+- `REDIS_HOST`: [Optional] The host of the redis cache. Defaults to `localhost`
+- `REDIS_PORT`: [Optional] The port of the redis cache. Defaults to `6379`
+
 ## Building and Deploying Docker Image
 
 First follow instructions above to build the docker image tagged `xreds:latest`. Then the `xreds:latest` image needs to be tagged and deployed to the relevant docker registry.
diff --git a/app.py b/app.py index 6d97855..cf6ff32 100644 --- a/app.py +++ b/app.py @@ -2,9 +2,10 @@ import xpublish from fastapi.middleware.cors import CORSMiddleware + +from xreds.config import settings from xreds.plugins.export import ExportPlugin from xreds.plugins.size_plugin import SizePlugin - from xreds.spastaticfiles import SPAStaticFiles from xreds.dataset_provider import DatasetProvider from xreds.plugins.subset_plugin import SubsetPlugin, SubsetSupportPlugin @@ -20,7 +21,7 @@ datasets=None, ) -export_threshold = int(os.environ.get("EXPORT_THRESHOLD", 500)) +export_threshold = settings.export_threshold rest.register_plugin(DatasetProvider()) rest.register_plugin(SubsetSupportPlugin()) @@ -39,7 +40,7 @@ ) app.mount("/", SPAStaticFiles(directory="./viewer/dist", html=True), name="viewer") -app.root_path = os.environ.get("ROOT_PATH") +app.root_path = settings.root_path if __name__ == "__main__": diff --git a/deploy.yaml b/deploy.yaml index 50d72fa..60d872a 100644 --- a/deploy.yaml +++ b/deploy.yaml @@ -24,7 +24,7 @@ spec: env: - name: EXPORT_THRESHOLD value: "600" - - name: datasets_mapping_file + - name: DATASETS_MAPPING_FILE value: "s3://nextgen-dmac/kerchunk/datasets.json" --- apiVersion: v1 @@ -41,4 +41,4 @@ spec: port: 8090 targetPort: 8090 selector: - app: xreds \ No newline at end of file + app: xreds diff --git a/docker-compose.nginx.yml b/docker-compose.nginx.yml index a96ebfb..66555cf 100644 --- a/docker-compose.nginx.yml +++ b/docker-compose.nginx.yml @@ -22,4 +22,4 @@ services: environment: - PORT=8091 - ROOT_PATH=:8090 - - datasets_mapping_file=/opt/xreds/datasets/datasets.json + - DATASETS_MAPPING_FILE=/opt/xreds/datasets/datasets.json diff --git a/docker-compose.redis.yml b/docker-compose.redis.yml new file mode 100644 index 0000000..dbadfd1 --- /dev/null +++ b/docker-compose.redis.yml @@ -0,0 +1,29 @@ +version: '3' + +services: + redis: + container_name: redis + image: redis:7-alpine + volumes: + - ./redis/redis.conf:/usr/local/etc/redis/redis.conf + restart: on-failure + ports: + - "6380:6380" + command: redis-server /usr/local/etc/redis/redis.conf + xreds: + container_name: xreds + build: . + volumes: + - "./datasets:/opt/xreds/datasets" + platform: linux/amd64 + ports: + - "8090:8090" + depends_on: + - redis + environment: + - PORT=8090 + - DATASETS_MAPPING_FILE=/opt/xreds/datasets/datasets.json + - EXPORT_THRESHOLD=600 + - USE_REDIS_CACHE=true + - REDIS_HOST=redis + - REDIS_PORT=6380 diff --git a/docker-compose.yml b/docker-compose.yml index 51ebd19..f40876c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,28 +1,16 @@ version: '3' services: - redis: - container_name: redis - image: redis:7-alpine - volumes: - - ./redis/redis.conf:/usr/local/etc/redis/redis.conf - restart: on-failure - ports: - - "6380:6380" - command: redis-server /usr/local/etc/redis/redis.conf xreds: container_name: xreds - build: . + build: + context: . 
volumes: - "./datasets:/opt/xreds/datasets" platform: linux/amd64 ports: - "8090:8090" - depends_on: - - redis environment: - PORT=8090 - - datasets_mapping_file=/opt/xreds/datasets/datasets.json + - DATASETS_MAPPING_FILE=/opt/xreds/datasets/datasets.json - EXPORT_THRESHOLD=600 - - REDIS_HOST=redis - - REDIS_PORT=6380 diff --git a/nginx/nginx.conf b/nginx/nginx.conf index f2434e8..2e178d1 100644 --- a/nginx/nginx.conf +++ b/nginx/nginx.conf @@ -14,7 +14,7 @@ http { inactive=24h max_size=2g; server { location / { - proxy_pass http://zms:8091; + proxy_pass http://xms:8091; proxy_set_header Host $host; proxy_buffering on; proxy_cache STATIC; diff --git a/xreds/config.py b/xreds/config.py index 7c86256..41cf7a2 100644 --- a/xreds/config.py +++ b/xreds/config.py @@ -1,8 +1,29 @@ from pydantic_settings import BaseSettings -class Settings(BaseSettings): - datasets_mapping_file: str +class Settings(BaseSettings): + '''Settings for running xreds''' + # fsspec compatible url path to the dataset mapping file + # in either json or yml format + datasets_mapping_file: str = '' + # Root path for the service to mount at + root_path: str = '' -settings = Settings() \ No newline at end of file + # Size threshold exporting datasets to local files + # in MB + export_threshold: int = 500 + + # Whether to use redis to cache datasets when possible + use_redis_cache: bool = False + + # Optional redis host name + # If not provided, will default to localhost + redis_host: str = "localhost" + + # Optional redis port number + # If not provided, will default to 6379 + redis_port: int = 6379 + + +settings = Settings() diff --git a/xreds/dataset_provider.py b/xreds/dataset_provider.py index 16e773a..f47139b 100644 --- a/xreds/dataset_provider.py +++ b/xreds/dataset_provider.py @@ -1,7 +1,6 @@ import datetime import fsspec -from redis import Redis import xarray as xr import yaml from pluggy import PluginManager @@ -13,7 +12,7 @@ from xreds.extensions import VDatumTransformationExtension from xreds.logging import logger from xreds.utils import load_dataset -from xreds.redis import pool as redis_pool +from xreds.redis import get_redis_cache dataset_extension_manager = PluginManager(DATASET_EXTENSION_PLUGIN_NAMESPACE) dataset_extension_manager.register(VDatumTransformationExtension, name="vdatum") @@ -45,7 +44,7 @@ def get_datasets(self): def get_dataset(self, dataset_id: str) -> xr.Dataset: cache_key = f"dataset-{dataset_id}" - redis_cache = Redis(connection_pool=redis_pool) + redis_cache = get_redis_cache() cached_ds = self.datasets.get(cache_key, None) if cached_ds: @@ -77,6 +76,7 @@ def get_dataset(self, dataset_id: str) -> xr.Dataset: logger.info(f"Applying extension {ext_name} to dataset {dataset_id}") ds = extension().transform_dataset(ds=ds, config=ext_config) + # TODO: For now this cache is disabled for testing with redis # self.datasets[cache_key] = {"dataset": ds, "date": datetime.datetime.now()} if cache_key in self.datasets: diff --git a/xreds/redis.py b/xreds/redis.py index 5745fa5..af2a9a0 100644 --- a/xreds/redis.py +++ b/xreds/redis.py @@ -1,11 +1,19 @@ import os +from typing import Optional import redis +from xreds.config import Settings, settings from xreds.logging import logger -def create_redis(redis_host: str, redis_port: int): +def create_redis_pool(settings: Settings) -> Optional[redis.ConnectionPool]: + if not settings.use_redis_cache: + logger.warning("Not using redis cache") + return None + + redis_host = settings.redis_host + redis_port = settings.redis_port logger.warning(f"Creating redis 
connection pool for {redis_host}:{redis_port}")
     return redis.ConnectionPool(
         host=redis_host,
@@ -13,4 +21,9 @@ def create_redis(redis_host: str, redis_port: int):
         db=0
     )
 
-pool = create_redis(os.getenv("REDIS_HOST", "localhost"), int(os.getenv("REDIS_PORT", "6379")))
+pool = create_redis_pool(settings=settings)
+
+def get_redis_cache() -> Optional[redis.Redis]:
+    if pool is None:
+        return None
+    return redis.Redis(connection_pool=pool)

From aef10eaef93a3c30d0dfed53151eae08519f4c57 Mon Sep 17 00:00:00 2001
From: "Matthew.Iannucci"
Date: Thu, 23 May 2024 14:06:57 -0400
Subject: [PATCH 5/9] Update kubernetes and circleci build arguments

---
 .circleci/config.yml | 4 ++--
 deploy.yaml          | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 50ef6fb..a5c1a0f 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -28,7 +28,7 @@ jobs:
             echo "export TAG=${TAG}" >> $BASH_ENV
             echo "Building for TAG ${TAG}"
 
-            docker build -t ${ECR_REPO}:${TAG} .
+            docker build --build-arg="ROOT_PATH=/xreds/" -t ${ECR_REPO}:${TAG} .
 
       - run:
           name: Install Grype
@@ -76,4 +76,4 @@ workflows:
       filters:
         branches:
           only:
-            - main
\ No newline at end of file
+            - main
diff --git a/deploy.yaml b/deploy.yaml
index 60d872a..47477de 100644
--- a/deploy.yaml
+++ b/deploy.yaml
@@ -22,6 +22,8 @@ spec:
         ports:
         - containerPort: 8090
         env:
+        - name: ROOT_PATH
+          value: "/xreds/"
         - name: EXPORT_THRESHOLD
           value: "600"
         - name: DATASETS_MAPPING_FILE

From 992bf8b2cce0dd6192aa935035fb31b74b0d5ec6 Mon Sep 17 00:00:00 2001
From: "Matthew.Iannucci"
Date: Thu, 23 May 2024 14:59:32 -0400
Subject: [PATCH 6/9] Remove rps specific kubernetes yaml

---
 README.md   |  2 +-
 deploy.yaml | 46 ----------------------------------------------
 2 files changed, 1 insertion(+), 47 deletions(-)
 delete mode 100644 deploy.yaml

diff --git a/README.md b/README.md
index abd7cfd..48421f4 100644
--- a/README.md
+++ b/README.md
@@ -143,7 +143,7 @@ The following environment variables can be set to configure the app:
 - `REDIS_HOST`: [Optional] The host of the redis cache. Defaults to `localhost`
 - `REDIS_PORT`: [Optional] The port of the redis cache. Defaults to `6379`
 
-## Building and Deploying Docker Image
+## Building and Deploying Public Docker Image
 
 First follow instructions above to build the docker image tagged `xreds:latest`. Then the `xreds:latest` image needs to be tagged and deployed to the relevant docker registry.
diff --git a/deploy.yaml b/deploy.yaml deleted file mode 100644 index 47477de..0000000 --- a/deploy.yaml +++ /dev/null @@ -1,46 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: xreds - labels: - app: xreds -spec: - replicas: 1 - selector: - matchLabels: - app: xreds - - template: - metadata: - labels: - app: xreds - spec: - containers: - - name: xreds - image: public.ecr.aws/m2c5k9c1/nextgen-dmac/xreds:latest - imagePullPolicy: "Always" - ports: - - containerPort: 8090 - env: - - name: ROOT_PATH - value: "/xreds/" - - name: EXPORT_THRESHOLD - value: "600" - - name: DATASETS_MAPPING_FILE - value: "s3://nextgen-dmac/kerchunk/datasets.json" ---- -apiVersion: v1 -kind: Service -metadata: - name: xreds-service - labels: - app: xreds -spec: - selector: - app.kubernetes.io/name: xreds - ports: - - protocol: TCP - port: 8090 - targetPort: 8090 - selector: - app: xreds From d32ddf8028641ba1d00d63c245c4dc14fb4f927e Mon Sep 17 00:00:00 2001 From: "Matthew.Iannucci" Date: Thu, 23 May 2024 15:02:43 -0400 Subject: [PATCH 7/9] Add redis cache to vdatum extension --- xreds/extensions/vdatum.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/xreds/extensions/vdatum.py b/xreds/extensions/vdatum.py index d3321ed..3afe5ae 100644 --- a/xreds/extensions/vdatum.py +++ b/xreds/extensions/vdatum.py @@ -2,6 +2,7 @@ from xreds.dataset_extension import DatasetExtension, hookimpl from xreds.logging import logger +from xreds.redis import get_redis_cache from xreds.utils import load_dataset @@ -62,13 +63,14 @@ def transform_dataset(self, ds: xr.Dataset, config: dict) -> xr.Dataset: ) return ds - ds_vdatum = load_dataset({"path": vdatum_file}) + redis_cache = get_redis_cache() + ds_vdatum = load_dataset({"path": vdatum_file}, redis_cache=redis_cache) if ds_vdatum is None: logger.warning( f"Could not load vdatum dataset from {vdatum_file}. Skipping vdatum transformation" ) return ds - + target_zeta_var = config.get("water_level_var", "zeta") target_datum_var = config.get("vdatum_var", "igld85tolwd") target_datum_name = config.get("vdatum_name", "igld85") From e6ee049116f34e80ae7043095a3f840e42df8a21 Mon Sep 17 00:00:00 2001 From: "Matthew.Iannucci" Date: Thu, 23 May 2024 15:04:46 -0400 Subject: [PATCH 8/9] vdatum safety enhancements --- xreds/extensions/vdatum.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/xreds/extensions/vdatum.py b/xreds/extensions/vdatum.py index 3afe5ae..4f980df 100644 --- a/xreds/extensions/vdatum.py +++ b/xreds/extensions/vdatum.py @@ -72,8 +72,15 @@ def transform_dataset(self, ds: xr.Dataset, config: dict) -> xr.Dataset: return ds target_zeta_var = config.get("water_level_var", "zeta") - target_datum_var = config.get("vdatum_var", "igld85tolwd") - target_datum_name = config.get("vdatum_name", "igld85") + target_datum_var = config.get("vdatum_var", None) + target_datum_name = config.get("vdatum_name", None) + + if target_datum_var is None or target_datum_name is None: + logger.warning( + f"Dataset {ds.attrs.get('name', 'unknown')} does not have a vdatum_var or vdatum_name attribute. 
Skipping vdatum transformation" + ) + return ds + out_datum_var = f"{target_zeta_var}_{target_datum_name}" ds_transformed = transform_datum(ds, ds_vdatum, target_zeta_var, target_datum_var, target_datum_name, out_datum_var) From 8b9a68e489ea1793c83f76c03b623dabdbbc54fe Mon Sep 17 00:00:00 2001 From: Nicholas Delli Carpini Date: Thu, 23 May 2024 16:35:18 -0400 Subject: [PATCH 9/9] add "report" button --- viewer/package.json | 1 + viewer/src/pages/app.tsx | 103 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 103 insertions(+), 1 deletion(-) diff --git a/viewer/package.json b/viewer/package.json index 6c1fde6..013f0ab 100644 --- a/viewer/package.json +++ b/viewer/package.json @@ -11,6 +11,7 @@ }, "dependencies": { "@mapbox/mapbox-gl-draw": "^1.4.3", + "@mapbox/tilebelt": "1.0.2", "@tanstack/query-core": "^5.32.1", "@tanstack/react-query": "^5.32.1", "fast-xml-parser": "^4.3.2", diff --git a/viewer/src/pages/app.tsx b/viewer/src/pages/app.tsx index 3feb8b7..a96170e 100644 --- a/viewer/src/pages/app.tsx +++ b/viewer/src/pages/app.tsx @@ -1,4 +1,4 @@ -import { ImageSource, MapMouseEvent, Popup } from 'maplibre-gl'; +import { ImageSource, MapDataEvent, MapMouseEvent, Popup } from 'maplibre-gl'; import { useEffect, useRef, useState } from 'react'; import { bboxContainsPoint, createImageLayerParams } from '../tools'; import Map from '../components/map'; @@ -12,8 +12,20 @@ import { useDatasetMinMaxQuery, useDatasetsQuery, } from '../query/datasets'; +import { tileToBBOX } from '@mapbox/tilebelt'; import { Link } from 'react-router-dom'; +interface LoadingMetadata { + dataset: string, + variable: string, + bbox: number[], + elevation: number | undefined, + startTime: number, + endTime: number, + elapsedTime: number, + requestTime: string, +} + const colormaps: Array<{ id: string; name: string }> = [ { id: 'rainbow', name: 'Rainbow' }, { id: 'jet', name: 'Jet' }, @@ -74,6 +86,77 @@ function App() { }>({}); const [layerLoading, setLayerLoading] = useState(false); + const dataLoading = useRef<{ [k: string]: LoadingMetadata }>({}); + const dataLoaded = useRef([]); + + useEffect(() => { + if (!map.current) { + return; + } + + map.current.on("dataloading", (e: any) => { + if (!e.sourceId?.startsWith("xreds") || !e.source?.tiles || e.source.tiles.length === 0) { + return; + } + + const urlArr = e.source.tiles[0].split("/wms/?", 2) + const paramArr = urlArr[1].split("&"); + const tiles = map.current!.style.sourceCaches[e.sourceId]?._tiles; + if (!tiles) { + return; + } + + Object.keys(tiles).forEach((k) => { + const currID = `${e.sourceId}---${k}`; + if (tiles[k].state === "loading" && !dataLoading.current[currID]) { + const time = paramArr.find((p: string) => p.startsWith("time="))?.replace("time=", ""); + const variable = paramArr.find((p: string) => p.startsWith("layers="))?.replace("layers=", ""); + const elevation = paramArr.find((p: string) => p.startsWith("elevation="))?.replace("elevation=", ""); + dataLoading.current[currID] = { + dataset: urlArr[0].replace("/datasets/", ""), + variable: variable, + bbox: [], + elevation: elevation !== undefined ? 
parseFloat(elevation) : undefined, + requestTime: time, + startTime: (new Date()).getTime(), + endTime: -1, + elapsedTime: -1 + } + } + }); + }); + + map.current.on("data", (e: any) => { + if (!e.sourceId?.startsWith("xreds") || !e.source?.tiles || e.source.tiles.length === 0) { + return; + } + + const tiles = map.current!.style.sourceCaches[e.sourceId]?._tiles; + if (!tiles) { + return; + } + + Object.keys(dataLoading.current).forEach((k) => { + if (tiles[k.split("---")[1]] === undefined) { + delete dataLoading.current[k]; + } + }); + + Object.keys(tiles).forEach((k) => { + const currID = `${e.sourceId}---${k}`; + if (tiles[k].state === "loaded" && dataLoading.current[currID]) { + const coords = tiles[k].tileID.canonical; + dataLoading.current[currID].bbox = tileToBBOX([coords.x, coords.y, coords.z]); + dataLoading.current[currID].endTime = (new Date()).getTime(); + dataLoading.current[currID].elapsedTime = dataLoading.current[currID].endTime - dataLoading.current[currID].startTime; + + dataLoaded.current.push(dataLoading.current[currID]); + delete dataLoading.current[currID]; + } + }); + }); + }, []) + useEffect(() => { const datasetsCollapsed = datasetIds.data?.reduce( (obj: { [k: string]: boolean }, id: string) => { @@ -365,6 +448,24 @@ function App() { } /> + {datasetIds.data?.map((d, i) => (