From 88dd8b6d6a1d2d90bc9005cd5eb5070032375fbe Mon Sep 17 00:00:00 2001 From: "Matthew.Iannucci" Date: Wed, 22 May 2024 16:10:55 -0400 Subject: [PATCH 1/9] First pass at redis cache in reference reads --- Dockerfile | 2 +- docker-compose.yml | 18 ++++- redis/redis.conf | 5 ++ requirements.txt | 2 + xreds/dataset_provider.py | 19 +++-- xreds/dependencies/__init__.py | 1 + xreds/dependencies/redis.py | 8 ++ xreds/redis.py | 16 ++++ xreds/utils.py | 139 +++++++++++++++++++++------------ 9 files changed, 150 insertions(+), 60 deletions(-) create mode 100644 redis/redis.conf create mode 100644 xreds/dependencies/__init__.py create mode 100644 xreds/dependencies/redis.py create mode 100644 xreds/redis.py diff --git a/Dockerfile b/Dockerfile index 5a34139..c35728d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -68,4 +68,4 @@ ARG ROOT_PATH=/xreds/ ENV ROOT_PATH ${ROOT_PATH} # Run the webserver -CMD ["sh", "-c", "gunicorn --workers=1 --worker-class=uvicorn.workers.UvicornWorker --log-level=debug --bind=0.0.0.0:${PORT} app:app"] +CMD ["sh", "-c", "gunicorn --workers=4 --worker-class=uvicorn.workers.UvicornWorker --log-level=debug --bind=0.0.0.0:${PORT} app:app"] diff --git a/docker-compose.yml b/docker-compose.yml index c4af359..51ebd19 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,14 +1,28 @@ version: '3' services: + redis: + container_name: redis + image: redis:7-alpine + volumes: + - ./redis/redis.conf:/usr/local/etc/redis/redis.conf + restart: on-failure + ports: + - "6380:6380" + command: redis-server /usr/local/etc/redis/redis.conf xreds: - image: xreds:latest - platform: linux/amd64 + container_name: xreds + build: . volumes: - "./datasets:/opt/xreds/datasets" + platform: linux/amd64 ports: - "8090:8090" + depends_on: + - redis environment: - PORT=8090 - datasets_mapping_file=/opt/xreds/datasets/datasets.json - EXPORT_THRESHOLD=600 + - REDIS_HOST=redis + - REDIS_PORT=6380 diff --git a/redis/redis.conf b/redis/redis.conf new file mode 100644 index 0000000..dec0717 --- /dev/null +++ b/redis/redis.conf @@ -0,0 +1,5 @@ +port 6380 +protected-mode no + +# Save to disk every 60 seconds if at least 1 key has changed +save 60 1 diff --git a/requirements.txt b/requirements.txt index 71fd645..7f19551 100644 --- a/requirements.txt +++ b/requirements.txt @@ -38,3 +38,5 @@ xpublish-wms@git+https://github.com/xpublish-community/xpublish-wms@9574a71405e4 xpublish-edr@git+https://github.com/xpublish-community/xpublish-edr@019e53acd2e0ad5a1d909d1acfe9863f2e90e51b opendap-protocol<1.2.0 xarray-subset-grid@git+https://github.com/asascience-open/xarray-subset-grid@81ce464b6357e7353deaaf350ad1be22295d238e +redis-fsspec-cache@git+https://github.com/mpiannucci/redis-fsspec-cache.git +redis==5.0.4 diff --git a/xreds/dataset_provider.py b/xreds/dataset_provider.py index e90a8cc..a14f9b5 100644 --- a/xreds/dataset_provider.py +++ b/xreds/dataset_provider.py @@ -1,18 +1,19 @@ -import yaml import datetime import fsspec +from redis import Redis import xarray as xr +import yaml from pluggy import PluginManager - from xpublish import Plugin, hookimpl +from xreds.config import settings from xreds.dataset_extension import DATASET_EXTENSION_PLUGIN_NAMESPACE +from xreds.dependencies.redis import get_redis +from xreds.extensions import VDatumTransformationExtension from xreds.logging import logger -from xreds.config import settings from xreds.utils import load_dataset -from xreds.extensions import VDatumTransformationExtension - +from xreds.redis import pool as redis_pool dataset_extension_manager = 
PluginManager(DATASET_EXTENSION_PLUGIN_NAMESPACE) dataset_extension_manager.register(VDatumTransformationExtension, name="vdatum") @@ -32,8 +33,8 @@ def __init__(self, **kwargs): fs = fsspec.filesystem("file") with fs.open(settings.datasets_mapping_file, "r") as f: - #load config using yaml, which can load json or yaml - #because yaml is a superset of json + # load config using yaml, which can load json or yaml + # because yaml is a superset of json self.dataset_mapping = yaml.safe_load(f) @hookimpl @@ -44,6 +45,8 @@ def get_datasets(self): def get_dataset(self, dataset_id: str) -> xr.Dataset: cache_key = f"dataset-{dataset_id}" + redis_cache = Redis(connection_pool=redis_pool) + cached_ds = self.datasets.get(cache_key, None) if cached_ds: if (datetime.datetime.now() - cached_ds["date"]).seconds < (10 * 60): @@ -56,7 +59,7 @@ def get_dataset(self, dataset_id: str) -> xr.Dataset: logger.info(f"No dataset found in cache for {dataset_id}, loading...") dataset_spec = self.dataset_mapping[dataset_id] - ds = load_dataset(dataset_spec) + ds = load_dataset(dataset_spec, redis_cache=None) if ds is None: raise ValueError(f"Dataset {dataset_id} not found") diff --git a/xreds/dependencies/__init__.py b/xreds/dependencies/__init__.py new file mode 100644 index 0000000..15c796c --- /dev/null +++ b/xreds/dependencies/__init__.py @@ -0,0 +1 @@ +# module diff --git a/xreds/dependencies/redis.py b/xreds/dependencies/redis.py new file mode 100644 index 0000000..14092c6 --- /dev/null +++ b/xreds/dependencies/redis.py @@ -0,0 +1,8 @@ +from fastapi import Depends +import redis + +from xreds.redis import pool as redis_pool + + +def get_redis(): + return redis.Redis(connection_pool=redis_pool) diff --git a/xreds/redis.py b/xreds/redis.py new file mode 100644 index 0000000..5745fa5 --- /dev/null +++ b/xreds/redis.py @@ -0,0 +1,16 @@ +import os + +import redis + +from xreds.logging import logger + + +def create_redis(redis_host: str, redis_port: int): + logger.warning(f"Creating redis connection pool for {redis_host}:{redis_port}") + return redis.ConnectionPool( + host=redis_host, + port=redis_port, + db=0 + ) + +pool = create_redis(os.getenv("REDIS_HOST", "localhost"), int(os.getenv("REDIS_PORT", "6379"))) diff --git a/xreds/utils.py b/xreds/utils.py index e91636e..033d25f 100644 --- a/xreds/utils.py +++ b/xreds/utils.py @@ -1,74 +1,115 @@ +from typing import Optional + import fsspec +import ujson import xarray as xr +from redis_fsspec_cache.reference import RedisCachingReferenceFileSystem +from redis import Redis from xreds.logging import logger def infer_dataset_type(dataset_path: str) -> str: - if dataset_path.endswith('.nc'): - return 'netcdf' - elif dataset_path.endswith('.grib2'): - return 'grib2' - elif dataset_path.endswith('.nc.zarr') or dataset_path.endswith('json'): - return 'kerchunk' - elif dataset_path.endswith('.zarr') : - return 'zarr' + if dataset_path.endswith(".nc"): + return "netcdf" + elif dataset_path.endswith(".grib2"): + return "grib2" + elif dataset_path.endswith(".nc.zarr") or dataset_path.endswith("json"): + return "kerchunk" + elif dataset_path.endswith(".zarr"): + return "zarr" - return 'unknown' + return "unknown" -def load_dataset(dataset_spec: dict) -> xr.Dataset | None: +def load_dataset( + dataset_spec: dict, redis_cache: Optional[Redis] = None +) -> xr.Dataset | None: """Load a dataset from a path""" ds = None - dataset_path = dataset_spec['path'] + dataset_path = dataset_spec["path"] dataset_type = dataset_spec.get("type", None) if not dataset_type: dataset_type = 
infer_dataset_type(dataset_path) logger.info(f"Inferred dataset type {dataset_type} for {dataset_path}") - if dataset_type == 'unknown': + if dataset_type == "unknown": logger.error(f"Could not infer dataset type for {dataset_path}") return None - chunks = dataset_spec.get('chunks', None) - drop_variables = dataset_spec.get('drop_variables', None) - additional_coords = dataset_spec.get('additional_coords', None) - additional_attrs = dataset_spec.get('additional_attrs', None) - key = dataset_spec.get('key', None) - secret = dataset_spec.get('secret', None) + chunks = dataset_spec.get("chunks", None) + drop_variables = dataset_spec.get("drop_variables", None) + additional_coords = dataset_spec.get("additional_coords", None) + additional_attrs = dataset_spec.get("additional_attrs", None) + key = dataset_spec.get("key", None) + secret = dataset_spec.get("secret", None) - if dataset_type == 'netcdf': - ds = xr.open_dataset(dataset_path, engine='netcdf4', chunks=chunks, drop_variables=drop_variables) + if dataset_type == "netcdf": + ds = xr.open_dataset( + dataset_path, engine="netcdf4", chunks=chunks, drop_variables=drop_variables + ) if additional_coords is not None: ds = ds.set_coords(additional_coords) - elif dataset_type == 'grib2': - ds = xr.open_dataset(dataset_path, engine='cfgrib') - elif dataset_type == 'kerchunk': + elif dataset_type == "grib2": + ds = xr.open_dataset(dataset_path, engine="cfgrib") + elif dataset_type == "kerchunk": if key is not None: - options = {'anon': False, 'key': key, 'secret': secret} + options = {"anon": False, "key": key, "secret": secret} + else: + options = {"anon": True} + + if redis_cache is not None: + logger.warning("USING REDIS CACHE") + # reference_url = f"rediscache::{dataset_path}" + # with fsspec.open( + # reference_url, + # mode="r", + # rediscache={"redis": redis_cache, "expiry": 60}, + # s3={"anon": True}, + # ) as f: + # raw_refs = f.read() + # refs = ujson.loads(raw_refs) + fs = RedisCachingReferenceFileSystem( + redis=redis_cache, + expiry_time=180, + fo=dataset_path, + target_protocol="s3", + target_options=options, + remote_protocol="s3", + remote_options=options, + ) else: - options = {'anon': True} - fs = fsspec.filesystem( - "filecache", - expiry_time=10 * 60, # TODO: Make this driven by config per dataset, for now default to 10 minutes - target_protocol='reference', - target_options={ - 'fo': dataset_path, - 'target_protocol': 's3', - 'target_options': options, - 'remote_protocol': 's3', - 'remote_options': options, - }) + fs = fsspec.filesystem( + "filecache", + expiry_time=10 + * 60, # TODO: Make this driven by config per dataset, for now default to 10 minutes + target_protocol="reference", + target_options={ + "fo": dataset_path, + "target_protocol": "s3", + "target_options": options, + "remote_protocol": "s3", + "remote_options": options, + }, + ) m = fs.get_mapper("") - ds = xr.open_dataset(m, engine="zarr", backend_kwargs=dict(consolidated=False), chunks=chunks, drop_variables=drop_variables) + ds = xr.open_dataset( + m, + engine="zarr", + backend_kwargs=dict(consolidated=False), + chunks=chunks, + drop_variables=drop_variables, + ) try: - if ds.cf.coords['longitude'].dims[0] == 'longitude': - ds = ds.assign_coords(longitude=(((ds.longitude + 180) % 360) - 180)).sortby('longitude') + if ds.cf.coords["longitude"].dims[0] == "longitude": + ds = ds.assign_coords( + longitude=(((ds.longitude + 180) % 360) - 180) + ).sortby("longitude") # TODO: Yeah this should not be assumed... 
but for regular grids we will viz with rioxarray so for now we will assume ds = ds.rio.write_crs(4326) except Exception as e: - logger.warning(f'Could not reindex longitude: {e}') + logger.warning(f"Could not reindex longitude: {e}") pass - elif dataset_type == 'zarr': + elif dataset_type == "zarr": # TODO: Enable S3 support # mapper = fsspec.get_mapper(dataset_location) ds = xr.open_zarr(dataset_path, consolidated=True) @@ -79,18 +120,18 @@ def load_dataset(dataset_spec: dict) -> xr.Dataset | None: # Add additional attributes to the dataset if provided if additional_attrs is not None: ds.attrs.update(additional_attrs) - + # Check if we have a time dimension and if it is not indexed, index it try: - time_dim = ds.cf['time'].dims[0] + time_dim = ds.cf["time"].dims[0] if not ds.indexes.get(time_dim, None): - time_coord = ds.cf['time'].name - logger.info(f'Indexing time dimension {time_dim} as {time_coord}') + time_coord = ds.cf["time"].name + logger.info(f"Indexing time dimension {time_dim} as {time_coord}") ds = ds.set_index({time_dim: time_coord}) - if 'standard_name' not in ds[time_dim].attrs: - ds[time_dim].attrs['standard_name'] = 'time' + if "standard_name" not in ds[time_dim].attrs: + ds[time_dim].attrs["standard_name"] = "time" except Exception as e: - logger.warning(f'Could not index time dimension: {e}') + logger.warning(f"Could not index time dimension: {e}") pass - return ds \ No newline at end of file + return ds From e2754d815f151323d20530d0ffa2583303507a3c Mon Sep 17 00:00:00 2001 From: "Matthew.Iannucci" Date: Thu, 23 May 2024 10:16:20 -0400 Subject: [PATCH 2/9] Turn off cache --- Dockerfile | 2 +- xreds/dataset_provider.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index c35728d..5a34139 100644 --- a/Dockerfile +++ b/Dockerfile @@ -68,4 +68,4 @@ ARG ROOT_PATH=/xreds/ ENV ROOT_PATH ${ROOT_PATH} # Run the webserver -CMD ["sh", "-c", "gunicorn --workers=4 --worker-class=uvicorn.workers.UvicornWorker --log-level=debug --bind=0.0.0.0:${PORT} app:app"] +CMD ["sh", "-c", "gunicorn --workers=1 --worker-class=uvicorn.workers.UvicornWorker --log-level=debug --bind=0.0.0.0:${PORT} app:app"] diff --git a/xreds/dataset_provider.py b/xreds/dataset_provider.py index a14f9b5..5075b53 100644 --- a/xreds/dataset_provider.py +++ b/xreds/dataset_provider.py @@ -77,7 +77,7 @@ def get_dataset(self, dataset_id: str) -> xr.Dataset: logger.info(f"Applying extension {ext_name} to dataset {dataset_id}") ds = extension().transform_dataset(ds=ds, config=ext_config) - self.datasets[cache_key] = {"dataset": ds, "date": datetime.datetime.now()} + # self.datasets[cache_key] = {"dataset": ds, "date": datetime.datetime.now()} if cache_key in self.datasets: logger.info(f"Loaded and cached dataset for {dataset_id}") From 24de011b4a98e1d527bd6b1f2c336b700fd8da1a Mon Sep 17 00:00:00 2001 From: "Matthew.Iannucci" Date: Thu, 23 May 2024 10:48:38 -0400 Subject: [PATCH 3/9] Fix requirements, get cache working --- requirements.txt | 2 +- xreds/dataset_provider.py | 2 +- xreds/utils.py | 17 ++++++++--------- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/requirements.txt b/requirements.txt index 7f19551..a69769b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -38,5 +38,5 @@ xpublish-wms@git+https://github.com/xpublish-community/xpublish-wms@9574a71405e4 xpublish-edr@git+https://github.com/xpublish-community/xpublish-edr@019e53acd2e0ad5a1d909d1acfe9863f2e90e51b opendap-protocol<1.2.0 
 xarray-subset-grid@git+https://github.com/asascience-open/xarray-subset-grid@81ce464b6357e7353deaaf350ad1be22295d238e
-redis-fsspec-cache@git+https://github.com/mpiannucci/redis-fsspec-cache.git
+redis-fsspec-cache@git+https://github.com/mpiannucci/redis-fsspec-cache.git@bc65aa0177ee35d17bfa67816579bc645f1e241c
 redis==5.0.4
diff --git a/xreds/dataset_provider.py b/xreds/dataset_provider.py
index 5075b53..16e773a 100644
--- a/xreds/dataset_provider.py
+++ b/xreds/dataset_provider.py
@@ -59,7 +59,7 @@ def get_dataset(self, dataset_id: str) -> xr.Dataset:
         logger.info(f"No dataset found in cache for {dataset_id}, loading...")
 
         dataset_spec = self.dataset_mapping[dataset_id]
-        ds = load_dataset(dataset_spec, redis_cache=None)
+        ds = load_dataset(dataset_spec, redis_cache=redis_cache)
         if ds is None:
             raise ValueError(f"Dataset {dataset_id} not found")
 
diff --git a/xreds/utils.py b/xreds/utils.py
index 033d25f..5532212 100644
--- a/xreds/utils.py
+++ b/xreds/utils.py
@@ -59,15 +59,14 @@ def load_dataset(
 
         if redis_cache is not None:
             logger.warning("USING REDIS CACHE")
-            # reference_url = f"rediscache::{dataset_path}"
-            # with fsspec.open(
-            #     reference_url,
-            #     mode="r",
-            #     rediscache={"redis": redis_cache, "expiry": 60},
-            #     s3={"anon": True},
-            # ) as f:
-            #     raw_refs = f.read()
-            #     refs = ujson.loads(raw_refs)
+            reference_url = f"rediscache::{dataset_path}"
+            with fsspec.open(
+                reference_url,
+                mode="rb",
+                rediscache={"redis": redis_cache, "expiry": 3 * 60},
+                s3={"anon": True},
+            ) as f:
+                refs = ujson.load(f)
             fs = RedisCachingReferenceFileSystem(
                 redis=redis_cache,
                 expiry_time=180,

From 5e32bade01bb0f98cd721111d82def565431c0da Mon Sep 17 00:00:00 2001
From: "Matthew.Iannucci"
Date: Thu, 23 May 2024 14:00:22 -0400
Subject: [PATCH 4/9] Add lots more documentation, do settings correctly

---
 Dockerfile                |  9 +++++---
 README.md                 | 47 +++++++++++++++++++++++++++++++++------
 app.py                    |  7 +++---
 deploy.yaml               |  4 ++--
 docker-compose.nginx.yml  |  2 +-
 docker-compose.redis.yml  | 29 ++++++++++++++++++++++++
 docker-compose.yml        | 18 +++------------
 nginx/nginx.conf          |  2 +-
 xreds/config.py           | 27 +++++++++++++++++++---
 xreds/dataset_provider.py |  6 ++---
 xreds/redis.py            | 17 ++++++++++++--
 11 files changed, 128 insertions(+), 40 deletions(-)
 create mode 100644 docker-compose.redis.yml

diff --git a/Dockerfile b/Dockerfile
index 5a34139..bdc928c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -15,7 +15,7 @@ COPY viewer/index.html ./index.html
 COPY viewer/public ./public
 COPY viewer/src ./src
 
-ARG ROOT_PATH=/xreds/
+ARG ROOT_PATH
 ENV VITE_XREDS_BASE_URL=${ROOT_PATH}
 
 RUN npm run build
@@ -64,8 +64,11 @@ COPY --from=0 /opt/viewer/dist ./viewer/dist
 # Set the port to run the server on
 ENV PORT 8090
 
-ARG ROOT_PATH=/xreds/
+ARG ROOT_PATH
 ENV ROOT_PATH ${ROOT_PATH}
 
+ARG WORKERS=1
+ENV WORKERS ${WORKERS}
+
 # Run the webserver
-CMD ["sh", "-c", "gunicorn --workers=1 --worker-class=uvicorn.workers.UvicornWorker --log-level=debug --bind=0.0.0.0:${PORT} app:app"]
+CMD ["sh", "-c", "gunicorn --workers=${WORKERS} --worker-class=uvicorn.workers.UvicornWorker --log-level=debug --bind=0.0.0.0:${PORT} app:app"]
diff --git a/README.md b/README.md
index 2d7b65c..abd7cfd 100644
--- a/README.md
+++ b/README.md
@@ -38,17 +38,17 @@ Build the react app
 
 ```bash
 cd viewer/
-yarn install
-yarn build
+npm install
+npm run build
 ```
 
 Run the following in the activated `virtualenv`:
 
 ```bash
-datasets_mapping_file=./test.json python app.py
+DATASETS_MAPPING_FILE=./test.json python app.py
 ```
 
-Where `datasets_mapping_file` is the path to the dataset key value store specified in the previous section. You can now navigate to http://localhost:8090/docs to see the supported operations
+Where `DATASETS_MAPPING_FILE` is the path to the dataset key value store as described [here](./README.md#specifying-datasets). You can now navigate to `http://localhost:8090/docs` to see the supported operations.
 
 ## Running With Docker
 
@@ -60,16 +60,37 @@ The docker container for the app can be built with:
 ```bash
 docker build -t xreds:latest .
 ```
-Once built, it requires a few things to be run: The 8090 port to be exposed, and a volume for the datasets to live in, and the environment variable pointing to the dateset json file.
+There are also build arguments available when building the docker image:
+
+- `ROOT_PATH`: The root path the app will be served from. Defaults to `/xreds/`.
+- `WORKERS`: The number of gunicorn workers to run. Defaults to `1`.
+
+Once built, it requires a few things to run: the `8090` port to be exposed, a volume for the datasets to live in, and the environment variable pointing to the dataset json file.
 
 ```bash
-docker run -p 8090:8090 -e "datasets_mapping_file=/path/to/datasets.json" -v "/path/to/datasets:/opt/xreds/datasets" xreds:latest
+docker run -p 8090:8090 -e "DATASETS_MAPPING_FILE=/path/to/datasets.json" -v "/path/to/datasets:/opt/xreds/datasets" xreds:latest
 ```
 
 ### Running with `docker compose`
 
+There are a few `docker compose` examples to get started with:
+
+#### Vanilla
+
+```bash
+docker compose up -d
+```
+
+#### With Redis
+
 ```bash
-docker compose --platform=linux/amd64 up -d
+docker compose -f docker-compose.redis.yml up -d
+```
+
+#### With NGINX Proxy
+
+```bash
+docker compose -f docker-compose.nginx.yml up -d
 ```
 
 ## Specifying Datasets
@@ -110,6 +131,18 @@ gfswave_global:
 
 Currently `zarr`, `netcdf`, and [`kerchunk`](https://github.com/fsspec/kerchunk) dataset types are supported. This information should be saved to a file and specified when running.
 
+## Configuration Options
+
+The following environment variables can be set to configure the app:
+
+- `DATASETS_MAPPING_FILE`: The fsspec compatible path to the dataset key value store as described [here](./README.md#specifying-datasets)
+- `PORT`: The port the app should run on. Defaults to `8090`
+- `ROOT_PATH`: The root path the app will be served from. Defaults to `` to be served from the root.
+- `EXPORT_THRESHOLD`: The maximum file size to allow to be exported. Defaults to `500 MB`
+- `USE_REDIS_CACHE`: Whether to use a redis cache for the app. Defaults to `False`
+- `REDIS_HOST`: [Optional] The host of the redis cache. Defaults to `localhost`
+- `REDIS_PORT`: [Optional] The port of the redis cache. Defaults to `6379`
+
 ## Building and Deploying Docker Image
 
 First follow instructions above to build the docker image tagged `xreds:latest`. Then the `xreds:latest` image needs to be tagged and deployed to the relevant docker registry.
diff --git a/app.py b/app.py index 6d97855..cf6ff32 100644 --- a/app.py +++ b/app.py @@ -2,9 +2,10 @@ import xpublish from fastapi.middleware.cors import CORSMiddleware + +from xreds.config import settings from xreds.plugins.export import ExportPlugin from xreds.plugins.size_plugin import SizePlugin - from xreds.spastaticfiles import SPAStaticFiles from xreds.dataset_provider import DatasetProvider from xreds.plugins.subset_plugin import SubsetPlugin, SubsetSupportPlugin @@ -20,7 +21,7 @@ datasets=None, ) -export_threshold = int(os.environ.get("EXPORT_THRESHOLD", 500)) +export_threshold = settings.export_threshold rest.register_plugin(DatasetProvider()) rest.register_plugin(SubsetSupportPlugin()) @@ -39,7 +40,7 @@ ) app.mount("/", SPAStaticFiles(directory="./viewer/dist", html=True), name="viewer") -app.root_path = os.environ.get("ROOT_PATH") +app.root_path = settings.root_path if __name__ == "__main__": diff --git a/deploy.yaml b/deploy.yaml index 50d72fa..60d872a 100644 --- a/deploy.yaml +++ b/deploy.yaml @@ -24,7 +24,7 @@ spec: env: - name: EXPORT_THRESHOLD value: "600" - - name: datasets_mapping_file + - name: DATASETS_MAPPING_FILE value: "s3://nextgen-dmac/kerchunk/datasets.json" --- apiVersion: v1 @@ -41,4 +41,4 @@ spec: port: 8090 targetPort: 8090 selector: - app: xreds \ No newline at end of file + app: xreds diff --git a/docker-compose.nginx.yml b/docker-compose.nginx.yml index a96ebfb..66555cf 100644 --- a/docker-compose.nginx.yml +++ b/docker-compose.nginx.yml @@ -22,4 +22,4 @@ services: environment: - PORT=8091 - ROOT_PATH=:8090 - - datasets_mapping_file=/opt/xreds/datasets/datasets.json + - DATASETS_MAPPING_FILE=/opt/xreds/datasets/datasets.json diff --git a/docker-compose.redis.yml b/docker-compose.redis.yml new file mode 100644 index 0000000..dbadfd1 --- /dev/null +++ b/docker-compose.redis.yml @@ -0,0 +1,29 @@ +version: '3' + +services: + redis: + container_name: redis + image: redis:7-alpine + volumes: + - ./redis/redis.conf:/usr/local/etc/redis/redis.conf + restart: on-failure + ports: + - "6380:6380" + command: redis-server /usr/local/etc/redis/redis.conf + xreds: + container_name: xreds + build: . + volumes: + - "./datasets:/opt/xreds/datasets" + platform: linux/amd64 + ports: + - "8090:8090" + depends_on: + - redis + environment: + - PORT=8090 + - DATASETS_MAPPING_FILE=/opt/xreds/datasets/datasets.json + - EXPORT_THRESHOLD=600 + - USE_REDIS_CACHE=true + - REDIS_HOST=redis + - REDIS_PORT=6380 diff --git a/docker-compose.yml b/docker-compose.yml index 51ebd19..f40876c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,28 +1,16 @@ version: '3' services: - redis: - container_name: redis - image: redis:7-alpine - volumes: - - ./redis/redis.conf:/usr/local/etc/redis/redis.conf - restart: on-failure - ports: - - "6380:6380" - command: redis-server /usr/local/etc/redis/redis.conf xreds: container_name: xreds - build: . + build: + context: . 
volumes: - "./datasets:/opt/xreds/datasets" platform: linux/amd64 ports: - "8090:8090" - depends_on: - - redis environment: - PORT=8090 - - datasets_mapping_file=/opt/xreds/datasets/datasets.json + - DATASETS_MAPPING_FILE=/opt/xreds/datasets/datasets.json - EXPORT_THRESHOLD=600 - - REDIS_HOST=redis - - REDIS_PORT=6380 diff --git a/nginx/nginx.conf b/nginx/nginx.conf index f2434e8..2e178d1 100644 --- a/nginx/nginx.conf +++ b/nginx/nginx.conf @@ -14,7 +14,7 @@ http { inactive=24h max_size=2g; server { location / { - proxy_pass http://zms:8091; + proxy_pass http://xms:8091; proxy_set_header Host $host; proxy_buffering on; proxy_cache STATIC; diff --git a/xreds/config.py b/xreds/config.py index 7c86256..41cf7a2 100644 --- a/xreds/config.py +++ b/xreds/config.py @@ -1,8 +1,29 @@ from pydantic_settings import BaseSettings -class Settings(BaseSettings): - datasets_mapping_file: str +class Settings(BaseSettings): + '''Settings for running xreds''' + # fsspec compatible url path to the dataset mapping file + # in either json or yml format + datasets_mapping_file: str = '' + # Root path for the service to mount at + root_path: str = '' -settings = Settings() \ No newline at end of file + # Size threshold exporting datasets to local files + # in MB + export_threshold: int = 500 + + # Whether to use redis to cache datasets when possible + use_redis_cache: bool = False + + # Optional redis host name + # If not provided, will default to localhost + redis_host: str = "localhost" + + # Optional redis port number + # If not provided, will default to 6379 + redis_port: int = 6379 + + +settings = Settings() diff --git a/xreds/dataset_provider.py b/xreds/dataset_provider.py index 16e773a..f47139b 100644 --- a/xreds/dataset_provider.py +++ b/xreds/dataset_provider.py @@ -1,7 +1,6 @@ import datetime import fsspec -from redis import Redis import xarray as xr import yaml from pluggy import PluginManager @@ -13,7 +12,7 @@ from xreds.extensions import VDatumTransformationExtension from xreds.logging import logger from xreds.utils import load_dataset -from xreds.redis import pool as redis_pool +from xreds.redis import get_redis_cache dataset_extension_manager = PluginManager(DATASET_EXTENSION_PLUGIN_NAMESPACE) dataset_extension_manager.register(VDatumTransformationExtension, name="vdatum") @@ -45,7 +44,7 @@ def get_datasets(self): def get_dataset(self, dataset_id: str) -> xr.Dataset: cache_key = f"dataset-{dataset_id}" - redis_cache = Redis(connection_pool=redis_pool) + redis_cache = get_redis_cache() cached_ds = self.datasets.get(cache_key, None) if cached_ds: @@ -77,6 +76,7 @@ def get_dataset(self, dataset_id: str) -> xr.Dataset: logger.info(f"Applying extension {ext_name} to dataset {dataset_id}") ds = extension().transform_dataset(ds=ds, config=ext_config) + # TODO: For now this cache is disabled for testing with redis # self.datasets[cache_key] = {"dataset": ds, "date": datetime.datetime.now()} if cache_key in self.datasets: diff --git a/xreds/redis.py b/xreds/redis.py index 5745fa5..af2a9a0 100644 --- a/xreds/redis.py +++ b/xreds/redis.py @@ -1,11 +1,19 @@ import os +from typing import Optional import redis +from xreds.config import Settings, settings from xreds.logging import logger -def create_redis(redis_host: str, redis_port: int): +def create_redis_pool(settings: Settings) -> Optional[redis.ConnectionPool]: + if not settings.use_redis_cache: + logger.warning("Not using redis cache") + return None + + redis_host = settings.redis_host + redis_port = settings.redis_port logger.warning(f"Creating redis 
connection pool for {redis_host}:{redis_port}")
     return redis.ConnectionPool(
         host=redis_host,
@@ -13,4 +21,9 @@ def create_redis(redis_host: str, redis_port: int):
         db=0
     )
 
-pool = create_redis(os.getenv("REDIS_HOST", "localhost"), int(os.getenv("REDIS_PORT", "6379")))
+pool = create_redis_pool(settings=settings)
+
+def get_redis_cache() -> Optional[redis.Redis]:
+    if pool is None:
+        return None
+    return redis.Redis(connection_pool=pool)

From aef10eaef93a3c30d0dfed53151eae08519f4c57 Mon Sep 17 00:00:00 2001
From: "Matthew.Iannucci"
Date: Thu, 23 May 2024 14:06:57 -0400
Subject: [PATCH 5/9] Update kubernetes and circleci build arguments

---
 .circleci/config.yml | 4 ++--
 deploy.yaml          | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 50ef6fb..a5c1a0f 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -28,7 +28,7 @@ jobs:
             echo "export TAG=${TAG}" >> $BASH_ENV
             echo "Building for TAG ${TAG}"
 
-            docker build -t ${ECR_REPO}:${TAG} .
+            docker build --build-arg="ROOT_PATH=/xreds/" -t ${ECR_REPO}:${TAG} .
 
       - run:
           name: Install Grype
@@ -76,4 +76,4 @@ workflows:
       filters:
         branches:
           only:
-            - main
\ No newline at end of file
+            - main
diff --git a/deploy.yaml b/deploy.yaml
index 60d872a..47477de 100644
--- a/deploy.yaml
+++ b/deploy.yaml
@@ -22,6 +22,8 @@ spec:
         ports:
         - containerPort: 8090
         env:
+        - name: ROOT_PATH
+          value: "/xreds/"
         - name: EXPORT_THRESHOLD
           value: "600"
         - name: DATASETS_MAPPING_FILE

From 992bf8b2cce0dd6192aa935035fb31b74b0d5ec6 Mon Sep 17 00:00:00 2001
From: "Matthew.Iannucci"
Date: Thu, 23 May 2024 14:59:32 -0400
Subject: [PATCH 6/9] Remove rps specific kubernetes yaml

---
 README.md   |  2 +-
 deploy.yaml | 46 ----------------------------------------------
 2 files changed, 1 insertion(+), 47 deletions(-)
 delete mode 100644 deploy.yaml

diff --git a/README.md b/README.md
index abd7cfd..48421f4 100644
--- a/README.md
+++ b/README.md
@@ -143,7 +143,7 @@ The following environment variables can be set to configure the app:
 - `REDIS_HOST`: [Optional] The host of the redis cache. Defaults to `localhost`
 - `REDIS_PORT`: [Optional] The port of the redis cache. Defaults to `6379`
 
-## Building and Deploying Docker Image
+## Building and Deploying Public Docker Image
 
 First follow instructions above to build the docker image tagged `xreds:latest`. Then the `xreds:latest` image needs to be tagged and deployed to the relevant docker registry.
diff --git a/deploy.yaml b/deploy.yaml deleted file mode 100644 index 47477de..0000000 --- a/deploy.yaml +++ /dev/null @@ -1,46 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: xreds - labels: - app: xreds -spec: - replicas: 1 - selector: - matchLabels: - app: xreds - - template: - metadata: - labels: - app: xreds - spec: - containers: - - name: xreds - image: public.ecr.aws/m2c5k9c1/nextgen-dmac/xreds:latest - imagePullPolicy: "Always" - ports: - - containerPort: 8090 - env: - - name: ROOT_PATH - value: "/xreds/" - - name: EXPORT_THRESHOLD - value: "600" - - name: DATASETS_MAPPING_FILE - value: "s3://nextgen-dmac/kerchunk/datasets.json" ---- -apiVersion: v1 -kind: Service -metadata: - name: xreds-service - labels: - app: xreds -spec: - selector: - app.kubernetes.io/name: xreds - ports: - - protocol: TCP - port: 8090 - targetPort: 8090 - selector: - app: xreds From d32ddf8028641ba1d00d63c245c4dc14fb4f927e Mon Sep 17 00:00:00 2001 From: "Matthew.Iannucci" Date: Thu, 23 May 2024 15:02:43 -0400 Subject: [PATCH 7/9] Add redis cache to vdatum extension --- xreds/extensions/vdatum.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/xreds/extensions/vdatum.py b/xreds/extensions/vdatum.py index d3321ed..3afe5ae 100644 --- a/xreds/extensions/vdatum.py +++ b/xreds/extensions/vdatum.py @@ -2,6 +2,7 @@ from xreds.dataset_extension import DatasetExtension, hookimpl from xreds.logging import logger +from xreds.redis import get_redis_cache from xreds.utils import load_dataset @@ -62,13 +63,14 @@ def transform_dataset(self, ds: xr.Dataset, config: dict) -> xr.Dataset: ) return ds - ds_vdatum = load_dataset({"path": vdatum_file}) + redis_cache = get_redis_cache() + ds_vdatum = load_dataset({"path": vdatum_file}, redis_cache=redis_cache) if ds_vdatum is None: logger.warning( f"Could not load vdatum dataset from {vdatum_file}. Skipping vdatum transformation" ) return ds - + target_zeta_var = config.get("water_level_var", "zeta") target_datum_var = config.get("vdatum_var", "igld85tolwd") target_datum_name = config.get("vdatum_name", "igld85") From e6ee049116f34e80ae7043095a3f840e42df8a21 Mon Sep 17 00:00:00 2001 From: "Matthew.Iannucci" Date: Thu, 23 May 2024 15:04:46 -0400 Subject: [PATCH 8/9] vdatum safety enhancements --- xreds/extensions/vdatum.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/xreds/extensions/vdatum.py b/xreds/extensions/vdatum.py index 3afe5ae..4f980df 100644 --- a/xreds/extensions/vdatum.py +++ b/xreds/extensions/vdatum.py @@ -72,8 +72,15 @@ def transform_dataset(self, ds: xr.Dataset, config: dict) -> xr.Dataset: return ds target_zeta_var = config.get("water_level_var", "zeta") - target_datum_var = config.get("vdatum_var", "igld85tolwd") - target_datum_name = config.get("vdatum_name", "igld85") + target_datum_var = config.get("vdatum_var", None) + target_datum_name = config.get("vdatum_name", None) + + if target_datum_var is None or target_datum_name is None: + logger.warning( + f"Dataset {ds.attrs.get('name', 'unknown')} does not have a vdatum_var or vdatum_name attribute. 
Skipping vdatum transformation" + ) + return ds + out_datum_var = f"{target_zeta_var}_{target_datum_name}" ds_transformed = transform_datum(ds, ds_vdatum, target_zeta_var, target_datum_var, target_datum_name, out_datum_var) From 8b9a68e489ea1793c83f76c03b623dabdbbc54fe Mon Sep 17 00:00:00 2001 From: Nicholas Delli Carpini Date: Thu, 23 May 2024 16:35:18 -0400 Subject: [PATCH 9/9] add "report" button --- viewer/package.json | 1 + viewer/src/pages/app.tsx | 103 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 103 insertions(+), 1 deletion(-) diff --git a/viewer/package.json b/viewer/package.json index 6c1fde6..013f0ab 100644 --- a/viewer/package.json +++ b/viewer/package.json @@ -11,6 +11,7 @@ }, "dependencies": { "@mapbox/mapbox-gl-draw": "^1.4.3", + "@mapbox/tilebelt": "1.0.2", "@tanstack/query-core": "^5.32.1", "@tanstack/react-query": "^5.32.1", "fast-xml-parser": "^4.3.2", diff --git a/viewer/src/pages/app.tsx b/viewer/src/pages/app.tsx index 3feb8b7..a96170e 100644 --- a/viewer/src/pages/app.tsx +++ b/viewer/src/pages/app.tsx @@ -1,4 +1,4 @@ -import { ImageSource, MapMouseEvent, Popup } from 'maplibre-gl'; +import { ImageSource, MapDataEvent, MapMouseEvent, Popup } from 'maplibre-gl'; import { useEffect, useRef, useState } from 'react'; import { bboxContainsPoint, createImageLayerParams } from '../tools'; import Map from '../components/map'; @@ -12,8 +12,20 @@ import { useDatasetMinMaxQuery, useDatasetsQuery, } from '../query/datasets'; +import { tileToBBOX } from '@mapbox/tilebelt'; import { Link } from 'react-router-dom'; +interface LoadingMetadata { + dataset: string, + variable: string, + bbox: number[], + elevation: number | undefined, + startTime: number, + endTime: number, + elapsedTime: number, + requestTime: string, +} + const colormaps: Array<{ id: string; name: string }> = [ { id: 'rainbow', name: 'Rainbow' }, { id: 'jet', name: 'Jet' }, @@ -74,6 +86,77 @@ function App() { }>({}); const [layerLoading, setLayerLoading] = useState(false); + const dataLoading = useRef<{ [k: string]: LoadingMetadata }>({}); + const dataLoaded = useRef([]); + + useEffect(() => { + if (!map.current) { + return; + } + + map.current.on("dataloading", (e: any) => { + if (!e.sourceId?.startsWith("xreds") || !e.source?.tiles || e.source.tiles.length === 0) { + return; + } + + const urlArr = e.source.tiles[0].split("/wms/?", 2) + const paramArr = urlArr[1].split("&"); + const tiles = map.current!.style.sourceCaches[e.sourceId]?._tiles; + if (!tiles) { + return; + } + + Object.keys(tiles).forEach((k) => { + const currID = `${e.sourceId}---${k}`; + if (tiles[k].state === "loading" && !dataLoading.current[currID]) { + const time = paramArr.find((p: string) => p.startsWith("time="))?.replace("time=", ""); + const variable = paramArr.find((p: string) => p.startsWith("layers="))?.replace("layers=", ""); + const elevation = paramArr.find((p: string) => p.startsWith("elevation="))?.replace("elevation=", ""); + dataLoading.current[currID] = { + dataset: urlArr[0].replace("/datasets/", ""), + variable: variable, + bbox: [], + elevation: elevation !== undefined ? 
parseFloat(elevation) : undefined, + requestTime: time, + startTime: (new Date()).getTime(), + endTime: -1, + elapsedTime: -1 + } + } + }); + }); + + map.current.on("data", (e: any) => { + if (!e.sourceId?.startsWith("xreds") || !e.source?.tiles || e.source.tiles.length === 0) { + return; + } + + const tiles = map.current!.style.sourceCaches[e.sourceId]?._tiles; + if (!tiles) { + return; + } + + Object.keys(dataLoading.current).forEach((k) => { + if (tiles[k.split("---")[1]] === undefined) { + delete dataLoading.current[k]; + } + }); + + Object.keys(tiles).forEach((k) => { + const currID = `${e.sourceId}---${k}`; + if (tiles[k].state === "loaded" && dataLoading.current[currID]) { + const coords = tiles[k].tileID.canonical; + dataLoading.current[currID].bbox = tileToBBOX([coords.x, coords.y, coords.z]); + dataLoading.current[currID].endTime = (new Date()).getTime(); + dataLoading.current[currID].elapsedTime = dataLoading.current[currID].endTime - dataLoading.current[currID].startTime; + + dataLoaded.current.push(dataLoading.current[currID]); + delete dataLoading.current[currID]; + } + }); + }); + }, []) + useEffect(() => { const datasetsCollapsed = datasetIds.data?.reduce( (obj: { [k: string]: boolean }, id: string) => { @@ -365,6 +448,24 @@ function App() { } /> + {datasetIds.data?.map((d, i) => (