diff --git a/chain/chain.py b/chain/chain.py index 8d7f8d1..cf9d9aa 100644 --- a/chain/chain.py +++ b/chain/chain.py @@ -5,6 +5,14 @@ import xclim +""" +This CLI demonstrates a workflow where commands can be called individually +or chained together in memory. That is, each individual command accepts and +returns an xarray.Dataset. The `click.group` is responsible for reading in the +input data from disk, and writing the output data to disk. +""" + + @click.group(chain=True, help="Chained CLI", invoke_without_command=True) @click.argument("input", type=click.File("r")) @click.argument("output") @@ -13,15 +21,18 @@ def cli(input, output): @cli.result_callback() def process_pipeline(processors, input, output): - click.echo(f"INPUT: {input.name}") - click.echo(f"OUTPUT: {output}") + """Read the input, execute commands in memory, write output to disk.""" + # Read the input data into an xarray.Dataset ds = xr.open_dataset(input.name, engine="h5netcdf") + # Execute individual commands - not obvious here, but options are passed for processor in processors: ds = processor(ds) + # Write output to disk ds.to_netcdf(output, engine="h5netcdf") + @cli.command @click.option("-p", "--poly", help="Path to the polygon shapefile.") @click.option("-s", @@ -34,7 +45,6 @@ def subset(**kwargs): """Subset on polygon""" def processor(ds): gdf = gpd.GeoDataFrame.from_file(kwargs["poly"]) - # buffer = kwargs["buffer"] return clisops.core.subset_shape(ds=ds, shape=gdf, start_date=kwargs["start"], diff --git a/chain/test.py b/chain/test.py index feb1a95..b9324c3 100644 --- a/chain/test.py +++ b/chain/test.py @@ -7,24 +7,27 @@ from chain import cli -@pytest.fixture +@pytest.fixture(scope="module") def tas_series(): """Return mean temperature time series.""" _tas_series = partial(tt, variable="tas") return _tas_series -def test_chain(tas_series, tmp_path): +@pytest.fixture(scope="module") +def input_file(tas_series, tmpdir_factory): # Create input file + input_file = 
str(tmpdir_factory.mktemp("input").join("in.nc")) tas = tas_series(np.ones(366) + 271.15, start="1/1/2000") tas = tas.expand_dims(dim={"lon": np.linspace(-80, -70, 10), "lat": np.linspace(40, 50, 10)},) tas.lon.attrs["standard_name"] = "longitude" tas.lat.attrs["standard_name"] = "latitude" ds = xr.Dataset(data_vars={"tas": tas}) - - input_file = tmp_path / "in.nc" ds.to_netcdf(input_file, engine="h5netcdf") + return input_file + +def test_chain(input_file, tmp_path): output_file = tmp_path / "out.nc" args = [str(input_file), str(output_file), @@ -38,9 +41,52 @@ def test_chain(tas_series, tmp_path): assert results.exit_code == 0 if output_file.exists(): - print(output_file) out = xr.open_dataset(str(output_file), engine="h5netcdf") outvar = list(out.data_vars.values())[0] np.testing.assert_allclose(outvar[0], 6588.0) else: - raise FileNotFoundError \ No newline at end of file + raise FileNotFoundError + + +def test_hdd(input_file, tmp_path): + output_file = tmp_path / "out.nc" + args = [str(input_file), + str(output_file), + "hdd", + "--thresh", "17 degC"] + runner = CliRunner() + results = runner.invoke(cli, args) + + assert results.exit_code == 0 + + if output_file.exists(): + out = xr.open_dataset(str(output_file), engine="h5netcdf") + outvar = list(out.data_vars.values())[0] + np.testing.assert_allclose(outvar[0], 6588.0) + else: + raise FileNotFoundError + + +def test_subset(input_file, tmp_path): + output_file = tmp_path / "out.nc" + args = [str(input_file), + str(output_file), + "subset", + "-p", "small_geojson.json", + "-s", "2000-06", + "-e", "2000-08",] + runner = CliRunner() + results = runner.invoke(cli, args) + + assert results.exit_code == 0 + + if output_file.exists(): + out = xr.open_dataset(str(output_file), engine="h5netcdf") + assert "tas" in out.data_vars + assert out.time.isel(time=0) == np.datetime64("2000-06-01T00:00:00") + assert out.time.isel(time=-1) == np.datetime64("2000-08-31T00:00:00") + assert len(out.tas.lon) == 1 + assert 
len(out.tas.lat) == 1 + else: + raise FileNotFoundError + \ No newline at end of file