-
Notifications
You must be signed in to change notification settings - Fork 25
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Possible approach for stacking items #75
base: main
Are you sure you want to change the base?
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,7 +11,7 @@ | |
# STAC catalog asset 'type' determines intake driver: | ||
# https://github.com/radiantearth/stac-spec/blob/master/item-spec/item-spec.md#media-types | ||
default_type = 'application/rasterio' | ||
default_driver = 'rasterio' | ||
default_driver = 'rioxarray' | ||
|
||
drivers = { | ||
'application/netcdf': 'netcdf', | ||
|
@@ -20,13 +20,13 @@ | |
'application/x-parquet': 'parquet', | ||
'application/x-hdf': 'netcdf', | ||
'application/x-hdf5': 'netcdf', | ||
'application/rasterio': 'rasterio', | ||
'image/vnd.stac.geotiff': 'rasterio', | ||
'image/vnd.stac.geotiff; cloud-optimized=true': 'rasterio', | ||
'image/x.geotiff': 'rasterio', | ||
'image/tiff; application=geotiff': 'rasterio', | ||
'image/tiff; application=geotiff; profile=cloud-optimized': 'rasterio', # noqa: E501 | ||
'image/jp2': 'rasterio', | ||
'application/rasterio': 'rioxarray', | ||
'image/vnd.stac.geotiff': 'rioxarray', | ||
'image/vnd.stac.geotiff; cloud-optimized=true': 'rioxarray', | ||
'image/x.geotiff': 'rioxarray', | ||
'image/tiff; application=geotiff': 'rioxarray', | ||
'image/tiff; application=geotiff; profile=cloud-optimized': 'rioxarray', # noqa: E501 | ||
'image/jp2': 'rioxarray', | ||
'image/png': 'xarray_image', | ||
'image/jpg': 'xarray_image', | ||
'image/jpeg': 'xarray_image', | ||
|
@@ -101,13 +101,78 @@ def serialize(self): | |
""" | ||
return self.yaml() | ||
|
||
def stack_items( | ||
self, items, assets, path_as_pattern=None, concat_dim='band', override_coords=None | ||
): | ||
""" | ||
Experimental. Create an xarray.DataArray from a bunch of STAC Items | ||
|
||
Parameters | ||
---------- | ||
items: list of STAC item id strings | ||
assets : list of strings representing the different assets | ||
(assset key or eo:bands "common_name" e.g. ['B4', B5'], ['red', 'nir']) | ||
path_as_pattern : pattern string to extract coordinates from asset href | ||
concat_dim : name of concatenation dimension for xarray.DataArray | ||
override_coords : list of custom coordinate names | ||
|
||
Returns | ||
------- | ||
CombinedAssets instance with mapping of asset names to xarray coordinates | ||
|
||
Examples | ||
------- | ||
source = cat.stack_items(['S2A_36MYB_20200814_0_L2A','S2A_36MYB_20200811_0_L2A'], | ||
['red', 'nir']) | ||
da = stack().to_dask() | ||
""" | ||
common2band = self[items[0]]._get_band_name_mapping() | ||
metadatas = {'items': {}} | ||
hrefs = [] | ||
for item in items: | ||
metadatas['items'][item] = {'STAC': self[item].metadata, 'assets': {}} | ||
stac_assets = self[item]._stac_obj.assets | ||
for key in assets: | ||
|
||
if key in stac_assets: | ||
asset = stac_assets.get(key) | ||
elif key in common2band: | ||
asset = stac_assets.get(common2band[key]) | ||
else: | ||
raise ValueError( | ||
f'Asset "{key}" not found in asset keys {list(common2band.values())}' | ||
f' or eo:bands common_names {list(common2band.keys())}' | ||
) | ||
|
||
asset_metadata = asset.properties | ||
asset_metadata['key'] = key | ||
metadatas['items'][item]['assets'][asset.href] = asset_metadata | ||
hrefs.append(asset.href) | ||
|
||
configDict = {} | ||
configDict['name'] = 'item_stack' | ||
configDict['description'] = 'stack of assets from multiple items' | ||
configDict['args'] = dict( | ||
chunks={}, | ||
concat_dim=concat_dim, | ||
path_as_pattern=path_as_pattern, | ||
urlpath=hrefs, | ||
override_coords=override_coords, | ||
) | ||
configDict['metadata'] = metadatas | ||
|
||
stack = CombinedAssets(configDict) | ||
|
||
return stack | ||
|
||
|
||
class StacCatalog(AbstractStacCatalog): | ||
""" | ||
Maps Intake Catalog to a STAC Catalog | ||
https://pystac.readthedocs.io/en/latest/api.html?#catalog-spec | ||
""" | ||
|
||
# NOTE: name must match driver in setup.py entrypoints | ||
name = 'stac_catalog' | ||
_stac_cls = pystac.Catalog | ||
|
||
|
@@ -172,7 +237,7 @@ class StacItemCollection(AbstractStacCatalog): | |
https://pystac.readthedocs.io/en/latest/api.html?#single-file-stac-extension | ||
""" | ||
|
||
name = 'stac_itemcollection' | ||
name = 'stac_item_collection' | ||
_stac_cls = pystac.Catalog | ||
|
||
def _load(self): | ||
|
@@ -241,111 +306,106 @@ def _get_metadata(self, **kwargs): | |
metadata.update(kwargs) | ||
return metadata | ||
|
||
def _get_band_info(self): | ||
def _get_band_name_mapping(self): | ||
""" | ||
Return list of band info dictionaries (name, common_name, etc.)... | ||
Return dictionary mapping common name to asset name | ||
eo:bands extension has [{'name': 'B01', 'common_name': 'coastal'] | ||
return {'coastal':'B01'} | ||
""" | ||
band_info = [] | ||
try: | ||
# NOTE: ensure we test these scenarios | ||
# FileNotFoundError: [Errno 2] No such file or directory: '/catalog.json' | ||
# NOTE: maybe return entire dataframe w/ central_wavelength etc? | ||
common2band = {} | ||
# 1. try to get directly from item metadata | ||
if 'eo' in self._stac_obj.stac_extensions: | ||
eo = self._stac_obj.ext['eo'] | ||
for band in eo.bands: | ||
common2band[band.common_name] = band.name | ||
|
||
# 2. go a level up to collection metadata | ||
if common2band == {}: | ||
collection = self._stac_obj.get_collection() | ||
if 'item-assets' in collection.stac_extensions: | ||
for val in collection.ext['item_assets']: | ||
if 'eo:bands' in val: | ||
band_info.append(val.get('eo:bands')[0]) | ||
else: | ||
band_info = collection.summaries['eo:bands'] | ||
# Can simplify after item-assets extension implemented in Pystac | ||
# https://github.com/stac-utils/pystac/issues/132 | ||
for asset, meta in collection.extra_fields['item_assets'].items(): | ||
eo = meta.get('eo:bands') | ||
if eo: | ||
for entry in eo: | ||
common_name = entry.get('common_name') | ||
if common_name: | ||
common2band[common_name] = asset | ||
|
||
except Exception: | ||
for band in self._stac_obj.ext['eo'].get_bands(): | ||
band_info.append(band.to_dict()) | ||
finally: | ||
if not band_info: | ||
raise ValueError( | ||
'Unable to parse "eo:bands" information from STAC Collection or Item Assets' | ||
) | ||
return band_info | ||
return common2band | ||
|
||
def stack_bands(self, bands, path_as_pattern=None, concat_dim='band'): | ||
def stack_assets(self, assets, path_as_pattern=None, concat_dim='band', override_coords=None): | ||
Comment on lines
-270
to
+338
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. renamed again to be less EO-specific and more in line with the STAC assets. i don't think too many people are using this library yet, so i'm not concerned with disruptive changes. |
||
""" | ||
Stack the listed bands over the ``band`` dimension. | ||
Stack the listed assets over the ``band`` dimension. | ||
|
||
This method only works for STAC Items using the 'eo' Extension | ||
https://github.com/radiantearth/stac-spec/tree/master/extensions/eo | ||
|
||
NOTE: This method is not aware of geotransform information. It *assumes* | ||
bands for a given STAC Item have the same coordinate reference system (CRS). | ||
WARNING: This method is not aware of geotransform information. It *assumes* | ||
assets for a given STAC Item have the same coordinate reference system (CRS). | ||
This is usually the case for a given multi-band satellite acquisition. | ||
Coordinate alignment is performed automatically upon calling the | ||
`to_dask()` method to load into an Xarray DataArray if bands have diffent | ||
ground sample distance (gsd) or array shapes. | ||
Comment on lines
-280
to
-282
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i learned the hard way that this isn't exactly true. it works for landsat bands that were aligned based on GSD, but here are two with the same CRS and transform, different GSD that xarray fails to do what we expect... i'll open an issue to illustrate this (https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200814_0_L2A/B04.tif, https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200814_0_L2A/B05.tif) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So it might be best to raise a warning if CRS, Affine, and GSD do not match for requested stacked assets There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hi @scottyhq, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @vitusbenson i'd recommend explicitly interpolating, or using a gdal vrt url4 = 'https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200814_0_L2A/B04.tif'
url5 = 'https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200814_0_L2A/B05.tif'
da4 = rioxarray.open_rasterio(url4, chunks='auto').assign_coords(band=['B4']) #gsd=10
da5 = rioxarray.open_rasterio(url5, chunks='auto').assign_coords(band=['B5']) #gsd=20
da5_interp = da5.interp(y=da4['y'], x=da4['x'], method='nearest')
da = xr.concat([da5_interp, da4], dim='band') #gsd=10 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks - I'll have to give that a shot to compare. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you! I wasn't aware of rioxarray's interpolation capabilities - really useful! |
||
|
||
See the following documentation for dealing with different CRS or GSD: | ||
http://xarray.pydata.org/en/stable/interpolation.html | ||
https://corteva.github.io/rioxarray/stable/examples/reproject_match.html | ||
|
||
Parameters | ||
---------- | ||
bands : list of strings representing the different bands | ||
(e.g. ['B4', B5'], ['red', 'nir']). | ||
assets : list of strings representing the different assets | ||
(assset key or eo:bands "common_name" e.g. ['B4', B5'], ['red', 'nir']) | ||
path_as_pattern : pattern string to extract coordinates from asset href | ||
concat_dim : name of concatenation dimension for xarray.DataArray | ||
override_coords : list of custom coordinate names | ||
|
||
Returns | ||
------- | ||
StacAsset with mapping of Asset names to Xarray bands | ||
CombinedAssets instance with mapping of asset names to xarray coordinates | ||
|
||
Examples | ||
------- | ||
stack = item.stack_bands(['nir','red']) | ||
da = stack(chunks=dict(band=1, x=2048, y=2048)).to_dask() | ||
stack = item.stack_assets(['nir','red']) | ||
da = stack(chunks=True).to_dask() | ||
|
||
stack = item.stack_bands(['B4','B5'], path_as_pattern='{band}.TIF') | ||
stack = item.stack_assets(['B4','B5'], path_as_pattern='{band}.TIF') | ||
da = stack(chunks=dict(band=1, x=2048, y=2048)).to_dask() | ||
""" | ||
if 'eo' not in self._stac_obj.stac_extensions: | ||
raise ValueError('STAC Item must implement "eo" extension to use this method') | ||
|
||
band_info = self._get_band_info() | ||
configDict = {} | ||
metadatas = {} | ||
titles = [] | ||
metadatas = {'items': {self.name: {'STAC': self.metadata, 'assets': {}}}} | ||
hrefs = [] | ||
types = [] | ||
assets = self._stac_obj.assets | ||
for band in bands: | ||
# band can be band id, name or common_name | ||
if band in assets: | ||
info = next((b for b in band_info if b.get('id', b.get('name')) == band), None,) | ||
common2band = self._get_band_name_mapping() | ||
stac_assets = self._stac_obj.assets | ||
for key in assets: | ||
if key in stac_assets: | ||
asset = stac_assets.get(key) | ||
elif key in common2band: | ||
asset = stac_assets.get(common2band[key]) | ||
else: | ||
info = next((b for b in band_info if b.get('common_name') == band), None) | ||
if info is not None: | ||
band = info.get('id', info.get('name')) | ||
|
||
if band not in assets or info is None: | ||
valid_band_names = [] | ||
for b in band_info: | ||
valid_band_names.append(b.get('id', b.get('name'))) | ||
valid_band_names.append(b.get('common_name')) | ||
raise ValueError( | ||
f'{band} not found in list of eo:bands in collection.' | ||
f'Valid values: {sorted(list(set(valid_band_names)))}' | ||
f'Asset "{key}" not found in asset keys {list(common2band.values())}' | ||
f' or eo:bands common_names {list(common2band.keys())}' | ||
) | ||
asset = assets.get(band) | ||
metadatas[band] = asset.to_dict() | ||
titles.append(band) | ||
types.append(asset.media_type) | ||
hrefs.append(asset.href) | ||
|
||
unique_types = set(types) | ||
if len(unique_types) != 1: | ||
raise ValueError( | ||
f'Stacking failed: bands must have type, multiple found: {unique_types}' | ||
) | ||
# map *HREF* to metadata to do fancy things when opening it | ||
asset_metadata = asset.properties | ||
asset_metadata['key'] = key | ||
asset_metadata['item'] = self.name | ||
metadatas['items'][self.name]['assets'][asset.href] = asset_metadata | ||
hrefs.append(asset.href) | ||
|
||
configDict['name'] = '_'.join(bands) | ||
configDict['description'] = ', '.join(titles) | ||
configDict['name'] = self.name | ||
configDict['description'] = ', '.join(assets) | ||
# NOTE: these are args for driver __init__ method | ||
configDict['args'] = dict( | ||
chunks={}, concat_dim=concat_dim, path_as_pattern=path_as_pattern, urlpath=hrefs | ||
chunks={}, | ||
concat_dim=concat_dim, | ||
path_as_pattern=path_as_pattern, | ||
urlpath=hrefs, | ||
override_coords=override_coords, | ||
) | ||
configDict['metadata'] = metadatas | ||
|
||
return CombinedAssets(configDict) | ||
# instantiate to allow item.stack_assets(['red','nir']).to_dask() ? | ||
stack = CombinedAssets(configDict) | ||
|
||
return stack | ||
|
||
|
||
class StacAsset(LocalCatalogEntry): | ||
|
@@ -443,7 +503,7 @@ def _get_driver(self, asset): | |
) | ||
entry_type = asset.media_type | ||
|
||
# if mimetype not registered try rasterio driver | ||
# if mimetype not registered try rioxarray driver | ||
driver = drivers.get(entry_type, default_driver) | ||
|
||
return driver | ||
|
@@ -453,7 +513,7 @@ def _get_args(self, asset, driver): | |
Optional keyword arguments to pass to intake driver | ||
""" | ||
args = {'urlpath': asset.href} | ||
if driver in ['netcdf', 'rasterio', 'xarray_image']: | ||
if driver in ['netcdf', 'rasterio', 'rioxarray', 'xarray_image']: | ||
# NOTE: force using dask? | ||
args.update(chunks={}) | ||
|
||
|
@@ -465,14 +525,16 @@ class CombinedAssets(LocalCatalogEntry): | |
Maps multiple STAC Item Assets to 1 Intake Catalog Entry | ||
""" | ||
|
||
_stac_cls = None | ||
|
||
def __init__(self, configDict): | ||
""" | ||
configDict = intake Entry dictionary from stack_bands() method | ||
configDict = intake Entry intialization dictionary | ||
""" | ||
super().__init__( | ||
name=configDict['name'], | ||
description=configDict['description'], | ||
driver='rasterio', # stack_bands only relevant to rasterio driver? | ||
driver='rioxarray', | ||
direct_access=True, | ||
args=configDict['args'], | ||
metadata=configDict['metadata'], | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the way i ended up structuring this it ended up being basically a loop around
StacItem.stack_bands()
so could probably just call that method instead of a bunch of duplicate code?