atl24 cleanup for latest tools #434

Merged
merged 23 commits on Oct 22, 2024

Commits
3029136
atl24 cleanup for latest tools
elidwa Oct 2, 2024
2ca5974
created perf tests for different datasets
elidwa Oct 5, 2024
3e89a75
fixed debug statement
elidwa Oct 5, 2024
e1a1452
Fixed bug in poi time in arrow sampler, bug fixes
elidwa Oct 5, 2024
22dc8db
disabled debug stuff
elidwa Oct 7, 2024
551d9e7
using GEOS Rtree for features
elidwa Oct 7, 2024
1a55f6f
Implemented GeoRtree class
elidwa Oct 8, 2024
680a5fb
Rtree completely replaced vector of features, sorting added
elidwa Oct 8, 2024
0922103
fixed comment
elidwa Oct 8, 2024
39ca549
implemented sort_by_index geoparm, updated all lua tests
elidwa Oct 9, 2024
4980d43
removed index file bbox check
elidwa Oct 9, 2024
78b2756
reverting a few of the changes associated with the static analysis cl…
jpswinski Oct 9, 2024
04e306f
added tolerance for depth to gebco test, projlib issue
elidwa Oct 11, 2024
1200da1
aws per path region bug fix, global canopy on us-east-1
elidwa Oct 11, 2024
42cf2f8
fixed debug leftovers
elidwa Oct 11, 2024
d723f98
collecting samples uses all cpus, removed union code
elidwa Oct 15, 2024
46479ab
fixed broken cmr search, used cmr-search-after since scrolling no lon…
elidwa Oct 16, 2024
02d2349
fixed cmr_stac_search, uses 'next' in links, added test for it
elidwa Oct 16, 2024
f27dc6e
fixed bug in batch landsat code
elidwa Oct 16, 2024
81d0aa2
enabled info statement
elidwa Oct 16, 2024
fca672a
fixed coredump in landsat
elidwa Oct 17, 2024
403e92e
fixed gps time bugs in 3dep, one deadlock removed, unittests added
elidwa Oct 18, 2024
ca68256
optimized finding unique rasters
elidwa Oct 22, 2024
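
Commits 551d9e7 and 1a55f6f above replace the linear vector-of-features scan with an R-tree index (SlideRule's new GeoRtree class, built on GEOS). As a rough illustration of why that helps, here is a minimal Python sketch of the same lookup pattern using shapely's STRtree; the feature boxes and query point are made up for the example and are not SlideRule code:

    from shapely.geometry import Point, box
    from shapely.strtree import STRtree

    # Hypothetical feature footprints standing in for the raster/vector features.
    features = [box(0, 0, 1, 1), box(1, 1, 2, 2), box(5, 5, 6, 6)]

    # Bulk-load an R-tree over the footprints once...
    tree = STRtree(features)

    # ...then each point-of-interest lookup is a logarithmic tree query
    # instead of a linear scan over every feature.
    hits = tree.query(Point(0.5, 0.5))        # shapely 2.x returns integer indices
    matching = [features[i] for i in hits]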
71 changes: 43 additions & 28 deletions clients/python/sliderule/earthdata.py
@@ -234,29 +234,30 @@ def __cmr_collection_query(provider, short_name):
return search_results['feed']['entry']

def __cmr_query(provider, short_name, version, time_start, time_end, **kwargs):
"""Perform a scrolling CMR query for files matching input criteria."""
"""Perform a search-after CMR query for files matching input criteria."""
kwargs.setdefault('polygon',None)
kwargs.setdefault('name_filter',None)
kwargs.setdefault('return_metadata',False)
# build params
params = '&short_name={0}'.format(short_name)
if version != None:
params += '&version={0}'.format(version)
if time_start != None and time_end != None:
if time_start is not None and time_end is not None:
params += '&temporal[]={0},{1}'.format(time_start, time_end)
if kwargs['polygon']:
params += '&polygon={0}'.format(kwargs['polygon'])
if kwargs['name_filter']:
params += '&options[producer_granule_id][pattern]=true'
params += '&producer_granule_id[]=' + kwargs['name_filter']

CMR_URL = 'https://cmr.earthdata.nasa.gov'
cmr_query_url = ('{0}/search/granules.json?provider={1}'
'&sort_key[]=start_date&sort_key[]=producer_granule_id'
'&scroll=true&page_size={2}'.format(CMR_URL, provider, CMR_PAGE_SIZE))
'&page_size={2}'.format(CMR_URL, provider, CMR_PAGE_SIZE))
cmr_query_url += params
logger.debug('cmr request={0}\n'.format(cmr_query_url))
logger.debug(f'Initial CMR request: {cmr_query_url}')

cmr_scroll_id = None
cmr_search_after = None
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
@@ -266,15 +267,18 @@ def __cmr_query(provider, short_name, version, time_start, time_end, **kwargs):
metadata = sliderule.emptyframe()
while True:
req = urllib.request.Request(cmr_query_url)
if cmr_scroll_id:
req.add_header('cmr-scroll-id', cmr_scroll_id)
if cmr_search_after:
req.add_header('CMR-Search-After', cmr_search_after)
logger.debug(f'Requesting next page with CMR-Search-After: {cmr_search_after}')

response = urllib.request.urlopen(req, context=ctx)
if not cmr_scroll_id:
# Python 2 and 3 have different case for the http headers
headers = {k.lower(): v for k, v in dict(response.info()).items()}
cmr_scroll_id = headers['cmr-scroll-id']

headers = {k.lower(): v for k, v in dict(response.info()).items()}
cmr_search_after = headers.get('cmr-search-after')

search_page = response.read()
search_page = json.loads(search_page.decode('utf-8'))

url_scroll_results = __cmr_filter_urls(search_page, DATASETS[short_name]["formats"])
if not url_scroll_results:
break
@@ -284,10 +288,22 @@ def __cmr_query(provider, short_name, version, time_start, time_end, **kwargs):
metadata_results = __cmr_granule_metadata(search_page)
else:
metadata_results = geopandas.pd.DataFrame([None for _ in url_scroll_results])

# append granule metadata
metadata = geopandas.pd.concat([metadata, metadata_results])

return (urls,metadata)
# Two ways to determine that there is no more data available:
# 1. The number of granules in the current response is less than the requested 'page_size'.
# 2. The absence of the 'CMR-Search-After' header
result_count = len(search_page['feed']['entry'])
if result_count < CMR_PAGE_SIZE:
logger.debug(f'Received {result_count} results, fewer than page size. Ending pagination after processing.')
break
if not cmr_search_after:
logger.debug('No CMR-Search-After header found, no more pages.')
break

return urls, metadata
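
The rewritten loop above replaces CMR's deprecated scrolling with search-after pagination: every response carries a CMR-Search-After header, and echoing it back on the next request fetches the next page until the header disappears or a short page comes back. A minimal standalone sketch of the same protocol with requests; the provider and short_name values are illustrative only:

    import requests

    CMR_URL = 'https://cmr.earthdata.nasa.gov/search/granules.json'
    params = {'provider': 'NSIDC_CPRD', 'short_name': 'ATL03', 'page_size': 2000}

    entries = []
    search_after = None
    while True:
        headers = {'CMR-Search-After': search_after} if search_after else {}
        rsps = requests.get(CMR_URL, params=params, headers=headers)
        rsps.raise_for_status()
        page = rsps.json()['feed'].get('entry', [])
        entries.extend(page)
        # CMR returns a new header value while more pages remain
        search_after = rsps.headers.get('CMR-Search-After')
        if len(page) < params['page_size'] or not search_after:
            break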

###############################################################################
# CMR UTILITIES
@@ -389,7 +405,12 @@ def __cmr_max_version(provider, short_name):
#
def __build_geojson(rsps):
geojson = rsps.json()
del geojson["links"]
next = None
if "links" in geojson:
for link in geojson["links"]:
if link["rel"] == "next":
next = link["href"]
del geojson["links"]
if 'numberMatched' in geojson:
del geojson['numberMatched']
if 'numberReturned' in geojson:
Expand All @@ -410,7 +431,7 @@ def __build_geojson(rsps):
if "href" in assetsDict[val]:
propertiesDict[val] = assetsDict[val]["href"]
del geojson["features"][i]["assets"]
return geojson
return geojson, next

#
# Perform a STAC Query
@@ -450,22 +471,16 @@ def __stac_search(provider, short_name, collections, polygons, time_start, time_
# make initial stac request
data = context.post(url, data=json.dumps(rqst), headers=headers)
data.raise_for_status()
geojson = __build_geojson(data)

# iterate through additional pages if not all returned
num_returned = geojson["context"]["returned"]
num_matched = geojson["context"]["matched"]
if num_matched > max_requested_resources:
logger.warn("Number of matched resources truncated from {} to {}".format(num_matched, max_requested_resources))
num_matched = max_requested_resources
num_pages = int((num_matched + (num_returned - 1)) / num_returned)
for page in range(2, num_pages+1):
rqst["page"] = page
data = context.post(url, data=json.dumps(rqst), headers=headers)
geojson, next_link = __build_geojson(data)

# Continue fetching pages if 'next' link is available
while next_link:
data = context.get(next_link, headers=headers)
data.raise_for_status()
_geojson = __build_geojson(data)
_geojson, next_link = __build_geojson(data)
geojson["features"] += _geojson["features"]
geojson["context"]["returned"] = num_matched

geojson["context"]["returned"] = len(geojson["features"])
geojson["context"]["limit"] = max_requested_resources

# return geojson dictionary
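The STAC side gets the matching fix: rather than deriving a page count from context["matched"] (which some deployments cap or omit), __build_geojson now surfaces the 'next' link from each response and __stac_search follows it until it runs out. A hedged standalone sketch of that link-following pattern; the endpoint URL and request body are placeholders, not SlideRule's configuration:

    import requests

    url = 'https://cmr.earthdata.nasa.gov/stac/LPCLOUD/search'   # placeholder endpoint
    rqst = {'collections': ['HLSS30.v2.0'], 'limit': 100}        # placeholder body

    features = []
    while url:
        # first page is a POST with the search body; 'next' links are plain GETs
        rsps = requests.post(url, json=rqst) if rqst else requests.get(url)
        rsps.raise_for_status()
        page = rsps.json()
        features += page.get('features', [])
        url = next((lnk['href'] for lnk in page.get('links', [])
                    if lnk.get('rel') == 'next'), None)
        rqst = None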
6 changes: 4 additions & 2 deletions clients/python/tests/test_gedi.py
@@ -79,7 +79,8 @@ def test_gedi(self, init):
gdf = icesat2.atl06p(parms, resources=[resource])
assert init
assert gdf.describe()["gedi.time"]["std"] == 0.0
assert abs(gdf.describe()["gedi.value"]["mean"] - 3143.5934365441703) < 0.001
# assert abs(gdf.describe()["gedi.value"]["mean"] - 3143.5934365441703) < 0.001
assert abs(gdf.describe()["gedi.value"]["mean"] - 3142.8683679064293) < 0.001
assert gdf.describe()["gedi.file_id"]["max"] == 0.0
assert gdf.describe()["gedi.flags"]["max"] == 0.0

@@ -136,4 +137,5 @@ def test_gedi(self, init):
assert key in gdf.keys()
assert abs(gdf.describe()["canopy_openness"]["max"] - 10.390829086303711) < 0.001
df = gdf[gdf["gedi.value"] > -9999.0]
assert abs(sum(df["gedi.value"]) - 42767.289459228516) < 0.001
# assert abs(sum(df["gedi.value"]) - 4168.20367060658032) < 0.001
assert abs(sum(df["gedi.value"]) - 42555.52866346482) < 0.001
12 changes: 12 additions & 0 deletions clients/python/tests/test_landsat.py
@@ -23,6 +23,18 @@ def test_samples(self, init):
assert init
assert len(rsps) > 0

def test_cmr_stac(self, init):
time_start = "2000-01-01T00:00:00Z"
time_end = "2022-02-01T23:59:59Z"
polygon = [ {"lon": -177.0000000001, "lat": 51.0000000001},
{"lon": -179.0000000001, "lat": 51.0000000001},
{"lon": -179.0000000001, "lat": 49.0000000001},
{"lon": -177.0000000001, "lat": 49.0000000001},
{"lon": -177.0000000001, "lat": 51.0000000001} ]
catalog = earthdata.stac(short_name="HLS", polygon=polygon, time_start=time_start, time_end=time_end, as_str=True)
assert len(catalog) >= 6359


def test_subset1(self, init):
time_start = "2021-01-01T00:00:00Z"
time_end = "2021-02-01T23:59:59Z"
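The new test_cmr_stac above exercises that paginated path end to end; with as_str=True the catalog comes back as a JSON string, which is why the assertion is on string length. A small usage sketch (same arguments as the test, rounded coordinates for brevity) for a caller who wants the feature count instead:

    import json
    from sliderule import earthdata

    polygon = [{"lon": -177.0, "lat": 51.0}, {"lon": -179.0, "lat": 51.0},
               {"lon": -179.0, "lat": 49.0}, {"lon": -177.0, "lat": 49.0},
               {"lon": -177.0, "lat": 51.0}]  # closed ring, first point repeated
    catalog = earthdata.stac(short_name="HLS", polygon=polygon,
                             time_start="2000-01-01T00:00:00Z",
                             time_end="2022-02-01T23:59:59Z", as_str=True)
    geojson = json.loads(catalog)    # as_str=True returns the GeoJSON as text
    print(len(geojson["features"]))  # count of returned STAC items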
14 changes: 7 additions & 7 deletions datasets/bathy/package/BathyDataFrame.cpp
@@ -102,7 +102,7 @@ int BathyDataFrame::luaCreate (lua_State* L)
*----------------------------------------------------------------------------*/
BathyDataFrame::BathyDataFrame (lua_State* L, const char* beam_str, BathyFields* _parms, H5Object* _hdf03, H5Object* _hdf09, const char* rqstq_name, BathyMask* _mask):
GeoDataFrame(L, LUA_META_NAME, LUA_META_TABLE,
{
{
{"time_ns", &time_ns},
{"index_ph", &index_ph},
{"index_seg", &index_seg},
@@ -124,7 +124,7 @@ BathyDataFrame::BathyDataFrame (lua_State* L, const char* beam_str, BathyFields*
{"class_ph", &class_ph},
{"predictions", &predictions},
{"geoid_corr_h", &geoid_corr_h},
},
},
{
{"spot", &spot},
{"beam", &beam},
@@ -190,7 +190,7 @@ BathyDataFrame::~BathyDataFrame (void)
{
active = false;
delete pid;

delete rqstQ;

hdf03->releaseLuaObject();
@@ -530,7 +530,7 @@ void* BathyDataFrame::subsettingThread (void* parm)
GeoLib::UTMTransform utm_transform(region.segment_lat[0], region.segment_lon[0]);
dataframe.utm_zone = utm_transform.zone;
dataframe.utm_is_north = region.segment_lat[0] >= 0.0;

/* Traverse All Photons In Dataset */
while(dataframe.active && (current_photon < atl03.dist_ph_along.size))
{
@@ -670,7 +670,7 @@ void* BathyDataFrame::subsettingThread (void* parm)
dataframe.yapc_score.append(yapc_score);
dataframe.max_signal_conf.append(atl03_cnf);
dataframe.quality_ph.append(quality_ph);
dataframe.processing_flags.append(on_boundary ? BathyFields::ON_BOUNDARY : 0x00);
dataframe.processing_flags.append(on_boundary ? BathyFields::ON_BOUNDARY : BathyFields::FLAGS_CLEAR);

/* Add Additional Photon Data to DataFrame */
dataframe.geoid_corr_h.append(atl03.h_ph[current_photon] - atl03.geoid[current_segment]);
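
A small cleanup in the hunk above: the magic 0x00 literal for "no flags" becomes the named BathyFields::FLAGS_CLEAR constant. For illustration, the same pattern in Python with enum.IntFlag (the names mirror the C++ enum, but this Python class is hypothetical, not SlideRule code):

    from enum import IntFlag

    class ProcessingFlags(IntFlag):  # hypothetical stand-in for BathyFields' flags
        FLAGS_CLEAR = 0x00
        ON_BOUNDARY = 0x01

    on_boundary = True
    flags = ProcessingFlags.ON_BOUNDARY if on_boundary else ProcessingFlags.FLAGS_CLEAR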
@@ -762,7 +762,7 @@ int BathyDataFrame::luaIsValid (lua_State* L)
bool status = false;
try
{
BathyDataFrame* lua_obj = dynamic_cast<BathyDataFrame*>(getLuaSelf(L, 1));
const BathyDataFrame* lua_obj = dynamic_cast<BathyDataFrame*>(getLuaSelf(L, 1));
status = lua_obj->valid;
}
catch(const RunTimeException& e)
@@ -780,7 +780,7 @@ int BathyDataFrame::luaLength (lua_State* L)
{
try
{
BathyDataFrame* lua_obj = dynamic_cast<BathyDataFrame*>(getLuaSelf(L, 1));
const BathyDataFrame* lua_obj = dynamic_cast<BathyDataFrame*>(getLuaSelf(L, 1));
lua_pushinteger(L, lua_obj->length());
}
catch(const RunTimeException& e)