Skip to content

Commit d8c1767

Browse files
committed
latest atl24 cleanup merged
2 parents e06dd34 + ca68256 commit d8c1767

File tree

19 files changed

+543
-424
lines changed

19 files changed

+543
-424
lines changed

clients/python/sliderule/earthdata.py

Lines changed: 43 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -234,29 +234,30 @@ def __cmr_collection_query(provider, short_name):
234234
return search_results['feed']['entry']
235235

236236
def __cmr_query(provider, short_name, version, time_start, time_end, **kwargs):
237-
"""Perform a scrolling CMR query for files matching input criteria."""
237+
"""Perform a search-after CMR query for files matching input criteria."""
238238
kwargs.setdefault('polygon',None)
239239
kwargs.setdefault('name_filter',None)
240240
kwargs.setdefault('return_metadata',False)
241241
# build params
242242
params = '&short_name={0}'.format(short_name)
243243
if version != None:
244244
params += '&version={0}'.format(version)
245-
if time_start != None and time_end != None:
245+
if time_start is not None and time_end is not None:
246246
params += '&temporal[]={0},{1}'.format(time_start, time_end)
247247
if kwargs['polygon']:
248248
params += '&polygon={0}'.format(kwargs['polygon'])
249249
if kwargs['name_filter']:
250250
params += '&options[producer_granule_id][pattern]=true'
251251
params += '&producer_granule_id[]=' + kwargs['name_filter']
252+
252253
CMR_URL = 'https://cmr.earthdata.nasa.gov'
253254
cmr_query_url = ('{0}/search/granules.json?provider={1}'
254255
'&sort_key[]=start_date&sort_key[]=producer_granule_id'
255-
'&scroll=true&page_size={2}'.format(CMR_URL, provider, CMR_PAGE_SIZE))
256+
'&page_size={2}'.format(CMR_URL, provider, CMR_PAGE_SIZE))
256257
cmr_query_url += params
257-
logger.debug('cmr request={0}\n'.format(cmr_query_url))
258+
logger.debug(f'Initial CMR request: {cmr_query_url}')
258259

259-
cmr_scroll_id = None
260+
cmr_search_after = None
260261
ctx = ssl.create_default_context()
261262
ctx.check_hostname = False
262263
ctx.verify_mode = ssl.CERT_NONE
@@ -266,15 +267,18 @@ def __cmr_query(provider, short_name, version, time_start, time_end, **kwargs):
266267
metadata = sliderule.emptyframe()
267268
while True:
268269
req = urllib.request.Request(cmr_query_url)
269-
if cmr_scroll_id:
270-
req.add_header('cmr-scroll-id', cmr_scroll_id)
270+
if cmr_search_after:
271+
req.add_header('CMR-Search-After', cmr_search_after)
272+
logger.debug(f'Requesting next page with CMR-Search-After: {cmr_search_after}')
273+
271274
response = urllib.request.urlopen(req, context=ctx)
272-
if not cmr_scroll_id:
273-
# Python 2 and 3 have different case for the http headers
274-
headers = {k.lower(): v for k, v in dict(response.info()).items()}
275-
cmr_scroll_id = headers['cmr-scroll-id']
275+
276+
headers = {k.lower(): v for k, v in dict(response.info()).items()}
277+
cmr_search_after = headers.get('cmr-search-after')
278+
276279
search_page = response.read()
277280
search_page = json.loads(search_page.decode('utf-8'))
281+
278282
url_scroll_results = __cmr_filter_urls(search_page, DATASETS[short_name]["formats"])
279283
if not url_scroll_results:
280284
break
@@ -284,10 +288,22 @@ def __cmr_query(provider, short_name, version, time_start, time_end, **kwargs):
284288
metadata_results = __cmr_granule_metadata(search_page)
285289
else:
286290
metadata_results = geopandas.pd.DataFrame([None for _ in url_scroll_results])
291+
287292
# append granule metadata
288293
metadata = geopandas.pd.concat([metadata, metadata_results])
289294

290-
return (urls,metadata)
295+
# Two ways to determine that there is no more data available:
296+
# 1. The number of granules in the current response is less than the requested 'page_size';
297+
# 2. The absence of the 'CMR-Search-After' header
298+
result_count = len(search_page['feed']['entry'])
299+
if result_count < CMR_PAGE_SIZE:
300+
logger.debug(f'Received {result_count} results, fewer than page size. Ending pagination after processing.')
301+
break
302+
if not cmr_search_after:
303+
logger.debug('No CMR-Search-After header found, no more pages.')
304+
break
305+
306+
return urls, metadata
291307

292308
###############################################################################
293309
# CMR UTILITIES
@@ -389,7 +405,12 @@ def __cmr_max_version(provider, short_name):
389405
#
390406
def __build_geojson(rsps):
391407
geojson = rsps.json()
392-
del geojson["links"]
408+
next = None
409+
if "links" in geojson:
410+
for link in geojson["links"]:
411+
if link["rel"] == "next":
412+
next = link["href"]
413+
del geojson["links"]
393414
if 'numberMatched' in geojson:
394415
del geojson['numberMatched']
395416
if 'numberReturned' in geojson:
@@ -410,7 +431,7 @@ def __build_geojson(rsps):
410431
if "href" in assetsDict[val]:
411432
propertiesDict[val] = assetsDict[val]["href"]
412433
del geojson["features"][i]["assets"]
413-
return geojson
434+
return geojson, next
414435

415436
#
416437
# Perform a STAC Query
@@ -450,22 +471,16 @@ def __stac_search(provider, short_name, collections, polygons, time_start, time_
450471
# make initial stac request
451472
data = context.post(url, data=json.dumps(rqst), headers=headers)
452473
data.raise_for_status()
453-
geojson = __build_geojson(data)
454-
455-
# iterate through additional pages if not all returned
456-
num_returned = geojson["context"]["returned"]
457-
num_matched = geojson["context"]["matched"]
458-
if num_matched > max_requested_resources:
459-
logger.warn("Number of matched resources truncated from {} to {}".format(num_matched, max_requested_resources))
460-
num_matched = max_requested_resources
461-
num_pages = int((num_matched + (num_returned - 1)) / num_returned)
462-
for page in range(2, num_pages+1):
463-
rqst["page"] = page
464-
data = context.post(url, data=json.dumps(rqst), headers=headers)
474+
geojson, next_link = __build_geojson(data)
475+
476+
# Continue fetching pages if 'next' link is available
477+
while next_link:
478+
data = context.get(next_link, headers=headers)
465479
data.raise_for_status()
466-
_geojson = __build_geojson(data)
480+
_geojson, next_link = __build_geojson(data)
467481
geojson["features"] += _geojson["features"]
468-
geojson["context"]["returned"] = num_matched
482+
483+
geojson["context"]["returned"] = len(geojson["features"])
469484
geojson["context"]["limit"] = max_requested_resources
470485

471486
# return geojson dictionary

clients/python/tests/test_gedi.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,8 @@ def test_gedi(self, init):
7979
gdf = icesat2.atl06p(parms, resources=[resource])
8080
assert init
8181
assert gdf.describe()["gedi.time"]["std"] == 0.0
82-
assert abs(gdf.describe()["gedi.value"]["mean"] - 3143.5934365441703) < 0.001
82+
# assert abs(gdf.describe()["gedi.value"]["mean"] - 3143.5934365441703) < 0.001
83+
assert abs(gdf.describe()["gedi.value"]["mean"] - 3142.8683679064293) < 0.001
8384
assert gdf.describe()["gedi.file_id"]["max"] == 0.0
8485
assert gdf.describe()["gedi.flags"]["max"] == 0.0
8586

@@ -136,4 +137,5 @@ def test_gedi(self, init):
136137
assert key in gdf.keys()
137138
assert abs(gdf.describe()["canopy_openness"]["max"] - 10.390829086303711) < 0.001
138139
df = gdf[gdf["gedi.value"] > -9999.0]
139-
assert abs(sum(df["gedi.value"]) - 42767.289459228516) < 0.001
140+
# assert abs(sum(df["gedi.value"]) - 4168.20367060658032) < 0.001
141+
assert abs(sum(df["gedi.value"]) - 42555.52866346482) < 0.001

clients/python/tests/test_landsat.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,18 @@ def test_samples(self, init):
2323
assert init
2424
assert len(rsps) > 0
2525

26+
def test_cmr_stac(self, init):
27+
time_start = "2000-01-01T00:00:00Z"
28+
time_end = "2022-02-01T23:59:59Z"
29+
polygon = [ {"lon": -177.0000000001, "lat": 51.0000000001},
30+
{"lon": -179.0000000001, "lat": 51.0000000001},
31+
{"lon": -179.0000000001, "lat": 49.0000000001},
32+
{"lon": -177.0000000001, "lat": 49.0000000001},
33+
{"lon": -177.0000000001, "lat": 51.0000000001} ]
34+
catalog = earthdata.stac(short_name="HLS", polygon=polygon, time_start=time_start, time_end=time_end, as_str=True)
35+
assert len(catalog) >= 6359
36+
37+
2638
def test_subset1(self, init):
2739
time_start = "2021-01-01T00:00:00Z"
2840
time_end = "2021-02-01T23:59:59Z"

datasets/landsat/package/LandsatHlsRaster.cpp

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ void LandsatHlsRaster::getIndexFile(const OGRGeometry* geo, std::string& file, c
143143
static_cast<void>(geo);
144144
static_cast<void>(points);
145145
file = indexFile;
146-
mlog(DEBUG, "Using %s", file.c_str());
146+
// mlog(DEBUG, "Using %s", file.c_str());
147147
}
148148

149149

@@ -344,7 +344,7 @@ uint32_t LandsatHlsRaster::_getGroupSamples(sample_mode_t mode, const rasters_gr
344344
assert(ur);
345345

346346
/* Get the sample for this point from unique raster */
347-
for(const point_sample_t& ps : ur->pointSamples)
347+
for(point_sample_t& ps : ur->pointSamples)
348348
{
349349
if(ps.pointIndex == pointIndx)
350350
{
@@ -360,12 +360,24 @@ uint32_t LandsatHlsRaster::_getGroupSamples(sample_mode_t mode, const rasters_gr
360360
const bool returnBandSample = it->second;
361361
if(returnBandSample)
362362
{
363-
/* Create a copy of the sample and add it to the list */
364-
RasterSample* sample = new RasterSample(*ps.sample);
365-
366-
/* Set flags for this sample */
367-
sample->flags = flags;
368-
slist->add(sample);
363+
RasterSample* s;
364+
if(!ps.sampleReturned.exchange(true))
365+
{
366+
s = ps.sample;
367+
}
368+
else
369+
{
370+
/* Sample has already been returned, must create a copy */
371+
s = new RasterSample(*ps.sample);
372+
}
373+
374+
/* Set time for this sample */
375+
s->time = rgroup->gpsTime / 1000;
376+
377+
/* Set flags for this sample, add it to the list */
378+
s->flags = flags;
379+
slist->add(s);
380+
errors |= ps.ssErrors;
369381
}
370382
}
371383
errors |= ps.ssErrors;

datasets/landsat/selftests/landsat_reader.lua

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,12 @@ runner.check(sampleCnt == 180)
323323
print(string.format("POI sample time: %.2f (%d threads)", stoptime - starttime, sampleCnt))
324324

325325

326+
--[[
327+
328+
EL - we are currently not using subsetting and this test is very slow (180 rasters to subset)
329+
- I am disabling it for now. There are other tests (shorter) which subset rasters.
330+
- I am leaving this here in case we want to re-enable it later.
331+
326332
print(string.format("\n-------------------------------------------------\nLandsat AOI Subset test\n-------------------------------------------------"))
327333
328334
-- AOI extent (extent of hls_trimmed.geojson)
@@ -367,6 +373,7 @@ if tbl ~= nil then
367373
runner.check(size > 0)
368374
end
369375
end
376+
--]]
370377

371378

372379

@@ -440,7 +447,7 @@ for i=1, maxSamples do
440447
sampleCnt = sampleCnt + 1
441448
end
442449
local stoptime = time.latch();
443-
print(string.format("POI sample %d points time: %.2f (%d threads)", sampleCnt, stoptime - starttime, threadCnt))
450+
print(string.format("POI sample %d points time: %.2f", sampleCnt, stoptime - starttime))
444451
runner.check(sampleCnt == maxSamples)
445452
dem = nil
446453

datasets/landsat/selftests/plugin_unittest.lua

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,28 +22,28 @@ while not aws.csget("lpdaac-cloud") do
2222
end
2323

2424

25-
local geojsonfile = td.."../data/hls_trimmed.geojson"
25+
local geojsonfile = td.."../data/grand_mesa.geojson"
2626
local f = io.open(geojsonfile, "r")
2727
local contents = f:read("*all")
2828
f:close()
2929

3030
-- Unit Test --
3131

32-
local lon = -179.0
33-
local lat = 51.0
34-
35-
local lon_incr = 0.01
36-
local lat_incr = 0.00
37-
local pointCount = 100
38-
3932
print(string.format("\n-------------------------------------------------\nLandsat Plugin test (NDVI)\n-------------------------------------------------"))
4033
local demType = "landsat-hls"
41-
local dem = geo.raster(geo.parms({ asset = demType, algorithm = "NearestNeighbour", radius = 0, bands = {"NDVI"}, catalog=contents, sort_by_index=true }))
34+
local t0str = "2022:01:05:00:00:00"
35+
local t1str = "2022:01:15:00:00:00"
36+
local dem = geo.raster(geo.parms({ asset = demType, algorithm = "NearestNeighbour", radius = 0, t0=t0str, t1=t1str, bands = {"NDVI"}, catalog = contents, sort_by_index = true }))
4237
runner.check(dem ~= nil)
4338

4439
local ut = geo.ut_sample(dem)
4540
runner.check(ut ~= nil)
46-
local status = ut:test(lon, lat, lon_incr, lat_incr, pointCount)
41+
-- This test ignores lon, lat, lon_incr, lat_incr, pointCount as they are not used.
42+
-- It opens a test file with points.
43+
local pointsFile = td.."../data/grand_mesa_poi.txt"
44+
local pointsInFile = 26183 -- number of points in file
45+
local maxPointCount = 1000 -- number of points to sample, 1000 will trigger all threaded code
46+
status = ut:test(0, 0, 0, 0, maxPointCount, pointsFile);
4747
runner.check(status, "Failed sampling test")
4848

4949
-- Clean Up --

datasets/landsat/systests/grand_mesa_test.lua

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ for i=1,#arr do
7272
local lon = arr[i][1]
7373
local lat = arr[i][2]
7474
local height = 0
75+
-- print(string.format("%0.16f, %0.16f", lon, lat))
7576
local tbl, err = dem:sample(lon, lat, height)
7677
if err ~= 0 then
7778
print(string.format("======> FAILED to read", lon, lat, height))
@@ -115,4 +116,3 @@ local dtime = stoptime - starttime
115116
print(string.format("\nSamples: %d, wrong NDVI: %d, wrong groupID: %d", samplesCnt, badNDVICnt, badFileCnt))
116117
print(string.format("ExecTime: %f", dtime))
117118

118-
os.exit()

datasets/opendata/selftests/worldcover_reader.lua

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ local assets = asset.loaddir()
1313
-- Unit Test --
1414

1515
local sigma = 1.0e-9
16-
local lon = -108.1
17-
local lat = 39.1
18-
local height = 0.0
16+
local lon = -108.1
17+
local lat = 39.1
18+
local height = 0.0
1919

2020

2121
print(string.format("\n-------------------------------------------------\nesa worldcover 10meter sample POI\n-------------------------------------------------"))
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
local runner = require("test_executive")
2+
local console = require("console")
3+
local asset = require("asset")
4+
local assets = asset.loaddir()
5+
local json = require("json")
6+
local _,td = runner.srcscript()
7+
8+
-- console.monitor:config(core.LOG, core.DEBUG)
9+
-- sys.setlvl(core.LOG, core.DEBUG)
10+
11+
-- Check If Present --
12+
if not core.UNITTEST then return end
13+
14+
-- Setup --
15+
local assets = asset.loaddir()
16+
17+
local geojsonfile = td.."../data/grand_mesa_1m_dem.geojson"
18+
local f = io.open(geojsonfile, "r")
19+
local contents = f:read("*all")
20+
f:close()
21+
22+
-- Unit Test --
23+
24+
print(string.format("\n-------------------------------------------------\n3dep unit test\n-------------------------------------------------"))
25+
local demType = "usgs3dep-1meter-dem"
26+
local dem = geo.raster(geo.parms({ asset = demType, algorithm = "NearestNeighbour", radius = 0, catalog = contents, sort_by_index = true }))
27+
runner.check(dem ~= nil)
28+
29+
ut = geo.ut_sample(dem)
30+
runner.check(ut ~= nil)
31+
-- This test ignores lon, lat, lon_incr, lat_incr, pointCount as they are not used.
32+
-- It opens a test file with points.
33+
local pointsFile = td.."../../landsat/data/grand_mesa_poi.txt"
34+
print(string.format("Points file: %s", pointsFile))
35+
local pointsInFile = 26183 -- number of points in file
36+
local maxPointCount = 110
37+
status = ut:test(0, 0, 0, 0, maxPointCount, pointsFile);
38+
runner.check(status, "Failed sampling test")
39+
ut = nil
40+
41+
42+
43+
-- Clean Up --
44+
45+
-- Report Results --
46+
47+
runner.report()
48+

0 commit comments

Comments
 (0)