Skip to content

Commit ca68256

Browse files
committed
optimized finding unique rasters
1 parent 403e92e commit ca68256

File tree

5 files changed

+72
-76
lines changed

5 files changed

+72
-76
lines changed

packages/geo/GeoIndexedRaster.cpp

Lines changed: 47 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,7 @@
4848
* STATIC DATA
4949
******************************************************************************/
5050

51-
const double GeoIndexedRaster::DISTANCE = 0.01; /* Approximately 1000 meters at equator */
52-
const double GeoIndexedRaster::TOLERANCE = DISTANCE/10; /* Tolerance for simplification */
51+
const double GeoIndexedRaster::TOLERANCE = 0.01; /* Tolerance for simplification; also used as the convex hull buffer distance */
5352

5453
const char* GeoIndexedRaster::FLAGS_TAG = "Fmask";
5554
const char* GeoIndexedRaster::VALUE_TAG = "Value";
@@ -206,6 +205,10 @@ uint32_t GeoIndexedRaster::getSamples(const std::vector<point_info_t>& points, L
206205

207206
perfStats.clear();
208207

208+
/* Clear raster cache and file dictionary used by serialized getSamples */
209+
cache.clear();
210+
fileDictClear();
211+
209212
/* Vector of points and their associated raster groups */
210213
std::vector<point_groups_t> pointsGroups;
211214

@@ -222,19 +225,23 @@ uint32_t GeoIndexedRaster::getSamples(const std::vector<point_info_t>& points, L
222225
throw RunTimeException(CRITICAL, RTE_ERROR, "Error opening index file");
223226
}
224227

225-
/* Rasters to points map */
226-
raster_points_map_t rasterToPointsMap;
227-
228-
/* For all points from the caller, create a vector of raster group lists */
229-
if(!findAllGroups(&points, pointsGroups, rasterToPointsMap))
230228
{
231-
throw RunTimeException(CRITICAL, RTE_ERROR, "Error creating groups");
232-
}
229+
/* Rasters to points map */
230+
raster_points_map_t rasterToPointsMap;
233231

234-
/* For all points from the caller, create a vector of unique rasters */
235-
if(!findUniqueRasters(uniqueRasters, pointsGroups, rasterToPointsMap))
236-
{
237-
throw RunTimeException(CRITICAL, RTE_ERROR, "Error finding unique rasters");
232+
/* For all points from the caller, create a vector of raster group lists */
233+
if(!findAllGroups(&points, pointsGroups, rasterToPointsMap))
234+
{
235+
throw RunTimeException(CRITICAL, RTE_ERROR, "Error creating groups");
236+
}
237+
238+
/* For all points from the caller, create a vector of unique rasters */
239+
if(!findUniqueRasters(uniqueRasters, pointsGroups, rasterToPointsMap))
240+
{
241+
throw RunTimeException(CRITICAL, RTE_ERROR, "Error finding unique rasters");
242+
}
243+
244+
/* rasterToPointsMap is no longer needed */
238245
}
239246

240247
/* For all unique rasters, sample them */
@@ -946,10 +953,10 @@ void* GeoIndexedRaster::batchReaderThread(void *param)
946953
{
947954
unique_raster_t* ur = breader->uraster;
948955
GdalRaster* raster = new GdalRaster(breader->obj->parms,
949-
ur->rinfo->fileName,
956+
ur->fileName,
950957
0, /* Sample collecting code will set it to group's gpsTime */
951958
ur->fileId,
952-
ur->rinfo->dataIsElevation,
959+
ur->dataIsElevation,
953960
breader->obj->crscb);
954961

955962
/* Sample all points for this raster */
@@ -1383,7 +1390,7 @@ OGRGeometry* GeoIndexedRaster::getConvexHull(const std::vector<point_info_t>* po
13831390
}
13841391

13851392
/* Add a buffer around the convex hull to avoid missing edge points */
1386-
OGRGeometry* bufferedConvexHull = convexHull->Buffer(DISTANCE);
1393+
OGRGeometry* bufferedConvexHull = convexHull->Buffer(TOLERANCE);
13871394
if(bufferedConvexHull)
13881395
{
13891396
OGRGeometryFactory::destroyGeometry(convexHull);
@@ -1397,7 +1404,7 @@ OGRGeometry* GeoIndexedRaster::getConvexHull(const std::vector<point_info_t>* po
13971404
*----------------------------------------------------------------------------*/
13981405
void GeoIndexedRaster::applySpatialFilter(OGRLayer* layer, const std::vector<point_info_t>* points)
13991406
{
1400-
mlog(DEBUG, "Features before spatial filter: %lld", layer->GetFeatureCount());
1407+
mlog(INFO, "Features before spatial filter: %lld", layer->GetFeatureCount());
14011408

14021409
const double startTime = TimeLib::latchtime();
14031410

@@ -1410,10 +1417,6 @@ void GeoIndexedRaster::applySpatialFilter(OGRLayer* layer, const std::vector<poi
14101417
layer->SetSpatialFilter(filter);
14111418
OGRGeometryFactory::destroyGeometry(filter);
14121419
}
1413-
else
1414-
{
1415-
mlog(ERROR, "Failed to create polygon for spatial filter");
1416-
}
14171420
perfStats.spatialFilterTime = TimeLib::latchtime() - startTime;
14181421

14191422
mlog(DEBUG, "Features after spatial filter: %lld", layer->GetFeatureCount());
@@ -1435,15 +1438,15 @@ bool GeoIndexedRaster::findAllGroups(const std::vector<point_info_t>* points,
14351438

14361439
try
14371440
{
1438-
mlog(DEBUG, "Finding rasters groups for all points");
1439-
14401441
/* Start rasters groups finder threads */
14411442
std::vector<Thread*> pids;
14421443
std::vector<GroupsFinder*> rgroupFinders;
14431444

14441445
const uint32_t numMaxThreads = std::thread::hardware_concurrency();
14451446
const uint32_t minPointsPerThread = 100;
14461447

1448+
mlog(INFO, "Finding rasters groups for all points with %u threads", numMaxThreads);
1449+
14471450
std::vector<range_t> pointsRanges;
14481451
getThreadsRanges(pointsRanges, points->size(), minPointsPerThread, numMaxThreads);
14491452
const uint32_t numThreads = pointsRanges.size();
@@ -1463,10 +1466,10 @@ bool GeoIndexedRaster::findAllGroups(const std::vector<point_info_t>* points,
14631466
delete pid;
14641467
}
14651468

1466-
mlog(DEBUG, "All groups finders time: %lf", TimeLib::latchtime() - startTime);
1469+
mlog(INFO, "All groups finders time: %lf", TimeLib::latchtime() - startTime);
14671470

14681471
/* Merge the pointGroups for each thread */
1469-
mlog(DEBUG, "Merging point groups from all threads");
1472+
mlog(INFO, "Merging point groups from all threads");
14701473
for(GroupsFinder* gf : rgroupFinders)
14711474
{
14721475
pointsGroups.insert(pointsGroups.end(), gf->pointsGroups.begin(), gf->pointsGroups.end());
@@ -1518,6 +1521,9 @@ bool GeoIndexedRaster::findUniqueRasters(std::vector<unique_raster_t*>& uniqueRa
15181521

15191522
try
15201523
{
1524+
/* Map to track the index of each unique raster in the uniqueRasters vector */
1525+
std::unordered_map<std::string, size_t> fileIndexMap;
1526+
15211527
/* Create vector of unique rasters. */
15221528
mlog(DEBUG, "Finding unique rasters");
15231529
for(const point_groups_t& pg : pointsGroups)
@@ -1529,41 +1535,34 @@ bool GeoIndexedRaster::findUniqueRasters(std::vector<unique_raster_t*>& uniqueRa
15291535
for(raster_info_t& rinfo : rgroup->infovect)
15301536
{
15311537
/* Is this raster already in the list of unique rasters? */
1532-
bool addNewRaster = true;
1533-
for(unique_raster_t* ur : uniqueRasters)
1538+
auto it = fileIndexMap.find(rinfo.fileName);
1539+
if(it != fileIndexMap.end())
15341540
{
1535-
if(ur->rinfo->fileName == rinfo.fileName)
1536-
{
1537-
/* already in unique rasters list, set pointer in rinfo to this raster */
1538-
rinfo.uraster = ur;
1539-
addNewRaster = false;
1540-
break;
1541-
}
1541+
/* Raster is already in the vector of unique rasters, get index from map and update uraster pointer */
1542+
rinfo.uraster = uniqueRasters[it->second];
15421543
}
1543-
1544-
if(addNewRaster)
1544+
else
15451545
{
1546-
unique_raster_t* ur = new unique_raster_t();
1547-
ur->rinfo = &rinfo;
1546+
/* Raster is not in the vector of unique rasters */
1547+
unique_raster_t* ur = new unique_raster_t(rinfo.dataIsElevation, rinfo.fileName);
15481548
ur->fileId = fileDictAdd(rinfo.fileName);
15491549
uniqueRasters.push_back(ur);
15501550

1551-
/* Set pointer in rinfo to this new unique raster */
1551+
/* Set pointer in rinfo to new unique raster */
15521552
rinfo.uraster = ur;
1553+
1554+
/* Update index map */
1555+
fileIndexMap[rinfo.fileName] = uniqueRasters.size() - 1;
15531556
}
15541557
}
15551558
}
15561559
}
15571560

1558-
/*
1559-
* For each unique raster, find the points that belong to it
1560-
*/
1561+
/* For each unique raster, find the points that belong to it */
15611562
mlog(DEBUG, "Finding points for unique rasters");
15621563
for(unique_raster_t* ur : uniqueRasters)
15631564
{
1564-
const std::string& rasterName = ur->rinfo->fileName;
1565-
1566-
auto it = rasterToPointsMap.find(rasterName);
1565+
auto it = rasterToPointsMap.find(ur->fileName);
15671566
if(it != rasterToPointsMap.end())
15681567
{
15691568
for(const uint32_t pointIndx : it->second)
@@ -1577,7 +1576,6 @@ bool GeoIndexedRaster::findUniqueRasters(std::vector<unique_raster_t*>& uniqueRa
15771576

15781577
/* Reduce memory usage */
15791578
uniqueRasters.shrink_to_fit();
1580-
15811579
status = true;
15821580
}
15831581
catch(const RunTimeException& e)
@@ -1586,7 +1584,7 @@ bool GeoIndexedRaster::findUniqueRasters(std::vector<unique_raster_t*>& uniqueRa
15861584
}
15871585

15881586
perfStats.findUniqueRastersTime = TimeLib::latchtime() - startTime;
1589-
mlog(DEBUG, "Unique rasters time: %lf", perfStats.findUniqueRastersTime);
1587+
mlog(INFO, "Unique rasters time: %lf", perfStats.findUniqueRastersTime);
15901588

15911589
return status;
15921590
}
@@ -1683,7 +1681,6 @@ bool GeoIndexedRaster::sampleUniqueRasters(const std::vector<unique_raster_t*>&
16831681
breader->sync.unlock();
16841682
}
16851683

1686-
mlog(DEBUG, "Done Sampling %u rasters", numRasters);
16871684
status = true;
16881685
}
16891686
catch(const RunTimeException& e)
@@ -1692,6 +1689,7 @@ bool GeoIndexedRaster::sampleUniqueRasters(const std::vector<unique_raster_t*>&
16921689
}
16931690

16941691
perfStats.samplesTime = TimeLib::latchtime() - startTime;
1692+
mlog(INFO, "Done Sampling, time: %lf", perfStats.samplesTime);
16951693
return status;
16961694
}
16971695

@@ -1754,15 +1752,10 @@ bool GeoIndexedRaster::collectSamples(const std::vector<point_groups_t>& pointsG
17541752
mlog(DEBUG, "Merged %d sample lists, time: %lf", sllist.length(), TimeLib::latchtime() - mergeStart);
17551753

17561754
perfStats.collectSamplesTime = TimeLib::latchtime() - start;
1757-
mlog(DEBUG, "Populated sllist with %d lists of samples, time: %lf", sllist.length(), perfStats.collectSamplesTime);
1755+
mlog(INFO, "Populated sllist with %d lists of samples, time: %lf", sllist.length(), perfStats.collectSamplesTime);
17581756

17591757
return true;
17601758
}
17611759

17621760

17631761

1764-
1765-
1766-
1767-
1768-

packages/geo/GeoIndexedRaster.h

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@ class GeoIndexedRaster: public RasterObject
5959
* Constants
6060
*--------------------------------------------------------------------*/
6161

62-
static const double DISTANCE;
6362
static const double TOLERANCE;
6463

6564
static const int MAX_CACHE_SIZE = 20;
@@ -88,20 +87,23 @@ class GeoIndexedRaster: public RasterObject
8887

8988
} point_sample_t;
9089

91-
struct unique_raster_t;
90+
struct UniqueRaster;
9291
typedef struct RasterInfo {
93-
bool dataIsElevation;
94-
std::string tag;
95-
std::string fileName;
96-
unique_raster_t* uraster; // only used for batch reading
92+
bool dataIsElevation;
93+
std::string tag;
94+
std::string fileName;
95+
UniqueRaster* uraster; // only used for batch reading
9796

9897
RasterInfo(void): dataIsElevation(false), uraster(NULL) {}
9998
} raster_info_t;
10099

101-
typedef struct unique_raster_t {
102-
raster_info_t* rinfo;
100+
typedef struct UniqueRaster {
101+
bool dataIsElevation;
102+
const std::string& fileName;
103103
uint64_t fileId;
104104
std::vector<point_sample_t> pointSamples;
105+
explicit UniqueRaster(bool _dataIsElevation, const std::string& _fileName):
106+
dataIsElevation(_dataIsElevation), fileName(_fileName), fileId(0) {}
105107
} unique_raster_t;
106108

107109
typedef struct RaserGroup {
@@ -168,7 +170,7 @@ class GeoIndexedRaster: public RasterObject
168170
explicit SampleCollector(GeoIndexedRaster* _obj, const std::vector<point_groups_t>& _pointsGroups);
169171
} sample_collector_t;
170172

171-
/* Typedef for the global map (raster file name -> set of unique point IDs) */
173+
/* Typedef for the map of raster file name -> set of unique ordered points */
172174
typedef std::unordered_map<std::string, std::set<uint32_t>> raster_points_map_t;
173175

174176
typedef struct GroupsFinder {

packages/geo/RasterObject.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,18 @@ const char* RasterObject::fileDictGetFile (uint64_t fileId)
382382
return fileName;
383383
}
384384

385+
/*----------------------------------------------------------------------------
386+
* fileDictClear
387+
*----------------------------------------------------------------------------*/
388+
void RasterObject::fileDictClear (void)
389+
{
390+
fileDictMut.lock();
391+
{
392+
fileDict.clear();
393+
}
394+
fileDictMut.unlock();
395+
}
396+
385397
/*----------------------------------------------------------------------------
386398
* getThreadsRanges
387399
*----------------------------------------------------------------------------*/

packages/geo/RasterObject.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ class RasterObject: public LuaObject
145145
bool sampling (void) {return samplingEnabled;};
146146
uint64_t fileDictAdd (const std::string& fileName);
147147
const char* fileDictGetFile (uint64_t fileId);
148+
void fileDictClear (void);
148149
static void getThreadsRanges(std::vector<range_t>& ranges, uint32_t num,
149150
uint32_t minPerThread, uint32_t maxNumThreads);
150151

packages/geo/UT_RasterSubset.cpp

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -244,18 +244,6 @@ int UT_RasterSubset::luaSubsetTest(lua_State* L)
244244
*----------------------------------------------------------------------------*/
245245
const char* UT_RasterSubset::getRasterName(RasterObject* robj, uint64_t fileId)
246246
{
247-
const char* fileName = NULL;
248-
249-
/* Find fileName from fileId */
250-
Dictionary<uint64_t>::Iterator iterator(robj->fileDictGet());
251-
for(int i = 0; i < iterator.length; i++)
252-
{
253-
if(iterator[i].value == fileId)
254-
{
255-
fileName = iterator[i].key;
256-
break;
257-
}
258-
}
259-
247+
const char* fileName = robj->fileDictGetFile(fileId);
260248
return StringLib::duplicate(fileName);
261249
}

0 commit comments

Comments
 (0)