@@ -41,7 +41,11 @@ void printHelp(int argc, char** argv) {
4141 << " \n "
4242 << " (C) 2023-" << YEAR << " " << COPY << " \n "
4343 << " Authors: " << AUTHORS << " \n\n "
44- << " Usage: " << argv[0 ] << " [--help] [-h] [input]\n\n "
44+ << " Usage: " << argv[0 ] << " [OPTIONS] [INPUT1] [INPUT2]\n\n "
45+ << " With input from either stdin, or from file(s) [INPUT1] and [INPUT2] "
46+ " (.bz2 or .gz\n supported). If "
47+ << " both [INPUT1] and [INPUT2] are given, compute a non-self join with\n "
48+ << " [INPUT1] on the left side, [INPUT2] on the right side.\n\n "
4549 << " Allowed options:\n\n "
4650 << std::setfill (' ' ) << std::left << " General:\n "
4751 << std::setw (42 ) << " -h [ --help ]"
@@ -52,6 +56,11 @@ void printHelp(int argc, char** argv) {
5256 << " cache directory for intermediate files\n "
5357 << std::setw (42 ) << " --de9im"
5458 << " output DE-9IM relationships\n "
59+ << std::setw (42 ) << " --within-distance (default: '')"
60+ << " if set to non-negative value, only compute for each object\n "
61+ << std::setw (42 ) << " "
62+ << " the objects within the given distance\n\n "
63+ << std::setfill (' ' ) << std::left << " Formatting:\n "
5564 << std::setw (42 ) << " --prefix (default: '')"
5665 << " prefix added at the beginning of every relation\n "
5766 << std::setw (42 ) << " --intersects (default: ' intersects ')"
@@ -70,12 +79,11 @@ void printHelp(int argc, char** argv) {
7079 << " separator between crossing geometry IDs\n "
7180 << std::setw (42 ) << " --suffix (default: '\\ n')"
7281 << " suffix added at the beginning of every relation\n\n "
73- << std::setw (42 ) << " --within-distance (default: '')"
74- << " if set to non-negative value, only compute for each object the "
75- " objects within the given distance\n\n "
7682 << std::setfill (' ' ) << std::left << " Geometric computation:\n "
7783 << std::setw (42 ) << " --no-box-ids"
78- << " disable box id criteria for contains/covers/intersect computation\n "
84+ << " disable box id criteria for contains/covers/intersect\n "
85+ << std::setw (42 ) << " "
86+ << " computation\n "
7987 << std::setw (42 ) << " --no-surface-area"
8088 << " disable surface area criteria for polygon contains/covers\n "
8189 << std::setw (42 ) << " --no-oriented-envelope"
@@ -94,11 +102,17 @@ void printHelp(int argc, char** argv) {
94102 << " --num-caches (default: " + std::to_string (NUM_THREADS) + " )"
95103 << " number of geometry caches (if < --num-threads, syncing\n "
96104 << std::setw (42 )
97- << " --cache-max-size (default: " + std::to_string (DEFAULT_CACHE_SIZE) + " )"
105+ << " --cache-max-size (default: " + std::to_string (DEFAULT_CACHE_SIZE) +
106+ " )"
98107 << " maximum approx. size in bytes of cache per type and thread\n "
99108 << std::setw (42 ) << " --no-geometry-checks"
100- << " do not compute geometric relations, only report number of "
101- " candidates\n "
109+ << " do not compute geometric relations, only report number of\n "
110+ << std::setw (42 ) << " "
111+ << " candidates\n "
112+ << std::setw (42 ) << " --stats"
113+ << " output stats\n "
114+ << std::setw (42 ) << " -v [ --verbose ]"
115+ << " verbose logging\n "
102116 << std::endl;
103117}
104118
@@ -134,6 +148,9 @@ int main(int argc, char** argv) {
134148 bool noGeometryChecks = false ;
135149 bool computeDE9IM = false ;
136150
151+ bool printStats = false ;
152+ bool verbose = false ;
153+
137154 size_t numThreads = NUM_THREADS;
138155 size_t numCaches = NUM_THREADS;
139156 size_t geomCacheMaxSizeBytes = DEFAULT_CACHE_SIZE;
@@ -194,6 +211,10 @@ int main(int argc, char** argv) {
194211 useFastSweepSkip = false ;
195212 } else if (cur == " --use-inner-outer" ) {
196213 useInnerOuter = true ;
214+ } else if (cur == " --stats" ) {
215+ printStats = true ;
216+ } else if (cur == " --verbose" || cur == " -v" ) {
217+ verbose = true ;
197218 } else {
198219 inputFiles.push_back (cur);
199220 }
@@ -265,53 +286,116 @@ int main(int argc, char** argv) {
265286 unsigned char * buf = new unsigned char [CACHE_SIZE];
266287 size_t len;
267288
268- Sweeper sweeper ({numThreads,
269- numCaches,
270- geomCacheMaxSizeBytes / (numThreads * 3 ),
271- prefix,
272- intersects,
273- contains,
274- covers,
275- touches,
276- equals,
277- overlaps,
278- crosses,
279- suffix,
280- useBoxIds,
281- useArea,
282- useOBB,
283- useDiagBox,
284- useFastSweepSkip,
285- useInnerOuter,
286- noGeometryChecks,
287- withinDist,
288- computeDE9IM,
289- {},
290- [](const std::string& s) { LOGTO (INFO, std::cerr) << s; },
291- [](const std::string& s) { std::cerr << s; },
292- {},
293- {}},
294- cache, output);
295-
296- LOGTO (INFO, std::cerr) << " Parsing input geometries..." ;
289+ sj::SweeperCfg sweeperCfg{numThreads,
290+ numCaches,
291+ geomCacheMaxSizeBytes / (numThreads * 3 ),
292+ prefix,
293+ intersects,
294+ contains,
295+ covers,
296+ touches,
297+ equals,
298+ overlaps,
299+ crosses,
300+ suffix,
301+ useBoxIds,
302+ useArea,
303+ useOBB,
304+ useDiagBox,
305+ useFastSweepSkip,
306+ useInnerOuter,
307+ noGeometryChecks,
308+ withinDist,
309+ computeDE9IM,
310+ {},
311+ {},
312+ {},
313+ {},
314+ {}};
315+
316+ if (printStats)
317+ sweeperCfg.statsCb = [](const std::string& s) { std::cerr << s; };
318+
319+ if (verbose)
320+ sweeperCfg.logCb = [](const std::string& s) {
321+ LOGTO (INFO, std::cerr) << s;
322+ };
323+
324+ Sweeper sweeper (sweeperCfg, cache, output);
325+
326+ sweeper.log (" Parsing input geometries..." );
297327 auto ts = TIME ();
298328
299329 sj::WKTParser parser (&sweeper, NUM_THREADS);
300330
301331 if (!inputFiles.empty ()) {
302332 if (inputFiles.size () > 2 ) {
303- std::cerr << " Either 1 input files (for self join), or 2 input files (for non-self join) can be provided." << std::endl;
333+ std::cerr << " Either 1 input file (for self join), or 2 input files "
334+ " (for non-self join) can be provided."
335+ << std::endl;
304336 exit (1 );
305337 }
306338 for (size_t i = 0 ; i < inputFiles.size (); i++) {
307- int f = open (inputFiles[i].c_str (), O_RDONLY);
339+ if (util::endsWith (inputFiles[i], " .bz2" )) {
340+ #ifndef SPATIALJOIN_NO_BZIP2
341+ auto fh = fopen (inputFiles[i].c_str (), " r" );
342+ if (!fh) {
343+ std::cerr << " Could not open input file " << inputFiles[i]
344+ << std::endl;
345+ exit (1 );
346+ }
347+ int err;
348+ BZFILE* f = BZ2_bzReadOpen (&err, fh, 0 , 0 , NULL , 0 );
349+ if (!f || err != BZ_OK) {
350+ std::cerr << " Could not open input file " << inputFiles[i]
351+ << std::endl;
352+ exit (1 );
353+ }
354+ while ((len = util::bz2readAll (f, buf, CACHE_SIZE)) > 0 ) {
355+ parser.parse (reinterpret_cast <char *>(buf), len, i != 0 );
356+ }
308357
309- if (f < 0 ) {
310- throw std::runtime_error (" Could not open input file " + inputFiles[i]);
311- }
358+ BZ2_bzReadClose (&err, f);
359+ fclose (fh);
360+ #else
361+ std::cerr << " Could not open input file " << inputFiles[i]
362+ << " , spatialjoin was compiled without BZip2 support"
363+ << std::endl;
364+ exit (1 );
365+ #endif
366+ } else if (util::endsWith (inputFiles[i], " .gz" )) {
367+ #ifndef SPATIALJOIN_NO_ZLIB
368+ gzFile f = gzopen (inputFiles[i].c_str (), " r" );
369+ if (f == Z_NULL) {
370+ std::cerr << " Could not open input file " << inputFiles[i]
371+ << std::endl;
372+ exit (1 );
373+ }
374+ while ((len = util::zreadAll (f, buf, CACHE_SIZE)) > 0 ) {
375+ parser.parse (reinterpret_cast <char *>(buf), len, i != 0 );
376+ }
377+
378+ gzclose (f);
379+ #else
380+ std::cerr << " Could not open input file " << inputFiles[i]
381+ << " , spatialjoin was compiled without gzip support"
382+ << std::endl;
383+ exit (1 );
384+ #endif
385+ } else {
386+ int f = open (inputFiles[i].c_str (), O_RDONLY);
387+
388+ if (f < 0 ) {
389+ std::cerr << " Could not open input file " << inputFiles[i]
390+ << std::endl;
391+ exit (1 );
392+ }
393+
394+ while ((len = util::readAll (f, buf, CACHE_SIZE)) > 0 ) {
395+ parser.parse (reinterpret_cast <char *>(buf), len, i != 0 );
396+ }
312397
313- while ((len = util::readAll (f, buf, CACHE_SIZE)) > 0 ) {
314- parser.parse (reinterpret_cast <char *>(buf), len, i != 0 );
398+ close (f);
315399 }
316400 }
317401 } else {
@@ -322,20 +406,20 @@ int main(int argc, char** argv) {
322406
323407 parser.done ();
324408
325- LOGTO (INFO, std::cerr) << " Done parsing ( "
326- << TOOK (ts) / 1000000000.0 << " s)." ;
409+ sweeper. log ( " Done parsing ( " + std::to_string ( TOOK (ts) / 1000000000.0 ) +
410+ " s)." ) ;
327411 ts = TIME ();
328412
329- LOGTO (INFO, std::cerr) << " Sorting sweep events..." ;
413+ sweeper. log ( " Sorting sweep events..." ) ;
330414
331415 sweeper.flush ();
332416
333- LOGTO (INFO, std::cerr) << " done (" << TOOK (ts) / 1000000000.0 << " s)." ;
417+ sweeper. log ( " done (" + std::to_string ( TOOK (ts) / 1000000000.0 ) + " s)." ) ;
334418
335- LOGTO (INFO, std::cerr) << " Sweeping..." ;
419+ sweeper. log ( " Sweeping..." ) ;
336420 ts = TIME ();
337421 sweeper.sweep ();
338- LOGTO (INFO, std::cerr) << " done (" << TOOK (ts) / 1000000000.0 << " s)." ;
422+ sweeper. log ( " done (" + std::to_string ( TOOK (ts) / 1000000000.0 ) + " s)." ) ;
339423
340424 delete[] buf;
341425}
0 commit comments