88
99package org .locationtech .geomesa .accumulo .index
1010
11+ import com .google .gson .Gson
1112import org .geotools .api .data ._
1213import org .geotools .api .feature .simple .SimpleFeature
1314import org .geotools .api .filter .Filter
@@ -18,7 +19,9 @@ import org.geotools.geometry.jts.ReferencedEnvelope
1819import org .geotools .util .Converters
1920import org .junit .runner .RunWith
2021import org .locationtech .geomesa .accumulo .TestWithFeatureType
22+ import org .locationtech .geomesa .accumulo .data .{AccumuloDataStore , AccumuloDataStoreParams }
2123import org .locationtech .geomesa .accumulo .data .AccumuloQueryPlan .{BatchScanPlan , JoinPlan }
24+ import org .locationtech .geomesa .arrow .io .SimpleFeatureArrowFileReader
2225import org .locationtech .geomesa .features .ScalaSimpleFeature
2326import org .locationtech .geomesa .filter ._
2427import org .locationtech .geomesa .index .api .FilterStrategy
@@ -27,16 +30,20 @@ import org.locationtech.geomesa.index.index.attribute.AttributeIndex
2730import org .locationtech .geomesa .index .iterators .DensityScan
2831import org .locationtech .geomesa .index .planning .FilterSplitter
2932import org .locationtech .geomesa .index .utils .{ExplainNull , Explainer }
33+ import org .locationtech .geomesa .index .view .MergedDataStoreViewFactory
3034import org .locationtech .geomesa .utils .bin .BinaryOutputEncoder
3135import org .locationtech .geomesa .utils .collection .SelfClosingIterator
32- import org .locationtech .geomesa .utils .geotools .{CRS_EPSG_4326 , SimpleFeatureTypes }
36+ import org .locationtech .geomesa .utils .geotools .{CRS_EPSG_4326 , FeatureUtils , SimpleFeatureTypes }
3337import org .locationtech .geomesa .utils .index .IndexMode
3438import org .locationtech .geomesa .utils .io .WithClose
3539import org .locationtech .geomesa .utils .text .WKTUtils
40+ import org .locationtech .jts .geom .Point
3641import org .specs2 .matcher .Matcher
3742import org .specs2 .mutable .Specification
3843import org .specs2 .runner .JUnitRunner
44+ import org .specs2 .specification .core .Fragments
3945
46+ import java .io .ByteArrayInputStream
4047import java .util .Date
4148import scala .collection .JavaConverters ._
4249
@@ -81,6 +88,26 @@ class AttributeIndexStrategyTest extends Specification with TestWithFeatureType
8188 sft.getAttributeShards
8289 }
8390
91+ lazy val mergedViewDs = {
92+ val newParams = Seq (s " ${catalog}_01 " , s " ${catalog}_02 " ).map(c => dsParams ++ Map (AccumuloDataStoreParams .CatalogParam .key -> c))
93+ newParams.foreach { p =>
94+ WithClose (DataStoreFinder .getDataStore(p.asJava)) { ds =>
95+ ds.createSchema(sft)
96+ WithClose (ds.getFeatureWriterAppend(sft.getTypeName, Transaction .AUTO_COMMIT )) { writer =>
97+ val feats = if (p(AccumuloDataStoreParams .CatalogParam .key).endsWith(" 1" )) { features.take(2 ) } else { features.drop(2 ) }
98+ feats.foreach(FeatureUtils .write(writer, _, useProvidedFid = true ))
99+ }
100+ }
101+ }
102+ val json = new Gson ().toJson(newParams.map(_.asJava).asJava)
103+ val params = Map (MergedDataStoreViewFactory .ConfigParam .key -> s " {stores= $json} " )
104+ DataStoreFinder .getDataStore(params.asJava)
105+ }
106+
107+ override def map (fragments : => Fragments ): Fragments = super .map(fragments) ^ fragmentFactory.step {
108+ mergedViewDs.dispose()
109+ }
110+
  // load the shared test features into the primary data store before any examples run
  step {
    addFeatures(features)
  }
@@ -102,6 +129,17 @@ class AttributeIndexStrategyTest extends Specification with TestWithFeatureType
102129 SelfClosingIterator (ds.getFeatureSource(sftName).getFeatures(query).features())
103130 }
104131
132+ def decodeArrow (reader : SimpleFeatureArrowFileReader ): List [SimpleFeature ] = {
133+ SelfClosingIterator (reader.features()).map { f =>
134+ // round the points, as precision is lost due to the arrow encoding
135+ val attributes = f.getAttributes.asScala.collect {
136+ case p : Point => s " POINT ( ${Math .round(p.getX * 10 ) / 10d } ${Math .round(p.getY * 10 ) / 10d }) "
137+ case a => a
138+ }
139+ ScalaSimpleFeature .create(f.getFeatureType, f.getID, attributes.toSeq: _* )
140+ }.toList
141+ }
142+
105143 " AttributeIndexStrategy" should {
106144 " print values" in {
107145 skipped(" used for debugging" )
@@ -194,6 +232,92 @@ class AttributeIndexStrategyTest extends Specification with TestWithFeatureType
194232 forall(bins.map(_.lon))(_ mustEqual 55f )
195233 }
196234
235+ " support arrow queries with join queries" in {
236+ foreach(Seq (ds, mergedViewDs)) { ds =>
237+ val query = new Query (sftName, ECQL .toFilter(" count>=2" ))
238+ query.getHints.put(ARROW_ENCODE , java.lang.Boolean .TRUE )
239+ query.getHints.put(ARROW_SORT_FIELD , " dtg" )
240+ query.getHints.put(ARROW_DICTIONARY_FIELDS , " name" )
241+ val plans = Option (ds).collect { case ds : AccumuloDataStore => ds.getQueryPlan(query) }.getOrElse(Seq .empty)
242+ forall(plans)(_ must beAnInstanceOf[JoinPlan ])
243+ val results = SelfClosingIterator (ds.getFeatureSource(sftName).getFeatures(query).features()).map(_.getAttribute(0 )).toList
244+ forall(results)(_ must beAnInstanceOf[Array [Byte ]])
245+ val arrows = results.foldLeft(Array .empty[Byte ]) { case (res, bytes) => res ++ bytes.asInstanceOf [Array [Byte ]] }
246+ def in () = new ByteArrayInputStream (arrows)
247+ WithClose (SimpleFeatureArrowFileReader .streaming(in)) { reader =>
248+ val results = decodeArrow(reader)
249+ results must haveSize(3 )
250+ results.map(_.getAttributeCount).distinct mustEqual Seq (sft.getAttributeCount)
251+ results.map(_.getAttribute(" name" )) must containAllOf(Seq (" bill" , " bob" , " charles" ))
252+ results.map(_.getAttribute(sft.indexOf(" name" ))) must containAllOf(Seq (" bill" , " bob" , " charles" ))
253+ }
254+ }
255+ }
256+
257+ " support arrow queries with join queries and transforms" in {
258+ foreach(Seq (ds, mergedViewDs)) { ds =>
259+ val query = new Query (sftName, ECQL .toFilter(" count>=2" ), " dtg" , " geom" , " name" ) // note: swap order
260+ query.getHints.put(ARROW_ENCODE , java.lang.Boolean .TRUE )
261+ query.getHints.put(ARROW_SORT_FIELD , " dtg" )
262+ query.getHints.put(ARROW_DICTIONARY_FIELDS , " name" )
263+ val plans = Option (ds).collect { case ds : AccumuloDataStore => ds.getQueryPlan(query) }.getOrElse(Seq .empty)
264+ forall(plans)(_ must beAnInstanceOf[JoinPlan ])
265+ val results = SelfClosingIterator (ds.getFeatureSource(sftName).getFeatures(query).features()).map(_.getAttribute(0 )).toList
266+ forall(results)(_ must beAnInstanceOf[Array [Byte ]])
267+ val arrows = results.foldLeft(Array .empty[Byte ]) { case (res, bytes) => res ++ bytes.asInstanceOf [Array [Byte ]] }
268+ def in () = new ByteArrayInputStream (arrows)
269+ WithClose (SimpleFeatureArrowFileReader .streaming(in)) { reader =>
270+ val results = decodeArrow(reader)
271+ results must haveSize(3 )
272+ results.map(_.getAttribute(" dtg" )) must containAllOf(Seq (billDate, bobDate, charlesDate))
273+ results.map(_.getAttribute(0 )) must containAllOf(Seq (billDate, bobDate, charlesDate))
274+ results.map(_.getAttribute(" geom" )) must containAllOf(Seq (billGeom, bobGeom, charlesGeom))
275+ results.map(_.getAttribute(1 )) must containAllOf(Seq (billGeom, bobGeom, charlesGeom))
276+ results.map(_.getAttribute(" name" )) must containAllOf(Seq (" bill" , " bob" , " charles" ))
277+ results.map(_.getAttribute(2 )) must containAllOf(Seq (" bill" , " bob" , " charles" ))
278+ }
279+ }
280+ }
281+
282+ " support arrow queries against index values" in {
283+ foreach(Seq (ds, mergedViewDs)) { ds =>
284+ val query = new Query (sftName, ECQL .toFilter(" count>=2" ), " geom" , " dtg" )
285+ query.getHints.put(ARROW_ENCODE , java.lang.Boolean .TRUE )
286+ query.getHints.put(ARROW_SORT_FIELD , " dtg" )
287+ val plans = Option (ds).collect { case ds : AccumuloDataStore => ds.getQueryPlan(query) }.getOrElse(Seq .empty)
288+ forall(plans)(_ must beAnInstanceOf[BatchScanPlan ])
289+ val results = SelfClosingIterator (ds.getFeatureSource(sftName).getFeatures(query).features()).map(_.getAttribute(0 )).toList
290+ forall(results)(_ must beAnInstanceOf[Array [Byte ]])
291+ val arrows = results.foldLeft(Array .empty[Byte ]) { case (res, bytes) => res ++ bytes.asInstanceOf [Array [Byte ]] }
292+ def in () = new ByteArrayInputStream (arrows)
293+ WithClose (SimpleFeatureArrowFileReader .streaming(in)) { reader =>
294+ val results = decodeArrow(reader)
295+ results must haveSize(3 )
296+ results.map(_.getAttribute(" dtg" )) must containAllOf(Seq (billDate, bobDate, charlesDate))
297+ }
298+ }
299+ }
300+
301+ " support arrow queries against full values" in {
302+ foreach(Seq (ds, mergedViewDs)) { ds =>
303+ val query = new Query (sftName, ECQL .toFilter(" name>'amy'" ), " geom" , " dtg" , " count" )
304+ query.getHints.put(ARROW_ENCODE , java.lang.Boolean .TRUE )
305+ query.getHints.put(ARROW_SORT_FIELD , " dtg" )
306+ query.getHints.put(ARROW_DICTIONARY_FIELDS , " count" )
307+ val plans = Option (ds).collect { case ds : AccumuloDataStore => ds.getQueryPlan(query) }.getOrElse(Seq .empty)
308+ forall(plans)(_ must beAnInstanceOf[BatchScanPlan ])
309+ val results = SelfClosingIterator (ds.getFeatureSource(sftName).getFeatures(query).features()).map(_.getAttribute(0 )).toList
310+ forall(results)(_ must beAnInstanceOf[Array [Byte ]])
311+ val arrows = results.foldLeft(Array .empty[Byte ]) { case (res, bytes) => res ++ bytes.asInstanceOf [Array [Byte ]] }
312+ def in () = new ByteArrayInputStream (arrows)
313+ WithClose (SimpleFeatureArrowFileReader .streaming(in)) { reader =>
314+ val results = decodeArrow(reader)
315+ results must haveSize(3 )
316+ results.map(_.getAttribute(" count" )) must containAllOf(Seq (2 , 3 , 4 ))
317+ }
318+ }
319+ }
320+
197321 " correctly query equals with spatio-temporal filter" in {
198322 // height filter matches bob and charles, st filters only match bob
199323 val stFilters = Seq (
@@ -313,37 +437,37 @@ class AttributeIndexStrategyTest extends Specification with TestWithFeatureType
313437
314438 " support sampling" in {
315439 val query = new Query (sftName, ECQL .toFilter(" name > 'a'" ))
316- query.getHints.put(SAMPLING , new java.lang. Float (.5f ))
440+ query.getHints.put(SAMPLING , Float .box (.5f ))
317441 val results = runQuery(query).toList
318442 results must haveLength(2 )
319443 }
320444
321445 " support sampling with cql" in {
322446 val query = new Query (sftName, ECQL .toFilter(" name > 'a' AND track > 'track'" ))
323- query.getHints.put(SAMPLING , new java.lang. Float (.5f ))
447+ query.getHints.put(SAMPLING , Float .box (.5f ))
324448 val results = runQuery(query).toList
325449 results must haveLength(2 )
326450 }
327451
328452 " support sampling with transformations" in {
329453 val query = new Query (sftName, ECQL .toFilter(" name > 'a'" ), " name" , " geom" )
330- query.getHints.put(SAMPLING , new java.lang. Float (.5f ))
454+ query.getHints.put(SAMPLING , Float .box (.5f ))
331455 val results = runQuery(query).toList
332456 results must haveLength(2 )
333457 forall(results)(_.getAttributeCount mustEqual 2 )
334458 }
335459
336460 " support sampling with cql and transformations" in {
337461 val query = new Query (sftName, ECQL .toFilter(" name > 'a' AND track > 'track'" ), " name" , " geom" )
338- query.getHints.put(SAMPLING , new java.lang. Float (.2f ))
462+ query.getHints.put(SAMPLING , Float .box (.2f ))
339463 val results = runQuery(query).toList
340464 results must haveLength(1 )
341465 results.head.getAttributeCount mustEqual 2
342466 }
343467
344468 " support sampling by thread" in {
345469 val query = new Query (sftName, ECQL .toFilter(" name > 'a'" ))
346- query.getHints.put(SAMPLING , new java.lang. Float (.5f ))
470+ query.getHints.put(SAMPLING , Float .box (.5f ))
347471 query.getHints.put(SAMPLE_BY , " track" )
348472 val results = runQuery(query).toList
349473 results.length must beLessThan(4 ) // note: due to sharding and multiple ranges, we don't get exact sampling
@@ -356,7 +480,7 @@ class AttributeIndexStrategyTest extends Specification with TestWithFeatureType
356480 val query = new Query (sftName, ECQL .toFilter(" name > 'a'" ))
357481 query.getHints.put(BIN_TRACK , " name" )
358482 query.getHints.put(BIN_BATCH_SIZE , 1000 )
359- query.getHints.put(SAMPLING , new java.lang. Float (.5f ))
483+ query.getHints.put(SAMPLING , Float .box (.5f ))
360484 // have to evaluate attributes before pulling into collection, as the same sf is reused
361485 val results = runQuery(query).map(_.getAttribute(BIN_ATTRIBUTE_INDEX )).toList
362486 forall(results)(_ must beAnInstanceOf[Array [Byte ]])
0 commit comments