Skip to content

Commit e0be030

Browse files
committed
GEOMESA-3440 Accumulo - Fix merged view Arrow join attribute index queries (#3267)
1 parent 1e28392 commit e0be030

File tree

2 files changed

+136
-7
lines changed

2 files changed

+136
-7
lines changed

geomesa-accumulo/geomesa-accumulo-datastore/src/main/scala/org/locationtech/geomesa/accumulo/data/AccumuloJoinIndexAdapter.scala

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -298,6 +298,11 @@ object AccumuloJoinIndexAdapter {
298298
}
299299
val toFeatures = AccumuloResultsToFeatures(recordIndex, resultSft)
300300
val reducer = new LocalTransformReducer(resultSft, None, None, None, hints)
301+
if (hints.isSkipReduce) {
302+
// override the return sft to reflect what we're actually returning,
303+
// since the arrow sft is only created in the local reduce step
304+
hints.hints.put(QueryHints.Internal.RETURN_SFT, resultSft)
305+
}
301306

302307
val recordTables = recordIndex.getTablesForQuery(filter.filter)
303308
val recordThreads = ds.config.queries.recordThreads

geomesa-accumulo/geomesa-accumulo-datastore/src/test/scala/org/locationtech/geomesa/accumulo/index/AttributeIndexStrategyTest.scala

Lines changed: 131 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88

99
package org.locationtech.geomesa.accumulo.index
1010

11+
import com.google.gson.Gson
1112
import org.geotools.api.data._
1213
import org.geotools.api.feature.simple.SimpleFeature
1314
import org.geotools.api.filter.Filter
@@ -18,7 +19,9 @@ import org.geotools.geometry.jts.ReferencedEnvelope
1819
import org.geotools.util.Converters
1920
import org.junit.runner.RunWith
2021
import org.locationtech.geomesa.accumulo.TestWithFeatureType
22+
import org.locationtech.geomesa.accumulo.data.{AccumuloDataStore, AccumuloDataStoreParams}
2123
import org.locationtech.geomesa.accumulo.data.AccumuloQueryPlan.{BatchScanPlan, JoinPlan}
24+
import org.locationtech.geomesa.arrow.io.SimpleFeatureArrowFileReader
2225
import org.locationtech.geomesa.features.ScalaSimpleFeature
2326
import org.locationtech.geomesa.filter._
2427
import org.locationtech.geomesa.index.api.FilterStrategy
@@ -27,16 +30,20 @@ import org.locationtech.geomesa.index.index.attribute.AttributeIndex
2730
import org.locationtech.geomesa.index.iterators.DensityScan
2831
import org.locationtech.geomesa.index.planning.FilterSplitter
2932
import org.locationtech.geomesa.index.utils.{ExplainNull, Explainer}
33+
import org.locationtech.geomesa.index.view.MergedDataStoreViewFactory
3034
import org.locationtech.geomesa.utils.bin.BinaryOutputEncoder
3135
import org.locationtech.geomesa.utils.collection.SelfClosingIterator
32-
import org.locationtech.geomesa.utils.geotools.{CRS_EPSG_4326, SimpleFeatureTypes}
36+
import org.locationtech.geomesa.utils.geotools.{CRS_EPSG_4326, FeatureUtils, SimpleFeatureTypes}
3337
import org.locationtech.geomesa.utils.index.IndexMode
3438
import org.locationtech.geomesa.utils.io.WithClose
3539
import org.locationtech.geomesa.utils.text.WKTUtils
40+
import org.locationtech.jts.geom.Point
3641
import org.specs2.matcher.Matcher
3742
import org.specs2.mutable.Specification
3843
import org.specs2.runner.JUnitRunner
44+
import org.specs2.specification.core.Fragments
3945

46+
import java.io.ByteArrayInputStream
4047
import java.util.Date
4148
import scala.collection.JavaConverters._
4249

@@ -81,6 +88,26 @@ class AttributeIndexStrategyTest extends Specification with TestWithFeatureType
8188
sft.getAttributeShards
8289
}
8390

91+
/**
 * Merged-view data store backed by two extra catalogs, `${catalog}_01` and `${catalog}_02`.
 *
 * The schema is created in each catalog and the test features are split between them:
 * the catalog whose name ends in "1" receives the first two features, the other receives
 * the remainder. The two stores are then wrapped by a merged view configured through
 * `MergedDataStoreViewFactory.ConfigParam`.
 */
lazy val mergedViewDs = {
  val catalogKey = AccumuloDataStoreParams.CatalogParam.key
  val storeParams = Seq(s"${catalog}_01", s"${catalog}_02").map(c => dsParams ++ Map(catalogKey -> c))

  storeParams.foreach { storeParam =>
    // decide which slice of the features belongs in this catalog
    val subset = if (storeParam(catalogKey).endsWith("1")) features.take(2) else features.drop(2)
    WithClose(DataStoreFinder.getDataStore(storeParam.asJava)) { store =>
      store.createSchema(sft)
      WithClose(store.getFeatureWriterAppend(sft.getTypeName, Transaction.AUTO_COMMIT)) { writer =>
        subset.foreach(feature => FeatureUtils.write(writer, feature, useProvidedFid = true))
      }
    }
  }

  // the merged view takes the underlying store parameters as a json list
  val json = new Gson().toJson(storeParams.map(_.asJava).asJava)
  val viewParams = Map(MergedDataStoreViewFactory.ConfigParam.key -> s"{stores=$json}")
  DataStoreFinder.getDataStore(viewParams.asJava)
}
106+
107+
/**
 * Appends a final step to the specification's fragments that disposes of the merged view
 * data store.
 */
override def map(fragments: => Fragments): Fragments = {
  val base = super.map(fragments)
  base ^ fragmentFactory.step(mergedViewDs.dispose())
}
110+
84111
step {
85112
addFeatures(features)
86113
}
@@ -102,6 +129,17 @@ class AttributeIndexStrategyTest extends Specification with TestWithFeatureType
102129
SelfClosingIterator(ds.getFeatureSource(sftName).getFeatures(query).features())
103130
}
104131

132+
/**
 * Reads every feature out of an arrow file reader and copies it into a new simple feature.
 *
 * Point attributes are replaced with a WKT string whose coordinates are rounded to one
 * decimal place, as precision is lost due to the arrow encoding; all other attributes are
 * passed through unchanged.
 *
 * @param reader arrow reader to drain
 * @return the decoded features, as a list
 */
def decodeArrow(reader: SimpleFeatureArrowFileReader): List[SimpleFeature] = {
  // round an ordinate to a single decimal place
  def round(ordinate: Double): Double = Math.round(ordinate * 10) / 10d

  val decoded = SelfClosingIterator(reader.features()).map { feature =>
    val values = feature.getAttributes.asScala.map {
      case point: Point => s"POINT (${round(point.getX)} ${round(point.getY)})"
      case other => other
    }
    ScalaSimpleFeature.create(feature.getFeatureType, feature.getID, values.toSeq: _*)
  }
  decoded.toList
}
142+
105143
"AttributeIndexStrategy" should {
106144
"print values" in {
107145
skipped("used for debugging")
@@ -194,6 +232,92 @@ class AttributeIndexStrategyTest extends Specification with TestWithFeatureType
194232
forall(bins.map(_.lon))(_ mustEqual 55f)
195233
}
196234

235+
// Arrow-encoded query with no transform, run against both the plain store and the merged view;
// for the Accumulo store every plan is expected to be a JoinPlan
"support arrow queries with join queries" in {
  foreach(Seq(ds, mergedViewDs)) { store =>
    val query = new Query(sftName, ECQL.toFilter("count>=2"))
    val hints = query.getHints
    hints.put(ARROW_ENCODE, java.lang.Boolean.TRUE)
    hints.put(ARROW_SORT_FIELD, "dtg")
    hints.put(ARROW_DICTIONARY_FIELDS, "name")

    // query plans can only be inspected on an accumulo data store, not on the merged view
    val plans = store match {
      case accumulo: AccumuloDataStore => accumulo.getQueryPlan(query)
      case _ => Seq.empty
    }
    forall(plans)(_ must beAnInstanceOf[JoinPlan])

    val encoded =
      SelfClosingIterator(store.getFeatureSource(sftName).getFeatures(query).features()).map(_.getAttribute(0)).toList
    forall(encoded)(_ must beAnInstanceOf[Array[Byte]])

    // stitch the returned byte chunks together into a single arrow stream
    val arrows = Array.concat(encoded.map(_.asInstanceOf[Array[Byte]]): _*)
    def openStream() = new ByteArrayInputStream(arrows)

    WithClose(SimpleFeatureArrowFileReader.streaming(openStream)) { reader =>
      val decoded = decodeArrow(reader)
      decoded must haveSize(3)
      decoded.map(_.getAttributeCount).distinct mustEqual Seq(sft.getAttributeCount)
      decoded.map(_.getAttribute("name")) must containAllOf(Seq("bill", "bob", "charles"))
      decoded.map(_.getAttribute(sft.indexOf("name"))) must containAllOf(Seq("bill", "bob", "charles"))
    }
  }
}
256+
257+
// Arrow-encoded query with a re-ordered transform (dtg, geom, name), run against both the plain
// store and the merged view; attributes are checked both by name and by transformed position
"support arrow queries with join queries and transforms" in {
  foreach(Seq(ds, mergedViewDs)) { store =>
    val query = new Query(sftName, ECQL.toFilter("count>=2"), "dtg", "geom", "name") // note: swap order
    val hints = query.getHints
    hints.put(ARROW_ENCODE, java.lang.Boolean.TRUE)
    hints.put(ARROW_SORT_FIELD, "dtg")
    hints.put(ARROW_DICTIONARY_FIELDS, "name")

    // query plans can only be inspected on an accumulo data store, not on the merged view
    val plans = store match {
      case accumulo: AccumuloDataStore => accumulo.getQueryPlan(query)
      case _ => Seq.empty
    }
    forall(plans)(_ must beAnInstanceOf[JoinPlan])

    val encoded =
      SelfClosingIterator(store.getFeatureSource(sftName).getFeatures(query).features()).map(_.getAttribute(0)).toList
    forall(encoded)(_ must beAnInstanceOf[Array[Byte]])

    // stitch the returned byte chunks together into a single arrow stream
    val arrows = Array.concat(encoded.map(_.asInstanceOf[Array[Byte]]): _*)
    def openStream() = new ByteArrayInputStream(arrows)

    WithClose(SimpleFeatureArrowFileReader.streaming(openStream)) { reader =>
      val decoded = decodeArrow(reader)
      decoded must haveSize(3)
      decoded.map(_.getAttribute("dtg")) must containAllOf(Seq(billDate, bobDate, charlesDate))
      decoded.map(_.getAttribute(0)) must containAllOf(Seq(billDate, bobDate, charlesDate))
      decoded.map(_.getAttribute("geom")) must containAllOf(Seq(billGeom, bobGeom, charlesGeom))
      decoded.map(_.getAttribute(1)) must containAllOf(Seq(billGeom, bobGeom, charlesGeom))
      decoded.map(_.getAttribute("name")) must containAllOf(Seq("bill", "bob", "charles"))
      decoded.map(_.getAttribute(2)) must containAllOf(Seq("bill", "bob", "charles"))
    }
  }
}
281+
282+
// Arrow-encoded query transformed down to geom and dtg, run against both the plain store and
// the merged view; for the Accumulo store every plan is expected to be a BatchScanPlan
"support arrow queries against index values" in {
  foreach(Seq(ds, mergedViewDs)) { store =>
    val query = new Query(sftName, ECQL.toFilter("count>=2"), "geom", "dtg")
    val hints = query.getHints
    hints.put(ARROW_ENCODE, java.lang.Boolean.TRUE)
    hints.put(ARROW_SORT_FIELD, "dtg")

    // query plans can only be inspected on an accumulo data store, not on the merged view
    val plans = store match {
      case accumulo: AccumuloDataStore => accumulo.getQueryPlan(query)
      case _ => Seq.empty
    }
    forall(plans)(_ must beAnInstanceOf[BatchScanPlan])

    val encoded =
      SelfClosingIterator(store.getFeatureSource(sftName).getFeatures(query).features()).map(_.getAttribute(0)).toList
    forall(encoded)(_ must beAnInstanceOf[Array[Byte]])

    // stitch the returned byte chunks together into a single arrow stream
    val arrows = Array.concat(encoded.map(_.asInstanceOf[Array[Byte]]): _*)
    def openStream() = new ByteArrayInputStream(arrows)

    WithClose(SimpleFeatureArrowFileReader.streaming(openStream)) { reader =>
      val decoded = decodeArrow(reader)
      decoded must haveSize(3)
      decoded.map(_.getAttribute("dtg")) must containAllOf(Seq(billDate, bobDate, charlesDate))
    }
  }
}
300+
301+
// Arrow-encoded query filtered on name and transformed to geom, dtg and count, run against both
// the plain store and the merged view; for the Accumulo store every plan is expected to be a
// BatchScanPlan
"support arrow queries against full values" in {
  foreach(Seq(ds, mergedViewDs)) { store =>
    val query = new Query(sftName, ECQL.toFilter("name>'amy'"), "geom", "dtg", "count")
    val hints = query.getHints
    hints.put(ARROW_ENCODE, java.lang.Boolean.TRUE)
    hints.put(ARROW_SORT_FIELD, "dtg")
    hints.put(ARROW_DICTIONARY_FIELDS, "count")

    // query plans can only be inspected on an accumulo data store, not on the merged view
    val plans = store match {
      case accumulo: AccumuloDataStore => accumulo.getQueryPlan(query)
      case _ => Seq.empty
    }
    forall(plans)(_ must beAnInstanceOf[BatchScanPlan])

    val encoded =
      SelfClosingIterator(store.getFeatureSource(sftName).getFeatures(query).features()).map(_.getAttribute(0)).toList
    forall(encoded)(_ must beAnInstanceOf[Array[Byte]])

    // stitch the returned byte chunks together into a single arrow stream
    val arrows = Array.concat(encoded.map(_.asInstanceOf[Array[Byte]]): _*)
    def openStream() = new ByteArrayInputStream(arrows)

    WithClose(SimpleFeatureArrowFileReader.streaming(openStream)) { reader =>
      val decoded = decodeArrow(reader)
      decoded must haveSize(3)
      decoded.map(_.getAttribute("count")) must containAllOf(Seq(2, 3, 4))
    }
  }
}
320+
197321
"correctly query equals with spatio-temporal filter" in {
198322
// height filter matches bob and charles, st filters only match bob
199323
val stFilters = Seq(
@@ -313,37 +437,37 @@ class AttributeIndexStrategyTest extends Specification with TestWithFeatureType
313437

314438
"support sampling" in {
315439
val query = new Query(sftName, ECQL.toFilter("name > 'a'"))
316-
query.getHints.put(SAMPLING, new java.lang.Float(.5f))
440+
query.getHints.put(SAMPLING, Float.box(.5f))
317441
val results = runQuery(query).toList
318442
results must haveLength(2)
319443
}
320444

321445
"support sampling with cql" in {
322446
val query = new Query(sftName, ECQL.toFilter("name > 'a' AND track > 'track'"))
323-
query.getHints.put(SAMPLING, new java.lang.Float(.5f))
447+
query.getHints.put(SAMPLING, Float.box(.5f))
324448
val results = runQuery(query).toList
325449
results must haveLength(2)
326450
}
327451

328452
"support sampling with transformations" in {
329453
val query = new Query(sftName, ECQL.toFilter("name > 'a'"), "name", "geom")
330-
query.getHints.put(SAMPLING, new java.lang.Float(.5f))
454+
query.getHints.put(SAMPLING, Float.box(.5f))
331455
val results = runQuery(query).toList
332456
results must haveLength(2)
333457
forall(results)(_.getAttributeCount mustEqual 2)
334458
}
335459

336460
"support sampling with cql and transformations" in {
337461
val query = new Query(sftName, ECQL.toFilter("name > 'a' AND track > 'track'"), "name", "geom")
338-
query.getHints.put(SAMPLING, new java.lang.Float(.2f))
462+
query.getHints.put(SAMPLING, Float.box(.2f))
339463
val results = runQuery(query).toList
340464
results must haveLength(1)
341465
results.head.getAttributeCount mustEqual 2
342466
}
343467

344468
"support sampling by thread" in {
345469
val query = new Query(sftName, ECQL.toFilter("name > 'a'"))
346-
query.getHints.put(SAMPLING, new java.lang.Float(.5f))
470+
query.getHints.put(SAMPLING, Float.box(.5f))
347471
query.getHints.put(SAMPLE_BY, "track")
348472
val results = runQuery(query).toList
349473
results.length must beLessThan(4) // note: due to sharding and multiple ranges, we don't get exact sampling
@@ -356,7 +480,7 @@ class AttributeIndexStrategyTest extends Specification with TestWithFeatureType
356480
val query = new Query(sftName, ECQL.toFilter("name > 'a'"))
357481
query.getHints.put(BIN_TRACK, "name")
358482
query.getHints.put(BIN_BATCH_SIZE, 1000)
359-
query.getHints.put(SAMPLING, new java.lang.Float(.5f))
483+
query.getHints.put(SAMPLING, Float.box(.5f))
360484
// have to evaluate attributes before pulling into collection, as the same sf is reused
361485
val results = runQuery(query).map(_.getAttribute(BIN_ATTRIBUTE_INDEX)).toList
362486
forall(results)(_ must beAnInstanceOf[Array[Byte]])

0 commit comments

Comments
 (0)