diff --git a/src/engine/SpatialJoinParser.cpp b/src/engine/SpatialJoinParser.cpp index 9412a85594..40a27b3c0a 100644 --- a/src/engine/SpatialJoinParser.cpp +++ b/src/engine/SpatialJoinParser.cpp @@ -13,7 +13,7 @@ namespace ad_utility::detail::parallel_wkt_parser { // _____________________________________________________________________________ WKTParser::WKTParser(sj::Sweeper* sweeper, size_t numThreads, bool usePrefiltering, - const std::optional& prefilterLatLngBox, + const std::optional<::util::geo::DBox>& prefilterLatLngBox, const Index& index) : sj::WKTParserBase(sweeper, numThreads), _numSkipped(numThreads), @@ -66,22 +66,22 @@ void WKTParser::processQueue(size_t t) { parseCounter++; } else if (dt == Datatype::GeoPoint) { const auto& p = job.valueId.getGeoPoint(); - const util::geo::DPoint utilPoint{p.getLng(), p.getLat()}; + const ::util::geo::DPoint utilPoint{p.getLng(), p.getLat()}; // If point is not contained in the prefilter box, we can skip it // immediately instead of feeding it to the parser. if (_prefilterLatLngBox.has_value() && - !util::geo::intersects(_prefilterLatLngBox.value(), utilPoint)) { + !::util::geo::intersects(_prefilterLatLngBox.value(), utilPoint)) { prefilterCounter++; continue; } // parse point directly auto mercPoint = latLngToWebMerc(utilPoint); - util::geo::I32Point addPoint{ + ::util::geo::I32Point addPoint{ static_cast(mercPoint.getX() * PREC), static_cast(mercPoint.getY() * PREC)}; - _bboxes[t] = util::geo::extendBox( + _bboxes[t] = ::util::geo::extendBox( _sweeper->add(addPoint, std::to_string(job.line), job.side, w), _bboxes[t]); parseCounter++; diff --git a/src/engine/SpatialJoinParser.h b/src/engine/SpatialJoinParser.h index 6a4b9ef5e7..ba07682b2b 100644 --- a/src/engine/SpatialJoinParser.h +++ b/src/engine/SpatialJoinParser.h @@ -40,7 +40,7 @@ inline bool operator==(const SpatialJoinParseJob& a, class WKTParser : public sj::WKTParserBase { public: WKTParser(sj::Sweeper* sweeper, size_t numThreads, bool usePrefiltering, - const std::optional& prefilterLatLngBox, + const std::optional<::util::geo::DBox>& prefilterLatLngBox, const Index& index); // Enqueue a new row from the input table (given the `ValueId` of the @@ -69,7 +69,7 @@ class WKTParser : public sj::WKTParserBase { // Configure prefiltering geometries by bounding box. bool _usePrefiltering; - std::optional _prefilterLatLngBox; + std::optional<::util::geo::DBox> _prefilterLatLngBox; // A reference to QLever's index is needed to access precomputed geometry // bounding boxes and to resolve `ValueId`s into WKT literals. diff --git a/src/index/DeltaTriples.cpp b/src/index/DeltaTriples.cpp index 367d6e4c84..50bfc72892 100644 --- a/src/index/DeltaTriples.cpp +++ b/src/index/DeltaTriples.cpp @@ -20,7 +20,6 @@ #include "index/Index.h" #include "index/IndexImpl.h" #include "index/LocatedTriples.h" -#include "util/LruCache.h" #include "util/Serializer/TripleSerializer.h" // ____________________________________________________________________________ @@ -167,16 +166,21 @@ DeltaTriplesCount DeltaTriples::getCounts() const { } // _____________________________________________________________________________ -DeltaTriples::Triples DeltaTriples::makeInternalTriples( - const Triples& triples) { +DeltaTriples::Triples DeltaTriples::makeInternalTriples(const Triples& triples, + bool insertion) { // NOTE: If this logic is ever changed, you need to also change the code // in `IndexBuilderTypes.h`, the function `getIdMapLambdas` specifically, // which adds the same extra triples for language tags to the internal triples // on the initial index build. Triples internalTriples; - constexpr size_t predicateCacheSize = 50; - ad_utility::util::LRUCache - predicateCache{predicateCacheSize}; + // Initialize on first use. + if (languagePredicate_.isUndefined()) { + languagePredicate_ = + TripleComponent{ + ad_utility::triple_component::Iri::fromIriref(LANGUAGE_PREDICATE)} + .toValueId(index_.getVocab(), localVocab_, + index_.encodedIriManager()); + } for (const auto& triple : triples) { const auto& ids = triple.ids(); Id objectId = ids.at(2); @@ -188,28 +192,39 @@ DeltaTriples::Triples DeltaTriples::makeInternalTriples( continue; } const auto& predicate = - predicateCache.getOrCompute(ids.at(1).getBits(), [this](Id::T bits) { + predicateCache_.getOrCompute(ids.at(1).getBits(), [this](Id::T bits) { auto optionalPredicate = ExportQueryExecutionTrees::idToLiteralOrIri( index_, Id::fromBits(bits), localVocab_, true); AD_CORRECTNESS_CHECK(optionalPredicate.has_value()); AD_CORRECTNESS_CHECK(optionalPredicate.value().isIri()); return std::move(optionalPredicate.value().getIri()); }); - auto specialPredicate = ad_utility::convertToLanguageTaggedPredicate( - predicate, - asStringViewUnsafe(optionalLiteralOrIri.value().getLanguageTag())); + auto langtag = + asStringViewUnsafe(optionalLiteralOrIri.value().getLanguageTag()); + auto specialPredicate = + ad_utility::convertToLanguageTaggedPredicate(predicate, langtag); Id specialId = TripleComponent{std::move(specialPredicate)}.toValueId( index_.getVocab(), localVocab_, index_.encodedIriManager()); - // NOTE: We currently only add one of the language triples, specifically - // ` @language@ "object"@language` because it is - // directly tied to a regular triple, so insertion and removal work exactly - // the same. ` ql:langtag <@language>` on the other hand needs - // either some sort of reference counting or we have to keep it - // indefinitely, even if the object is removed. This means that some queries - // will return no results for entries with language tags that were inserted - // via an UPDATE operation. + // Extra triple ` @language@ "object"@language`. internalTriples.push_back( IdTriple<0>{std::array{ids.at(0), specialId, ids.at(2), ids.at(3)}}); + Id langtagId = + languageTagCache_.getOrCompute(langtag, [this](const std::string& tag) { + return TripleComponent{ad_utility::convertLangtagToEntityUri(tag)} + .toValueId(index_.getVocab(), localVocab_, + index_.encodedIriManager()); + }); + + // Because we don't track the exact counts of existing objects, we just + // conservatively add these internal triples on insertion, and never remove + // them. This is inefficient, but never wrong because queries that use these + // internal triples will always join these internal triples with a regular + // index scan. + if (insertion) { + // Extra triple `"object"@language ql:langtag <@language>`. + internalTriples.push_back(IdTriple<0>{ + std::array{ids.at(2), languagePredicate_, langtagId, ids.at(3)}}); + } } // Because of the special predicates, we need to re-sort the triples. ql::ranges::sort(internalTriples); @@ -221,7 +236,7 @@ void DeltaTriples::insertTriples(CancellationHandle cancellationHandle, Triples triples, ad_utility::timer::TimeTracer& tracer) { tracer.beginTrace("makeInternalTriples"); - auto internalTriples = makeInternalTriples(triples); + auto internalTriples = makeInternalTriples(triples, true); tracer.endTrace("makeInternalTriples"); tracer.beginTrace("externalPermutation"); modifyTriplesImpl(cancellationHandle, std::move(triples), @@ -240,7 +255,7 @@ void DeltaTriples::deleteTriples(CancellationHandle cancellationHandle, Triples triples, ad_utility::timer::TimeTracer& tracer) { tracer.beginTrace("makeInternalTriples"); - auto internalTriples = makeInternalTriples(triples); + auto internalTriples = makeInternalTriples(triples, false); tracer.endTrace("makeInternalTriples"); tracer.beginTrace("externalPermutation"); modifyTriplesImpl(cancellationHandle, std::move(triples), @@ -417,6 +432,9 @@ DeltaTriplesCount operator-(const DeltaTriplesCount& lhs, DeltaTriples::DeltaTriples(const Index& index) : DeltaTriples(index.getImpl()) {} +// ____________________________________________________________________________ +DeltaTriples::DeltaTriples(const IndexImpl& index) : index_{index} {} + // ____________________________________________________________________________ DeltaTriplesManager::DeltaTriplesManager(const IndexImpl& index) : deltaTriples_{index}, diff --git a/src/index/DeltaTriples.h b/src/index/DeltaTriples.h index bb9057686a..a73af54e7a 100644 --- a/src/index/DeltaTriples.h +++ b/src/index/DeltaTriples.h @@ -19,6 +19,7 @@ #include "index/IndexBuilderTypes.h" #include "index/LocatedTriples.h" #include "index/Permutation.h" +#include "util/LruCache.h" #include "util/Synchronized.h" #include "util/TimeTracer.h" @@ -128,6 +129,23 @@ class DeltaTriples { // See the documentation of `setPersist()` below. std::optional filenameForPersisting_; + // Store the id of the `ql:langtag` predicate to avoid repeated disk lookups. + // This is initialized on first use. + Id languagePredicate_ = Id::makeUndefined(); + + // Store commonly used language tags of the form `<@lang>` to avoid repeated + // disk lookups. + static constexpr size_t languageTagCacheSize_ = 1000; + ad_utility::util::LRUCache languageTagCache_{ + languageTagCacheSize_}; + + // Cache commonly used predicates and their IRI representation between calls + // of `makeInternalTriples`. For example in wikidata `wdt:P31`, or `wdt:P279` + // are frequently used, so we try to avoid an expensive lookup from disk. + static constexpr size_t predicateCacheSize_ = 1000; + ad_utility::util::LRUCache + predicateCache_{predicateCacheSize_}; + // Assert that the Permutation Enum values have the expected int values. // This is used to store and lookup items that exist for permutation in an // array. @@ -167,7 +185,7 @@ class DeltaTriples { public: // Construct for given index. explicit DeltaTriples(const Index& index); - explicit DeltaTriples(const IndexImpl& index) : index_{index} {} + explicit DeltaTriples(const IndexImpl& index); // Disable accidental copying. DeltaTriples(const DeltaTriples&) = delete; @@ -213,8 +231,11 @@ class DeltaTriples { // bunch of triples to be inserted into the internal permutation to make // things like efficient language filters work. This currently performs a // lookup from disk to check the language tag, but in the future this may be - // implemented more efficiently. - Triples makeInternalTriples(const Triples& triples); + // implemented more efficiently. If `insertion` is false, this indicates that + // the triples are meant for deletion. In that case no triples are returned + // that may be unsafe to delete. In particular this refers to triples of the + // form ` ql:langtag <@language>`. + Triples makeInternalTriples(const Triples& triples, bool insertion); // Insert triples. void insertTriples(CancellationHandle cancellationHandle, Triples triples, diff --git a/src/rdfTypes/GeometryInfoHelpersImpl.h b/src/rdfTypes/GeometryInfoHelpersImpl.h index a522df27f9..86ce7521f9 100644 --- a/src/rdfTypes/GeometryInfoHelpersImpl.h +++ b/src/rdfTypes/GeometryInfoHelpersImpl.h @@ -46,7 +46,7 @@ using ParsedWkt = MultiPoint, MultiLine, MultiPolygon, Collection>; using ParseResult = std::pair>; -using DAnyGeometry = util::geo::AnyGeometry; +using DAnyGeometry = AnyGeometry; template CPP_concept WktSingleGeometryType = @@ -245,19 +245,16 @@ inline std::optional wktTypeToIri(uint8_t type) { // Reverse projection applied by `sj::WKTParser`: convert coordinates from web // mercator int32 to normal lat-long double coordinates. -inline util::geo::DPoint projectInt32WebMercToDoubleLatLng( - const util::geo::I32Point& p) { - return util::geo::webMercToLatLng( - static_cast(p.getX()) / PREC, - static_cast(p.getY()) / PREC); -}; +inline DPoint projectInt32WebMercToDoubleLatLng(const I32Point& p) { + return webMercToLatLng(static_cast(p.getX()) / PREC, + static_cast(p.getY()) / PREC); +} // Same as above, but for a bounding box. -inline util::geo::DBox projectInt32WebMercToDoubleLatLng( - const util::geo::I32Box& box) { +inline DBox projectInt32WebMercToDoubleLatLng(const I32Box& box) { return {projectInt32WebMercToDoubleLatLng(box.getLowerLeft()), projectInt32WebMercToDoubleLatLng(box.getUpperRight())}; -}; +} // Counts the number of geometries in a geometry collection. inline uint32_t countChildGeometries(const ParsedWkt& geom) { @@ -657,7 +654,7 @@ struct MetricDistanceVisitor { // Delegate the actual distance computation to `pb_util`. CPP_template(typename T, typename U)(requires IsPairOfUtilGeoms) double operator()(const T& a, const U& b) const { - return util::geo::webMercMeterDist(a, b); + return webMercMeterDist(a, b); } // Handle optional geometries that may be contained in a `ParseResult`. diff --git a/src/util/LruCache.h b/src/util/LruCache.h index 55276b5853..fc2fafa2a6 100644 --- a/src/util/LruCache.h +++ b/src/util/LruCache.h @@ -35,9 +35,9 @@ class LRUCache { // found. Otherwise, compute the value using `computeFunction` and store it in // the cache. If the cache is already at maximum capacity, evict the least // recently used element. - CPP_template(typename Func)( + CPP_template(typename Key, typename Func)( requires ad_utility::InvocableWithConvertibleReturnType< - Func, V, const K&>) const V& getOrCompute(const K& key, + Func, V, const K&>) const V& getOrCompute(Key&& key, Func computeFunction) { auto it = cache_.find(key); if (it != cache_.end()) { @@ -56,9 +56,10 @@ class LRUCache { lruKey = key; } else { // Push new element if not full - keys_.push_front(key); + keys_.push_front(K{key}); } - auto result = cache_.try_emplace(key, computeFunction(key), keys_.begin()); + auto result = cache_.try_emplace( + AD_FWD(key), computeFunction(keys_.front()), keys_.begin()); AD_CORRECTNESS_CHECK(result.second); return result.first->second.first; } diff --git a/test/DeltaTriplesTest.cpp b/test/DeltaTriplesTest.cpp index 358fb56b1b..beddf635b0 100644 --- a/test/DeltaTriplesTest.cpp +++ b/test/DeltaTriplesTest.cpp @@ -443,6 +443,10 @@ TEST_F(DeltaTriplesTest, insertTriplesAndDeleteTriples) { " \"def\"@de", " \"def\"@es"})); auto a = iri(""); auto b = iri(""); + auto lp = iri(LANGUAGE_PREDICATE); + auto de = TripleComponent{ad_utility::convertLangtagToEntityUri("de")}; + auto en = TripleComponent{ad_utility::convertLangtagToEntityUri("en")}; + auto es = TripleComponent{ad_utility::convertLangtagToEntityUri("es")}; EXPECT_THAT(deltaTriples, TriplesAre({{a, b, TripleComponent{1}}, {a, b, lit("\"abc\"")}, @@ -453,7 +457,11 @@ TEST_F(DeltaTriplesTest, insertTriplesAndDeleteTriples) { {a, iri(""), lit("\"def\"@de")}, {a, iri(""), lit("\"def\"@es")}}, {}, - {{a, iri("@de@"), lit("\"abc\"@de")}, + {{lit("\"abc\"@de"), lp, de}, + {lit("\"abc\"@en"), lp, en}, + {lit("\"def\"@de"), lp, de}, + {lit("\"def\"@es"), lp, es}, + {a, iri("@de@"), lit("\"abc\"@de")}, {a, iri("@en@"), lit("\"abc\"@en")}, {a, iri("@de@"), lit("\"def\"@de")}, {a, iri("@es@"), lit("\"def\"@es")}}, @@ -477,7 +485,10 @@ TEST_F(DeltaTriplesTest, insertTriplesAndDeleteTriples) { {a, b, iri("")}, {a, iri(""), lit("\"def\"@de")}, {a, iri(""), lit("\"def\"@es")}}, - {}, + {{lit("\"abc\"@de"), lp, de}, + {lit("\"abc\"@en"), lp, en}, + {lit("\"def\"@de"), lp, de}, + {lit("\"def\"@es"), lp, es}}, {{a, iri("@de@"), lit("\"abc\"@de")}, {a, iri("@en@"), lit("\"abc\"@en")}, {a, iri("@de@"), lit("\"def\"@de")}, diff --git a/test/ExecuteUpdateTest.cpp b/test/ExecuteUpdateTest.cpp index acaeefc9a1..48a75f2a57 100644 --- a/test/ExecuteUpdateTest.cpp +++ b/test/ExecuteUpdateTest.cpp @@ -162,14 +162,14 @@ TEST(ExecuteUpdate, executeUpdate) { expectExecuteUpdate("DROP SILENT ALL", NumTriples(0, 8, 8, 0, 1)); expectExecuteUpdate("ADD TO ", NumTriples(0, 0, 0)); expectExecuteUpdate("ADD SILENT TO DEFAULT", NumTriples(0, 0, 0)); - expectExecuteUpdate("ADD DEFAULT TO ", NumTriples(8, 0, 8, 1, 0)); + expectExecuteUpdate("ADD DEFAULT TO ", NumTriples(8, 0, 8, 2, 0)); expectExecuteUpdate("ADD SILENT DEFAULT TO DEFAULT", NumTriples(0, 0, 0)); expectExecuteUpdate("MOVE SILENT DEFAULT TO DEFAULT", NumTriples(0, 0, 0)); expectExecuteUpdate("MOVE GRAPH TO ", NumTriples(0, 0, 0)); expectExecuteUpdate("MOVE TO DEFAULT", NumTriples(0, 8, 8, 0, 1)); expectExecuteUpdate("MOVE DEFAULT TO GRAPH ", - NumTriples(8, 8, 16, 1, 1)); - expectExecuteUpdate("COPY DEFAULT TO ", NumTriples(8, 0, 8, 1, 0)); + NumTriples(8, 8, 16, 2, 1)); + expectExecuteUpdate("COPY DEFAULT TO ", NumTriples(8, 0, 8, 2, 0)); expectExecuteUpdate("COPY DEFAULT TO DEFAULT", NumTriples(0, 0, 0)); expectExecuteUpdate("COPY TO DEFAULT", NumTriples(0, 8, 8, 0, 1)); expectExecuteUpdate("CREATE SILENT GRAPH ", NumTriples(0, 0, 0)); @@ -198,17 +198,17 @@ TEST(ExecuteUpdate, executeUpdate) { expectExecuteUpdate("DROP SILENT ALL", NumTriples(0, 7, 7, 0, 3)); expectExecuteUpdate("ADD TO ", NumTriples(0, 0, 0)); expectExecuteUpdate("ADD TO ", NumTriples(0, 0, 0)); - expectExecuteUpdate("ADD SILENT TO DEFAULT", NumTriples(3, 0, 3, 2, 0)); + expectExecuteUpdate("ADD SILENT TO DEFAULT", NumTriples(3, 0, 3, 4, 0)); expectExecuteUpdate("ADD DEFAULT TO ", NumTriples(1, 0, 1)); expectExecuteUpdate("ADD SILENT DEFAULT TO DEFAULT", NumTriples(0, 0, 0)); expectExecuteUpdate("MOVE SILENT DEFAULT TO DEFAULT", NumTriples(0, 0, 0)); - expectExecuteUpdate("MOVE GRAPH TO ", NumTriples(3, 3, 6, 2, 2)); - expectExecuteUpdate("MOVE TO DEFAULT", NumTriples(3, 4, 7, 2, 2)); + expectExecuteUpdate("MOVE GRAPH TO ", NumTriples(3, 3, 6, 4, 2)); + expectExecuteUpdate("MOVE TO DEFAULT", NumTriples(3, 4, 7, 4, 2)); expectExecuteUpdate("MOVE DEFAULT TO GRAPH ", NumTriples(1, 1, 2)); expectExecuteUpdate("MOVE DEFAULT TO GRAPH ", NumTriples(1, 4, 5, 0, 2)); expectExecuteUpdate("COPY DEFAULT TO ", NumTriples(1, 3, 4, 0, 2)); expectExecuteUpdate("COPY DEFAULT TO DEFAULT", NumTriples(0, 0, 0)); - expectExecuteUpdate("COPY TO DEFAULT", NumTriples(3, 1, 4, 2, 0)); + expectExecuteUpdate("COPY TO DEFAULT", NumTriples(3, 1, 4, 4, 0)); expectExecuteUpdate("CREATE SILENT GRAPH ", NumTriples(0, 0, 0)); expectExecuteUpdate("CREATE GRAPH ", NumTriples(0, 0, 0)); } diff --git a/test/IndexTest.cpp b/test/IndexTest.cpp index 506be61ee1..0b84f01727 100644 --- a/test/IndexTest.cpp +++ b/test/IndexTest.cpp @@ -738,7 +738,7 @@ TEST(IndexImpl, recomputeStatistics) { auto newStats = indexImpl.recomputeStatistics( index.deltaTriplesManager().getCurrentLocatedTriplesSharedState()); EXPECT_NE(newStats, indexImpl.configurationJson_); - EXPECT_EQ(newStats["num-triples"], NNAI(5, 6)); + EXPECT_EQ(newStats["num-triples"], NNAI(5, 7)); EXPECT_EQ(newStats["num-predicates"], NNAI(2, 4)); if (loadAllPermutations) { EXPECT_EQ(newStats["num-subjects"], NNAI(4, 0)); diff --git a/test/LruCacheTest.cpp b/test/LruCacheTest.cpp index ece1928aae..4767f9c9cf 100644 --- a/test/LruCacheTest.cpp +++ b/test/LruCacheTest.cpp @@ -11,31 +11,31 @@ TEST(LRUCache, testLruCache) { // Type-erase the lambdas to get a better coverage report. using F = std::function; - EXPECT_EQ(1, cache.getOrCompute(1, [](int i) { - EXPECT_EQ(i, 1); - return 1; - })); - EXPECT_EQ(2, cache.getOrCompute(2, [](int i) { - EXPECT_EQ(i, 2); - return 2; - })); + EXPECT_EQ(1, (cache.getOrCompute(1, [](int i) { + EXPECT_EQ(i, 1); + return 1; + }))); + EXPECT_EQ(2, (cache.getOrCompute(2, [](int i) { + EXPECT_EQ(i, 2); + return 2; + }))); - EXPECT_EQ(1, cache.getOrCompute(1, [](int) { - ADD_FAILURE(); - return 0; - })); - EXPECT_EQ(3, cache.getOrCompute(3, [](int i) { - EXPECT_EQ(i, 3); - return 3; - })); - EXPECT_EQ(20, cache.getOrCompute(2, [](int i) { - EXPECT_EQ(i, 2); - return 20; - })); - EXPECT_EQ(10, cache.getOrCompute(1, [](int i) { - EXPECT_EQ(i, 1); - return 10; - })); + EXPECT_EQ(1, (cache.getOrCompute(1, [](int) { + ADD_FAILURE(); + return 0; + }))); + EXPECT_EQ(3, (cache.getOrCompute(3, [](int i) { + EXPECT_EQ(i, 3); + return 3; + }))); + EXPECT_EQ(20, (cache.getOrCompute(2, [](int i) { + EXPECT_EQ(i, 2); + return 20; + }))); + EXPECT_EQ(10, (cache.getOrCompute(1, [](int i) { + EXPECT_EQ(i, 1); + return 10; + }))); } // _____________________________________________________________________________