From d1090bcf8c7c46efe6fc2995b1251000d6953f37 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 28 Jan 2026 14:41:40 +0100 Subject: [PATCH 1/7] Add missing internal triples for UPDATEs --- src/index/DeltaTriples.cpp | 46 +++++++++++++++++++++++++------------- src/index/DeltaTriples.h | 7 ++++-- test/DeltaTriplesTest.cpp | 15 +++++++++++-- test/ExecuteUpdateTest.cpp | 14 ++++++------ test/IndexTest.cpp | 2 +- 5 files changed, 57 insertions(+), 27 deletions(-) diff --git a/src/index/DeltaTriples.cpp b/src/index/DeltaTriples.cpp index 0167e601ae..f065bad5a3 100644 --- a/src/index/DeltaTriples.cpp +++ b/src/index/DeltaTriples.cpp @@ -167,8 +167,8 @@ DeltaTriplesCount DeltaTriples::getCounts() const { } // _____________________________________________________________________________ -DeltaTriples::Triples DeltaTriples::makeInternalTriples( - const Triples& triples) { +DeltaTriples::Triples DeltaTriples::makeInternalTriples(const Triples& triples, + bool insertion) { // NOTE: If this logic is ever changed, you need to also change the code // in `IndexBuilderTypes.h`, the function `getIdMapLambdas` specifically, // which adds the same extra triples for language tags to the internal triples @@ -177,6 +177,14 @@ DeltaTriples::Triples DeltaTriples::makeInternalTriples( constexpr size_t predicateCacheSize = 50; ad_utility::util::LRUCache predicateCache{predicateCacheSize}; + + // Don't compute the id if it's not used by the code below. + Id languagePredicate = + insertion ? TripleComponent{ad_utility::triple_component::Iri::fromIriref( + LANGUAGE_PREDICATE)} + .toValueId(index_.getVocab(), localVocab_, + index_.encodedIriManager()) + : Id::makeUndefined(); for (const auto& triple : triples) { const auto& ids = triple.ids(); Id objectId = ids.at(2); @@ -195,21 +203,29 @@ DeltaTriples::Triples DeltaTriples::makeInternalTriples( AD_CORRECTNESS_CHECK(optionalPredicate.value().isIri()); return std::move(optionalPredicate.value().getIri()); }); - auto specialPredicate = ad_utility::convertToLanguageTaggedPredicate( - predicate, - asStringViewUnsafe(optionalLiteralOrIri.value().getLanguageTag())); + auto langtag = + asStringViewUnsafe(optionalLiteralOrIri.value().getLanguageTag()); + auto specialPredicate = + ad_utility::convertToLanguageTaggedPredicate(predicate, langtag); Id specialId = TripleComponent{std::move(specialPredicate)}.toValueId( index_.getVocab(), localVocab_, index_.encodedIriManager()); - // NOTE: We currently only add one of the language triples, specifically - // ` @language@ "object"@language` because it is - // directly tied to a regular triple, so insertion and removal work exactly - // the same. ` ql:langtag <@language>` on the other hand needs - // either some sort of reference counting or we have to keep it - // indefinitely, even if the object is removed. This means that some queries - // will return no results for entries with language tags that were inserted - // via an UPDATE operation. + // Extra triple ` @language@ "object"@language`. internalTriples.push_back( IdTriple<0>{std::array{ids.at(0), specialId, ids.at(2), ids.at(3)}}); + Id langtagId = + TripleComponent{ad_utility::convertLangtagToEntityUri(langtag)} + .toValueId(index_.getVocab(), localVocab_, + index_.encodedIriManager()); + // Because we don't track the exact counts of existing objects, we just + // conservatively add these internal triples on insertion, and never remove + // them. This is inefficient, but never wrong because queries that use these + // internal triples will always join these internal triples with a regular + // index scan. + if (insertion) { + // Extra triple ` ql:langtag <@language>`. + internalTriples.push_back(IdTriple<0>{ + std::array{ids.at(2), languagePredicate, langtagId, ids.at(3)}}); + } } // Because of the special predicates, we need to re-sort the triples. ql::ranges::sort(internalTriples); @@ -221,7 +237,7 @@ void DeltaTriples::insertTriples(CancellationHandle cancellationHandle, Triples triples, ad_utility::timer::TimeTracer& tracer) { tracer.beginTrace("makeInternalTriples"); - auto internalTriples = makeInternalTriples(triples); + auto internalTriples = makeInternalTriples(triples, true); tracer.endTrace("makeInternalTriples"); tracer.beginTrace("externalPermutation"); modifyTriplesImpl(cancellationHandle, std::move(triples), @@ -240,7 +256,7 @@ void DeltaTriples::deleteTriples(CancellationHandle cancellationHandle, Triples triples, ad_utility::timer::TimeTracer& tracer) { tracer.beginTrace("makeInternalTriples"); - auto internalTriples = makeInternalTriples(triples); + auto internalTriples = makeInternalTriples(triples, false); tracer.endTrace("makeInternalTriples"); tracer.beginTrace("externalPermutation"); modifyTriplesImpl(cancellationHandle, std::move(triples), diff --git a/src/index/DeltaTriples.h b/src/index/DeltaTriples.h index d3202492a7..20b8d6dc18 100644 --- a/src/index/DeltaTriples.h +++ b/src/index/DeltaTriples.h @@ -213,8 +213,11 @@ class DeltaTriples { // bunch of triples to be inserted into the internal permutation to make // things like efficient language filters work. This currently performs a // lookup from disk to check the language tag, but in the future this may be - // implemented more efficiently. - Triples makeInternalTriples(const Triples& triples); + // implemented more efficiently. If `insertion` is false, then some internal + // triples will not be created, in particular the triple of the form + // ` ql:langtag <@language>`, which is not always safe to delete, + // but doesn't cause issues when it's present. + Triples makeInternalTriples(const Triples& triples, bool insertion); // Insert triples. void insertTriples(CancellationHandle cancellationHandle, Triples triples, diff --git a/test/DeltaTriplesTest.cpp b/test/DeltaTriplesTest.cpp index fc72521010..6f26167a55 100644 --- a/test/DeltaTriplesTest.cpp +++ b/test/DeltaTriplesTest.cpp @@ -443,6 +443,10 @@ TEST_F(DeltaTriplesTest, insertTriplesAndDeleteTriples) { " \"def\"@de", " \"def\"@es"})); auto a = iri(""); auto b = iri(""); + auto lp = iri(LANGUAGE_PREDICATE); + auto de = TripleComponent{ad_utility::convertLangtagToEntityUri("de")}; + auto en = TripleComponent{ad_utility::convertLangtagToEntityUri("en")}; + auto es = TripleComponent{ad_utility::convertLangtagToEntityUri("es")}; EXPECT_THAT(deltaTriples, TriplesAre({{a, b, TripleComponent{1}}, {a, b, lit("\"abc\"")}, @@ -453,7 +457,11 @@ TEST_F(DeltaTriplesTest, insertTriplesAndDeleteTriples) { {a, iri(""), lit("\"def\"@de")}, {a, iri(""), lit("\"def\"@es")}}, {}, - {{a, iri("@de@"), lit("\"abc\"@de")}, + {{lit("\"abc\"@de"), lp, de}, + {lit("\"abc\"@en"), lp, en}, + {lit("\"def\"@de"), lp, de}, + {lit("\"def\"@es"), lp, es}, + {a, iri("@de@"), lit("\"abc\"@de")}, {a, iri("@en@"), lit("\"abc\"@en")}, {a, iri("@de@"), lit("\"def\"@de")}, {a, iri("@es@"), lit("\"def\"@es")}}, @@ -477,7 +485,10 @@ TEST_F(DeltaTriplesTest, insertTriplesAndDeleteTriples) { {a, b, iri("")}, {a, iri(""), lit("\"def\"@de")}, {a, iri(""), lit("\"def\"@es")}}, - {}, + {{lit("\"abc\"@de"), lp, de}, + {lit("\"abc\"@en"), lp, en}, + {lit("\"def\"@de"), lp, de}, + {lit("\"def\"@es"), lp, es}}, {{a, iri("@de@"), lit("\"abc\"@de")}, {a, iri("@en@"), lit("\"abc\"@en")}, {a, iri("@de@"), lit("\"def\"@de")}, diff --git a/test/ExecuteUpdateTest.cpp b/test/ExecuteUpdateTest.cpp index acaeefc9a1..48a75f2a57 100644 --- a/test/ExecuteUpdateTest.cpp +++ b/test/ExecuteUpdateTest.cpp @@ -162,14 +162,14 @@ TEST(ExecuteUpdate, executeUpdate) { expectExecuteUpdate("DROP SILENT ALL", NumTriples(0, 8, 8, 0, 1)); expectExecuteUpdate("ADD TO ", NumTriples(0, 0, 0)); expectExecuteUpdate("ADD SILENT TO DEFAULT", NumTriples(0, 0, 0)); - expectExecuteUpdate("ADD DEFAULT TO ", NumTriples(8, 0, 8, 1, 0)); + expectExecuteUpdate("ADD DEFAULT TO ", NumTriples(8, 0, 8, 2, 0)); expectExecuteUpdate("ADD SILENT DEFAULT TO DEFAULT", NumTriples(0, 0, 0)); expectExecuteUpdate("MOVE SILENT DEFAULT TO DEFAULT", NumTriples(0, 0, 0)); expectExecuteUpdate("MOVE GRAPH TO ", NumTriples(0, 0, 0)); expectExecuteUpdate("MOVE TO DEFAULT", NumTriples(0, 8, 8, 0, 1)); expectExecuteUpdate("MOVE DEFAULT TO GRAPH ", - NumTriples(8, 8, 16, 1, 1)); - expectExecuteUpdate("COPY DEFAULT TO ", NumTriples(8, 0, 8, 1, 0)); + NumTriples(8, 8, 16, 2, 1)); + expectExecuteUpdate("COPY DEFAULT TO ", NumTriples(8, 0, 8, 2, 0)); expectExecuteUpdate("COPY DEFAULT TO DEFAULT", NumTriples(0, 0, 0)); expectExecuteUpdate("COPY TO DEFAULT", NumTriples(0, 8, 8, 0, 1)); expectExecuteUpdate("CREATE SILENT GRAPH ", NumTriples(0, 0, 0)); @@ -198,17 +198,17 @@ TEST(ExecuteUpdate, executeUpdate) { expectExecuteUpdate("DROP SILENT ALL", NumTriples(0, 7, 7, 0, 3)); expectExecuteUpdate("ADD TO ", NumTriples(0, 0, 0)); expectExecuteUpdate("ADD TO ", NumTriples(0, 0, 0)); - expectExecuteUpdate("ADD SILENT TO DEFAULT", NumTriples(3, 0, 3, 2, 0)); + expectExecuteUpdate("ADD SILENT TO DEFAULT", NumTriples(3, 0, 3, 4, 0)); expectExecuteUpdate("ADD DEFAULT TO ", NumTriples(1, 0, 1)); expectExecuteUpdate("ADD SILENT DEFAULT TO DEFAULT", NumTriples(0, 0, 0)); expectExecuteUpdate("MOVE SILENT DEFAULT TO DEFAULT", NumTriples(0, 0, 0)); - expectExecuteUpdate("MOVE GRAPH TO ", NumTriples(3, 3, 6, 2, 2)); - expectExecuteUpdate("MOVE TO DEFAULT", NumTriples(3, 4, 7, 2, 2)); + expectExecuteUpdate("MOVE GRAPH TO ", NumTriples(3, 3, 6, 4, 2)); + expectExecuteUpdate("MOVE TO DEFAULT", NumTriples(3, 4, 7, 4, 2)); expectExecuteUpdate("MOVE DEFAULT TO GRAPH ", NumTriples(1, 1, 2)); expectExecuteUpdate("MOVE DEFAULT TO GRAPH ", NumTriples(1, 4, 5, 0, 2)); expectExecuteUpdate("COPY DEFAULT TO ", NumTriples(1, 3, 4, 0, 2)); expectExecuteUpdate("COPY DEFAULT TO DEFAULT", NumTriples(0, 0, 0)); - expectExecuteUpdate("COPY TO DEFAULT", NumTriples(3, 1, 4, 2, 0)); + expectExecuteUpdate("COPY TO DEFAULT", NumTriples(3, 1, 4, 4, 0)); expectExecuteUpdate("CREATE SILENT GRAPH ", NumTriples(0, 0, 0)); expectExecuteUpdate("CREATE GRAPH ", NumTriples(0, 0, 0)); } diff --git a/test/IndexTest.cpp b/test/IndexTest.cpp index 2311d35db8..7bb8bf09b6 100644 --- a/test/IndexTest.cpp +++ b/test/IndexTest.cpp @@ -713,7 +713,7 @@ TEST(IndexImpl, recomputeStatistics) { auto newStats = indexImpl.recomputeStatistics( index.deltaTriplesManager().getCurrentLocatedTriplesSharedState()); EXPECT_NE(newStats, indexImpl.configurationJson_); - EXPECT_EQ(newStats["num-triples"], NNAI(5, 6)); + EXPECT_EQ(newStats["num-triples"], NNAI(5, 7)); EXPECT_EQ(newStats["num-predicates"], NNAI(2, 4)); if (loadAllPermutations) { EXPECT_EQ(newStats["num-subjects"], NNAI(4, 0)); From e700ff84c04bfbb0c37d161b72978a200ba4426c Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 29 Jan 2026 12:19:29 +0100 Subject: [PATCH 2/7] Address PR comments --- src/engine/SpatialJoinParser.cpp | 10 +++++----- src/engine/SpatialJoinParser.h | 4 ++-- src/index/DeltaTriples.cpp | 14 ++++++++------ src/index/DeltaTriples.h | 15 +++++++++++---- src/rdfTypes/GeometryInfoHelpersImpl.h | 19 ++++++++----------- 5 files changed, 34 insertions(+), 28 deletions(-) diff --git a/src/engine/SpatialJoinParser.cpp b/src/engine/SpatialJoinParser.cpp index 9412a85594..40a27b3c0a 100644 --- a/src/engine/SpatialJoinParser.cpp +++ b/src/engine/SpatialJoinParser.cpp @@ -13,7 +13,7 @@ namespace ad_utility::detail::parallel_wkt_parser { // _____________________________________________________________________________ WKTParser::WKTParser(sj::Sweeper* sweeper, size_t numThreads, bool usePrefiltering, - const std::optional& prefilterLatLngBox, + const std::optional<::util::geo::DBox>& prefilterLatLngBox, const Index& index) : sj::WKTParserBase(sweeper, numThreads), _numSkipped(numThreads), @@ -66,22 +66,22 @@ void WKTParser::processQueue(size_t t) { parseCounter++; } else if (dt == Datatype::GeoPoint) { const auto& p = job.valueId.getGeoPoint(); - const util::geo::DPoint utilPoint{p.getLng(), p.getLat()}; + const ::util::geo::DPoint utilPoint{p.getLng(), p.getLat()}; // If point is not contained in the prefilter box, we can skip it // immediately instead of feeding it to the parser. if (_prefilterLatLngBox.has_value() && - !util::geo::intersects(_prefilterLatLngBox.value(), utilPoint)) { + !::util::geo::intersects(_prefilterLatLngBox.value(), utilPoint)) { prefilterCounter++; continue; } // parse point directly auto mercPoint = latLngToWebMerc(utilPoint); - util::geo::I32Point addPoint{ + ::util::geo::I32Point addPoint{ static_cast(mercPoint.getX() * PREC), static_cast(mercPoint.getY() * PREC)}; - _bboxes[t] = util::geo::extendBox( + _bboxes[t] = ::util::geo::extendBox( _sweeper->add(addPoint, std::to_string(job.line), job.side, w), _bboxes[t]); parseCounter++; diff --git a/src/engine/SpatialJoinParser.h b/src/engine/SpatialJoinParser.h index 6a4b9ef5e7..ba07682b2b 100644 --- a/src/engine/SpatialJoinParser.h +++ b/src/engine/SpatialJoinParser.h @@ -40,7 +40,7 @@ inline bool operator==(const SpatialJoinParseJob& a, class WKTParser : public sj::WKTParserBase { public: WKTParser(sj::Sweeper* sweeper, size_t numThreads, bool usePrefiltering, - const std::optional& prefilterLatLngBox, + const std::optional<::util::geo::DBox>& prefilterLatLngBox, const Index& index); // Enqueue a new row from the input table (given the `ValueId` of the @@ -69,7 +69,7 @@ class WKTParser : public sj::WKTParserBase { // Configure prefiltering geometries by bounding box. bool _usePrefiltering; - std::optional _prefilterLatLngBox; + std::optional<::util::geo::DBox> _prefilterLatLngBox; // A reference to QLever's index is needed to access precomputed geometry // bounding boxes and to resolve `ValueId`s into WKT literals. diff --git a/src/index/DeltaTriples.cpp b/src/index/DeltaTriples.cpp index f065bad5a3..eaf7a7d1d0 100644 --- a/src/index/DeltaTriples.cpp +++ b/src/index/DeltaTriples.cpp @@ -20,7 +20,6 @@ #include "index/Index.h" #include "index/IndexImpl.h" #include "index/LocatedTriples.h" -#include "util/LruCache.h" #include "util/Serializer/TripleSerializer.h" // ____________________________________________________________________________ @@ -212,17 +211,20 @@ DeltaTriples::Triples DeltaTriples::makeInternalTriples(const Triples& triples, // Extra triple ` @language@ "object"@language`. internalTriples.push_back( IdTriple<0>{std::array{ids.at(0), specialId, ids.at(2), ids.at(3)}}); - Id langtagId = - TripleComponent{ad_utility::convertLangtagToEntityUri(langtag)} - .toValueId(index_.getVocab(), localVocab_, - index_.encodedIriManager()); + Id langtagId = languageTagCache_.getOrCompute( + std::string{langtag}, [this](const std::string& tag) { + return TripleComponent{ad_utility::convertLangtagToEntityUri(tag)} + .toValueId(index_.getVocab(), localVocab_, + index_.encodedIriManager()); + }); + // Because we don't track the exact counts of existing objects, we just // conservatively add these internal triples on insertion, and never remove // them. This is inefficient, but never wrong because queries that use these // internal triples will always join these internal triples with a regular // index scan. if (insertion) { - // Extra triple ` ql:langtag <@language>`. + // Extra triple `"object"@language ql:langtag <@language>`. internalTriples.push_back(IdTriple<0>{ std::array{ids.at(2), languagePredicate, langtagId, ids.at(3)}}); } diff --git a/src/index/DeltaTriples.h b/src/index/DeltaTriples.h index 20b8d6dc18..eae9ba98c6 100644 --- a/src/index/DeltaTriples.h +++ b/src/index/DeltaTriples.h @@ -19,6 +19,7 @@ #include "index/IndexBuilderTypes.h" #include "index/LocatedTriples.h" #include "index/Permutation.h" +#include "util/LruCache.h" #include "util/Synchronized.h" #include "util/TimeTracer.h" @@ -128,6 +129,12 @@ class DeltaTriples { // See the documentation of `setPersist()` below. std::optional filenameForPersisting_; + // Store commonly used language tags of the form `<@lang>` to avoid repeated + // disk lookups. + static constexpr size_t languageTagCacheSize_ = 20; + ad_utility::util::LRUCache languageTagCache_{ + languageTagCacheSize_}; + // Assert that the Permutation Enum values have the expected int values. // This is used to store and lookup items that exist for permutation in an // array. @@ -213,10 +220,10 @@ class DeltaTriples { // bunch of triples to be inserted into the internal permutation to make // things like efficient language filters work. This currently performs a // lookup from disk to check the language tag, but in the future this may be - // implemented more efficiently. If `insertion` is false, then some internal - // triples will not be created, in particular the triple of the form - // ` ql:langtag <@language>`, which is not always safe to delete, - // but doesn't cause issues when it's present. + // implemented more efficiently. If `insertion` is false, this indicates that + // the triples are meant for deletion. In that case no triples are returned + // that may be unsafe to delete. In particular this refers to triples of the + // form ` ql:langtag <@language>`. Triples makeInternalTriples(const Triples& triples, bool insertion); // Insert triples. diff --git a/src/rdfTypes/GeometryInfoHelpersImpl.h b/src/rdfTypes/GeometryInfoHelpersImpl.h index a522df27f9..86ce7521f9 100644 --- a/src/rdfTypes/GeometryInfoHelpersImpl.h +++ b/src/rdfTypes/GeometryInfoHelpersImpl.h @@ -46,7 +46,7 @@ using ParsedWkt = MultiPoint, MultiLine, MultiPolygon, Collection>; using ParseResult = std::pair>; -using DAnyGeometry = util::geo::AnyGeometry; +using DAnyGeometry = AnyGeometry; template CPP_concept WktSingleGeometryType = @@ -245,19 +245,16 @@ inline std::optional wktTypeToIri(uint8_t type) { // Reverse projection applied by `sj::WKTParser`: convert coordinates from web // mercator int32 to normal lat-long double coordinates. -inline util::geo::DPoint projectInt32WebMercToDoubleLatLng( - const util::geo::I32Point& p) { - return util::geo::webMercToLatLng( - static_cast(p.getX()) / PREC, - static_cast(p.getY()) / PREC); -}; +inline DPoint projectInt32WebMercToDoubleLatLng(const I32Point& p) { + return webMercToLatLng(static_cast(p.getX()) / PREC, + static_cast(p.getY()) / PREC); +} // Same as above, but for a bounding box. -inline util::geo::DBox projectInt32WebMercToDoubleLatLng( - const util::geo::I32Box& box) { +inline DBox projectInt32WebMercToDoubleLatLng(const I32Box& box) { return {projectInt32WebMercToDoubleLatLng(box.getLowerLeft()), projectInt32WebMercToDoubleLatLng(box.getUpperRight())}; -}; +} // Counts the number of geometries in a geometry collection. inline uint32_t countChildGeometries(const ParsedWkt& geom) { @@ -657,7 +654,7 @@ struct MetricDistanceVisitor { // Delegate the actual distance computation to `pb_util`. CPP_template(typename T, typename U)(requires IsPairOfUtilGeoms) double operator()(const T& a, const U& b) const { - return util::geo::webMercMeterDist(a, b); + return webMercMeterDist(a, b); } // Handle optional geometries that may be contained in a `ParseResult`. From 4874a9449932b7eacd9ac3f57c8ef8ca57303cee Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 30 Jan 2026 12:39:37 +0100 Subject: [PATCH 3/7] Address PR comments --- src/index/DeltaTriples.cpp | 27 ++++++++++++--------------- src/index/DeltaTriples.h | 10 +++++++++- src/util/LruCache.h | 9 +++++---- 3 files changed, 26 insertions(+), 20 deletions(-) diff --git a/src/index/DeltaTriples.cpp b/src/index/DeltaTriples.cpp index 533dd3dce2..908a9e311c 100644 --- a/src/index/DeltaTriples.cpp +++ b/src/index/DeltaTriples.cpp @@ -173,17 +173,6 @@ DeltaTriples::Triples DeltaTriples::makeInternalTriples(const Triples& triples, // which adds the same extra triples for language tags to the internal triples // on the initial index build. Triples internalTriples; - constexpr size_t predicateCacheSize = 50; - ad_utility::util::LRUCache - predicateCache{predicateCacheSize}; - - // Don't compute the id if it's not used by the code below. - Id languagePredicate = - insertion ? TripleComponent{ad_utility::triple_component::Iri::fromIriref( - LANGUAGE_PREDICATE)} - .toValueId(index_.getVocab(), localVocab_, - index_.encodedIriManager()) - : Id::makeUndefined(); for (const auto& triple : triples) { const auto& ids = triple.ids(); Id objectId = ids.at(2); @@ -195,7 +184,7 @@ DeltaTriples::Triples DeltaTriples::makeInternalTriples(const Triples& triples, continue; } const auto& predicate = - predicateCache.getOrCompute(ids.at(1).getBits(), [this](Id::T bits) { + predicateCache_.getOrCompute(ids.at(1).getBits(), [this](Id::T bits) { auto optionalPredicate = ExportQueryExecutionTrees::idToLiteralOrIri( index_, Id::fromBits(bits), localVocab_, true); AD_CORRECTNESS_CHECK(optionalPredicate.has_value()); @@ -211,8 +200,8 @@ DeltaTriples::Triples DeltaTriples::makeInternalTriples(const Triples& triples, // Extra triple ` @language@ "object"@language`. internalTriples.push_back( IdTriple<0>{std::array{ids.at(0), specialId, ids.at(2), ids.at(3)}}); - Id langtagId = languageTagCache_.getOrCompute( - std::string{langtag}, [this](const std::string& tag) { + Id langtagId = + languageTagCache_.getOrCompute(langtag, [this](const std::string& tag) { return TripleComponent{ad_utility::convertLangtagToEntityUri(tag)} .toValueId(index_.getVocab(), localVocab_, index_.encodedIriManager()); @@ -226,7 +215,7 @@ DeltaTriples::Triples DeltaTriples::makeInternalTriples(const Triples& triples, if (insertion) { // Extra triple `"object"@language ql:langtag <@language>`. internalTriples.push_back(IdTriple<0>{ - std::array{ids.at(2), languagePredicate, langtagId, ids.at(3)}}); + std::array{ids.at(2), languagePredicate_, langtagId, ids.at(3)}}); } } // Because of the special predicates, we need to re-sort the triples. @@ -435,6 +424,14 @@ DeltaTriplesCount operator-(const DeltaTriplesCount& lhs, DeltaTriples::DeltaTriples(const Index& index) : DeltaTriples(index.getImpl()) {} +// ____________________________________________________________________________ +DeltaTriples::DeltaTriples(const IndexImpl& index) + : index_{index}, + languagePredicate_{TripleComponent{ + ad_utility::triple_component::Iri::fromIriref(LANGUAGE_PREDICATE)} + .toValueId(index_.getVocab(), localVocab_, + index_.encodedIriManager())} {} + // ____________________________________________________________________________ DeltaTriplesManager::DeltaTriplesManager(const IndexImpl& index) : deltaTriples_{index}, diff --git a/src/index/DeltaTriples.h b/src/index/DeltaTriples.h index 4ad7b99436..7a6350b4da 100644 --- a/src/index/DeltaTriples.h +++ b/src/index/DeltaTriples.h @@ -129,12 +129,20 @@ class DeltaTriples { // See the documentation of `setPersist()` below. std::optional filenameForPersisting_; + // Store the id of the `ql:langtag` predicate to avoid repeated disk lookups. + Id languagePredicate_; + // Store commonly used language tags of the form `<@lang>` to avoid repeated // disk lookups. static constexpr size_t languageTagCacheSize_ = 20; ad_utility::util::LRUCache languageTagCache_{ languageTagCacheSize_}; + // Cache commonly used predicates between calls. + static constexpr size_t predicateCacheSize_ = 50; + ad_utility::util::LRUCache + predicateCache_{predicateCacheSize_}; + // Assert that the Permutation Enum values have the expected int values. // This is used to store and lookup items that exist for permutation in an // array. @@ -174,7 +182,7 @@ class DeltaTriples { public: // Construct for given index. explicit DeltaTriples(const Index& index); - explicit DeltaTriples(const IndexImpl& index) : index_{index} {} + explicit DeltaTriples(const IndexImpl& index); // Disable accidental copying. DeltaTriples(const DeltaTriples&) = delete; diff --git a/src/util/LruCache.h b/src/util/LruCache.h index 55276b5853..fc2fafa2a6 100644 --- a/src/util/LruCache.h +++ b/src/util/LruCache.h @@ -35,9 +35,9 @@ class LRUCache { // found. Otherwise, compute the value using `computeFunction` and store it in // the cache. If the cache is already at maximum capacity, evict the least // recently used element. - CPP_template(typename Func)( + CPP_template(typename Key, typename Func)( requires ad_utility::InvocableWithConvertibleReturnType< - Func, V, const K&>) const V& getOrCompute(const K& key, + Func, V, const K&>) const V& getOrCompute(Key&& key, Func computeFunction) { auto it = cache_.find(key); if (it != cache_.end()) { @@ -56,9 +56,10 @@ class LRUCache { lruKey = key; } else { // Push new element if not full - keys_.push_front(key); + keys_.push_front(K{key}); } - auto result = cache_.try_emplace(key, computeFunction(key), keys_.begin()); + auto result = cache_.try_emplace( + AD_FWD(key), computeFunction(keys_.front()), keys_.begin()); AD_CORRECTNESS_CHECK(result.second); return result.first->second.first; } From 11b746b08fd4af03c1fa50921d9464cafad4904a Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 30 Jan 2026 13:43:22 +0100 Subject: [PATCH 4/7] Fix compilation and address PR comments --- src/index/DeltaTriples.h | 6 +++-- test/LruCacheTest.cpp | 48 ++++++++++++++++++++-------------------- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/src/index/DeltaTriples.h b/src/index/DeltaTriples.h index 7a6350b4da..dc825e870b 100644 --- a/src/index/DeltaTriples.h +++ b/src/index/DeltaTriples.h @@ -138,8 +138,10 @@ class DeltaTriples { ad_utility::util::LRUCache languageTagCache_{ languageTagCacheSize_}; - // Cache commonly used predicates between calls. - static constexpr size_t predicateCacheSize_ = 50; + // Cache commonly used predicates and their IRI representation between calls + // of `makeInternalTriples`. For example in wikidata `wdt:P31`, or `wdt:P279` + // are frequently used, so we try to avoid an expensive lookup from disk. + static constexpr size_t predicateCacheSize_ = 100; ad_utility::util::LRUCache predicateCache_{predicateCacheSize_}; diff --git a/test/LruCacheTest.cpp b/test/LruCacheTest.cpp index ece1928aae..4767f9c9cf 100644 --- a/test/LruCacheTest.cpp +++ b/test/LruCacheTest.cpp @@ -11,31 +11,31 @@ TEST(LRUCache, testLruCache) { // Type-erase the lambdas to get a better coverage report. using F = std::function; - EXPECT_EQ(1, cache.getOrCompute(1, [](int i) { - EXPECT_EQ(i, 1); - return 1; - })); - EXPECT_EQ(2, cache.getOrCompute(2, [](int i) { - EXPECT_EQ(i, 2); - return 2; - })); + EXPECT_EQ(1, (cache.getOrCompute(1, [](int i) { + EXPECT_EQ(i, 1); + return 1; + }))); + EXPECT_EQ(2, (cache.getOrCompute(2, [](int i) { + EXPECT_EQ(i, 2); + return 2; + }))); - EXPECT_EQ(1, cache.getOrCompute(1, [](int) { - ADD_FAILURE(); - return 0; - })); - EXPECT_EQ(3, cache.getOrCompute(3, [](int i) { - EXPECT_EQ(i, 3); - return 3; - })); - EXPECT_EQ(20, cache.getOrCompute(2, [](int i) { - EXPECT_EQ(i, 2); - return 20; - })); - EXPECT_EQ(10, cache.getOrCompute(1, [](int i) { - EXPECT_EQ(i, 1); - return 10; - })); + EXPECT_EQ(1, (cache.getOrCompute(1, [](int) { + ADD_FAILURE(); + return 0; + }))); + EXPECT_EQ(3, (cache.getOrCompute(3, [](int i) { + EXPECT_EQ(i, 3); + return 3; + }))); + EXPECT_EQ(20, (cache.getOrCompute(2, [](int i) { + EXPECT_EQ(i, 2); + return 20; + }))); + EXPECT_EQ(10, (cache.getOrCompute(1, [](int i) { + EXPECT_EQ(i, 1); + return 10; + }))); } // _____________________________________________________________________________ From 4ce3437cf3868104e5644a635ba6141fdd9735c1 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 30 Jan 2026 15:06:00 +0100 Subject: [PATCH 5/7] Fix invalid assertion --- src/index/DeltaTriples.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/index/DeltaTriples.cpp b/src/index/DeltaTriples.cpp index 908a9e311c..6bf9560b15 100644 --- a/src/index/DeltaTriples.cpp +++ b/src/index/DeltaTriples.cpp @@ -588,7 +588,12 @@ void DeltaTriples::readFromDisk() { if (!filenameForPersisting_.has_value()) { return; } - AD_CONTRACT_CHECK(localVocab_.empty()); + AD_CONTRACT_CHECK( + localVocab_.empty() || + (languagePredicate_.getDatatype() == Datatype::LocalVocabIndex && + localVocab_.size() == 1), + "The local vocab must be empty or only contain the language " + "predicate before reading delta triples from disk."); auto [vocab, idRanges] = ad_utility::deserializeIds( filenameForPersisting_.value(), index_.getBlankNodeManager()); if (idRanges.empty()) { From a7eeba76d22a04308c88ac5e5abc28da8e8da5bd Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 30 Jan 2026 15:28:40 +0100 Subject: [PATCH 6/7] Fix unit tests --- src/index/DeltaTriples.cpp | 22 ++++++++++------------ src/index/DeltaTriples.h | 3 ++- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/index/DeltaTriples.cpp b/src/index/DeltaTriples.cpp index 6bf9560b15..50bfc72892 100644 --- a/src/index/DeltaTriples.cpp +++ b/src/index/DeltaTriples.cpp @@ -173,6 +173,14 @@ DeltaTriples::Triples DeltaTriples::makeInternalTriples(const Triples& triples, // which adds the same extra triples for language tags to the internal triples // on the initial index build. Triples internalTriples; + // Initialize on first use. + if (languagePredicate_.isUndefined()) { + languagePredicate_ = + TripleComponent{ + ad_utility::triple_component::Iri::fromIriref(LANGUAGE_PREDICATE)} + .toValueId(index_.getVocab(), localVocab_, + index_.encodedIriManager()); + } for (const auto& triple : triples) { const auto& ids = triple.ids(); Id objectId = ids.at(2); @@ -425,12 +433,7 @@ DeltaTriples::DeltaTriples(const Index& index) : DeltaTriples(index.getImpl()) {} // ____________________________________________________________________________ -DeltaTriples::DeltaTriples(const IndexImpl& index) - : index_{index}, - languagePredicate_{TripleComponent{ - ad_utility::triple_component::Iri::fromIriref(LANGUAGE_PREDICATE)} - .toValueId(index_.getVocab(), localVocab_, - index_.encodedIriManager())} {} +DeltaTriples::DeltaTriples(const IndexImpl& index) : index_{index} {} // ____________________________________________________________________________ DeltaTriplesManager::DeltaTriplesManager(const IndexImpl& index) @@ -588,12 +591,7 @@ void DeltaTriples::readFromDisk() { if (!filenameForPersisting_.has_value()) { return; } - AD_CONTRACT_CHECK( - localVocab_.empty() || - (languagePredicate_.getDatatype() == Datatype::LocalVocabIndex && - localVocab_.size() == 1), - "The local vocab must be empty or only contain the language " - "predicate before reading delta triples from disk."); + AD_CONTRACT_CHECK(localVocab_.empty()); auto [vocab, idRanges] = ad_utility::deserializeIds( filenameForPersisting_.value(), index_.getBlankNodeManager()); if (idRanges.empty()) { diff --git a/src/index/DeltaTriples.h b/src/index/DeltaTriples.h index dc825e870b..7a8d3e4af6 100644 --- a/src/index/DeltaTriples.h +++ b/src/index/DeltaTriples.h @@ -130,7 +130,8 @@ class DeltaTriples { std::optional filenameForPersisting_; // Store the id of the `ql:langtag` predicate to avoid repeated disk lookups. - Id languagePredicate_; + // This is initialized on first use. + Id languagePredicate_ = Id::makeUndefined(); // Store commonly used language tags of the form `<@lang>` to avoid repeated // disk lookups. From 6b1d3eb6b28b6b08b0c1e0905ad9e0eaf10c1cba Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 30 Jan 2026 19:55:10 +0100 Subject: [PATCH 7/7] Increase cache sizes to 1000 --- src/index/DeltaTriples.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/index/DeltaTriples.h b/src/index/DeltaTriples.h index 7a8d3e4af6..a73af54e7a 100644 --- a/src/index/DeltaTriples.h +++ b/src/index/DeltaTriples.h @@ -135,14 +135,14 @@ class DeltaTriples { // Store commonly used language tags of the form `<@lang>` to avoid repeated // disk lookups. - static constexpr size_t languageTagCacheSize_ = 20; + static constexpr size_t languageTagCacheSize_ = 1000; ad_utility::util::LRUCache languageTagCache_{ languageTagCacheSize_}; // Cache commonly used predicates and their IRI representation between calls // of `makeInternalTriples`. For example in wikidata `wdt:P31`, or `wdt:P279` // are frequently used, so we try to avoid an expensive lookup from disk. - static constexpr size_t predicateCacheSize_ = 100; + static constexpr size_t predicateCacheSize_ = 1000; ad_utility::util::LRUCache predicateCache_{predicateCacheSize_};