From a52ef9c45426640c688dc3dca62a1d07dbd6ea67 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Mon, 12 Jan 2026 11:48:56 +0100 Subject: [PATCH 01/64] some experiments --- src/engine/MaterializedViews.cpp | 32 ++++++++++++++-- src/engine/MaterializedViews.h | 33 ++++++++++++++++ src/engine/QueryPlanner.cpp | 65 +++++++++++++++++++++++++++++++- src/engine/QueryPlanner.h | 6 ++- 4 files changed, 130 insertions(+), 6 deletions(-) diff --git a/src/engine/MaterializedViews.cpp b/src/engine/MaterializedViews.cpp index ea6f482e70..d87d46238a 100644 --- a/src/engine/MaterializedViews.cpp +++ b/src/engine/MaterializedViews.cpp @@ -343,11 +343,23 @@ std::shared_ptr MaterializedView::permutation() const { // _____________________________________________________________________________ void MaterializedViewsManager::loadView(const std::string& name) const { - auto lock = loadedViews_.wlock(); - if (lock->contains(name)) { - return; + { + auto lock = loadedViews_.wlock(); + if (lock->contains(name)) { + return; + } + lock->insert({name, std::make_shared(onDiskBase_, name)}); + } + // TODO + if (name == "geom") { + auto lock = joinPatterns_.wlock(); + auto x = ad_utility::triple_component::Iri::fromIriref( + ""); + auto y = ad_utility::triple_component::Iri::fromIriref( + ""); + lock->insert( + {JoinPattern{SingleChain{x, y}}, loadedViews_.rlock()->at(name)}); } - lock->insert({name, std::make_shared(onDiskBase_, name)}); }; // _____________________________________________________________________________ @@ -554,6 +566,18 @@ std::shared_ptr MaterializedView::makeIndexScan( viewQuery.getVarsToKeep()); } +// _____________________________________________________________________________ +std::shared_ptr MaterializedViewsManager::makeIndexScan( + QueryExecutionContext*, const JoinPattern& joinPattern) const { + auto lock = loadedViews_.rlock(); + if (lock->contains(joinPattern)) { + auto view = lock->at(joinPattern); + // view->makeIndexScan(qec, joinPattern, ...) 
+ // we need join pattern, var names from query,. + } + return nullptr; +} + // _____________________________________________________________________________ std::shared_ptr MaterializedViewsManager::makeIndexScan( QueryExecutionContext* qec, diff --git a/src/engine/MaterializedViews.h b/src/engine/MaterializedViews.h index 5b3ad0082c..d157cbfcd1 100644 --- a/src/engine/MaterializedViews.h +++ b/src/engine/MaterializedViews.h @@ -16,6 +16,7 @@ #include "parser/MaterializedViewQuery.h" #include "parser/ParsedQuery.h" #include "parser/SparqlTriple.h" +#include "rdfTypes/Iri.h" #include "util/HashMap.h" // Forward declarations @@ -222,6 +223,31 @@ class MaterializedView { const parsedQuery::MaterializedViewQuery& viewQuery) const; }; +// +struct SingleChain { + // ?something ?x . + ad_utility::triple_component::Iri predJoinObj_; + // ?x ?something_else . + ad_utility::triple_component::Iri predJoinSubj_; + + CPP_template(typename H, typename G)( + requires ql::concepts::same_as) friend H + AbslHashValue(H h, const G& g) { + return H::combine(std::move(h), g.predJoinObj_, g.predJoinSubj_); + } + + // Serialize to JSON + // Deserialize from JSON + // Detect +}; +// Hashing doesn't work for stars if we only want a part of the star... +// struct Star { +// std::vector predicates_; // Should be +// sorted +// // TODO hash +// }; +using JoinPattern = std::variant; + // The `MaterializedViewsManager` is part of the `QueryExecutionContext` and is // used to manage the currently loaded `MaterializedViews` in a `Server` or // `Qlever` instance. @@ -231,6 +257,10 @@ class MaterializedViewsManager { mutable ad_utility::Synchronized< ad_utility::HashMap>> loadedViews_; + // TODO multiple? 
+ mutable ad_utility::Synchronized< + ad_utility::HashMap>> + joinPatterns_; public: MaterializedViewsManager() = default; @@ -257,6 +287,9 @@ class MaterializedViewsManager { std::shared_ptr makeIndexScan( QueryExecutionContext* qec, const parsedQuery::MaterializedViewQuery& viewQuery) const; + // TODO . If no matching -> return nullptr. + std::shared_ptr makeIndexScan( + QueryExecutionContext* qec, const JoinPattern& joinPattern) const; }; #endif // QLEVER_SRC_ENGINE_MATERIALIZEDVIEWS_H_ diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 08f79eef16..291fe463c8 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -2248,6 +2248,12 @@ std::vector QueryPlanner::createJoinCandidates( candidates.push_back(std::move(plan)); } + // TODO this should be replaced by triple graph version below . st we do not + // check the same repeatedly. + if (auto opt = createMaterializedViewJoinReplacement(a, b, jcs)) { + candidates.push_back(std::move(opt.value())); + } + // "NORMAL" CASE: // The join class takes care of sorting the subtrees if necessary SubtreePlan plan = @@ -2526,6 +2532,61 @@ auto QueryPlanner::createJoinWithTransitivePath(const SubtreePlan& a, #endif } +// _____________________________________________________________________________ +auto QueryPlanner::createMaterializedViewJoinReplacement( + const SubtreePlan& a, const SubtreePlan& b, const JoinColumns&) const + -> std::optional { + auto isIndexScan = [](const auto& tree) -> std::shared_ptr { + return std::dynamic_pointer_cast(tree._qet->getRootOperation()); + }; + auto aScan = isIndexScan(a); + auto bScan = isIndexScan(b); + if (!aScan || !bScan) { + return std::nullopt; + } + + // TODO + if (!aScan->predicate().isIri() || !bScan->predicate().isIri()) { + return std::nullopt; + } + // TODO andersrum + std::string x = ""; + std::string y = ""; + if (aScan->predicate().getIri().toStringRepresentation() == x && + bScan->predicate().getIri().toStringRepresentation() 
== y && + aScan->object() == bScan->subject() && + aScan->subject() != bScan->object() && + aScan->subject() != aScan->object()) { + SingleChain chain{aScan->predicate().getIri(), bScan->predicate().getIri()}; + // _qec->materializedViewsManager().makeIndexScan(QueryExecutionContext + // *qec, const parsedQuery::MaterializedViewQuery &viewQuery) + AD_LOG_INFO << "SCAN: " + << aScan->predicate().getIri().toStringRepresentation() + << " CHAIN JOIN " + << bScan->predicate().getIri().toStringRepresentation() + << std::endl; + parsedQuery::MaterializedViewQuery q{ + ad_utility::triple_component::Iri::fromIriref( + "")}; + q.addParameter(SparqlTriple{ + ad_utility::triple_component::Iri::fromIriref(""), + ad_utility::triple_component::Iri::fromIriref(""), + aScan->subject()}); + q.addParameter(SparqlTriple{ + ad_utility::triple_component::Iri::fromIriref(""), + ad_utility::triple_component::Iri::fromIriref(""), + aScan->object()}); + q.addParameter(SparqlTriple{ + ad_utility::triple_component::Iri::fromIriref(""), + ad_utility::triple_component::Iri::fromIriref(""), + bScan->object()}); + return getMaterializedViewIndexScanPlan(q); + } + // + + return std::nullopt; +} + // ______________________________________________________________________________________ auto QueryPlanner::createJoinWithHasPredicateScan(const SubtreePlan& a, const SubtreePlan& b, @@ -3157,7 +3218,7 @@ void QueryPlanner::GraphPatternPlanner::visitPathSearch( // _______________________________________________________________ SubtreePlan QueryPlanner::getMaterializedViewIndexScanPlan( - const parsedQuery::MaterializedViewQuery& viewQuery) { + const parsedQuery::MaterializedViewQuery& viewQuery) const { return makeSubtreePlan( _qec->materializedViewsManager().makeIndexScan(_qec, viewQuery)); } @@ -3305,6 +3366,8 @@ void QueryPlanner::GraphPatternPlanner::visitSubquery( // _______________________________________________________________ void QueryPlanner::GraphPatternPlanner::optimizeCommutatively() { auto 
tg = planner_.createTripleGraph(&candidateTriples_); + // tg._adjLists; + // TODO somehow add candidates for matviews auto lastRow = planner_ .fillDpTab(tg, rootPattern_->_filters, rootPattern_->textLimits_, candidatePlans_) diff --git a/src/engine/QueryPlanner.h b/src/engine/QueryPlanner.h index 94c6ce9a30..1aeacf7cdf 100644 --- a/src/engine/QueryPlanner.h +++ b/src/engine/QueryPlanner.h @@ -450,6 +450,10 @@ class QueryPlanner { const SubtreePlan& b, const JoinColumns& jcs); + // TODO + std::optional createMaterializedViewJoinReplacement( + const SubtreePlan& a, const SubtreePlan& b, const JoinColumns& jcs) const; + vector getOrderByRow( const ParsedQuery& pq, const std::vector>& dpTab) const; @@ -613,7 +617,7 @@ class QueryPlanner { // Given a `MaterializedViewQuery` construct a `SubtreePlan` for an // `IndexScan` operation on the requested materialized view. SubtreePlan getMaterializedViewIndexScanPlan( - const parsedQuery::MaterializedViewQuery& viewQuery); + const parsedQuery::MaterializedViewQuery& viewQuery) const; // An internal helper class that encapsulates the functionality to optimize // a single graph pattern. 
It tightly interacts with the outer `QueryPlanner` From ad50dbc5525bb131403e16613f5ad723adf59c97 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Mon, 12 Jan 2026 12:33:36 +0100 Subject: [PATCH 02/64] Allow writing materialized views from queries with less than four columns --- src/engine/MaterializedViews.cpp | 26 +++++++++++++++++++++----- src/engine/MaterializedViews.h | 10 ++++++++-- test/MaterializedViewsTest.cpp | 17 +++++++++++------ 3 files changed, 40 insertions(+), 13 deletions(-) diff --git a/src/engine/MaterializedViews.cpp b/src/engine/MaterializedViews.cpp index ea6f482e70..69e4656991 100644 --- a/src/engine/MaterializedViews.cpp +++ b/src/engine/MaterializedViews.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "engine/IndexScan.h" @@ -50,7 +51,12 @@ MaterializedViewWriter::MaterializedViewWriter( columnNames_ = ::ranges::to>(columnNamesAndPermutation | ql::views::keys); columnPermutation_ = ::ranges::to>( - columnNamesAndPermutation | ql::views::values); + columnNamesAndPermutation | ql::views::values | + ql::views::filter([](const auto& idx) { return idx.has_value(); }) | + ql::views::transform([](const auto& idx) { return idx.value(); })); + numAddEmptyColumns_ = ::ranges::count_if( + columnNamesAndPermutation, + [](const auto& col) { return col.second.has_value(); }); } // _____________________________________________________________________________ @@ -86,16 +92,21 @@ MaterializedViewWriter::getIdTableColumnNamesAndPermutation() const { auto targetVarsAndCols = qet_->selectedVariablesToColumnIndices(parsedQuery_.selectClause()); - AD_CONTRACT_CHECK(targetVarsAndCols.size() >= 4, - "Currently the query used to write a materialized view " - "needs to have at least four columns."); - return ::ranges::to( + // Column information for the columns selected by the user's query. 
+ auto result = ::ranges::to( targetVarsAndCols | ql::views::transform([](const auto& opt) { AD_CONTRACT_CHECK(opt.has_value()); return ColumnNameAndIndex{opt.value().variable_, opt.value().columnIndex_}; })); + + // Add dummy columns such that the view has at least four columns in total. + for (uint8_t i = 0; i < 4 - targetVarsAndCols.size(); ++i) { + result.push_back({Variable{absl::StrCat("?_empty_", i)}, std::nullopt}); + } + + return result; } // _____________________________________________________________________________ @@ -107,6 +118,11 @@ void MaterializedViewWriter::permuteIdTableAndCheckNoLocalVocabEntries( // particular, the indexed column should be the first. block.setColumnSubset(columnPermutation_); + // Add empty columns such that the view has at least four columns. + for (uint8_t i = 0; i < numAddEmptyColumns_; ++i) { + block.addEmptyColumn(); + } + // Check that there are no values of type `LocalVocabIndex` in the selected // columns of the `IdTable` as materialized views do not support them as of // now. diff --git a/src/engine/MaterializedViews.h b/src/engine/MaterializedViews.h index 5b3ad0082c..7fcefd5cb5 100644 --- a/src/engine/MaterializedViews.h +++ b/src/engine/MaterializedViews.h @@ -55,6 +55,10 @@ class MaterializedViewWriter { // with the same column ordering as the `SELECT` statement. std::vector columnPermutation_; + // The number of empty columns to add to the query result such that the + // resulting table has at least four columns. + uint8_t numAddEmptyColumns_; + using RangeOfIdTables = ad_utility::InputRangeTypeErased>; // SPO comparator using Comparator = SortTriple<0, 1, 2>; @@ -82,12 +86,14 @@ class MaterializedViewWriter { // the `QueryExecutionTree` must be permuted to match the requested target // columns and column ordering. This is called in the constructor to populate // `columnNamesAndPermutation_`. 
- using ColumnNameAndIndex = std::pair; + using ColumnNameAndIndex = std::pair>; using ColumnNamesAndPermutation = std::vector; ColumnNamesAndPermutation getIdTableColumnNamesAndPermutation() const; // The number of columns of the view. - size_t numCols() const { return columnPermutation_.size(); } + size_t numCols() const { + return columnPermutation_.size() + numAddEmptyColumns_; + } // Helper to permute an `IdTable` according to `columnPermutation_` and verify // that there are no `LocalVocabEntry` values in any of the selected columns. diff --git a/test/MaterializedViewsTest.cpp b/test/MaterializedViewsTest.cpp index 30f20c421e..e9d8f705f0 100644 --- a/test/MaterializedViewsTest.cpp +++ b/test/MaterializedViewsTest.cpp @@ -340,16 +340,21 @@ TEST_F(MaterializedViewsTest, ColumnPermutation) { ad_utility::MediaType::tsv); EXPECT_EQ(res, "?o\n\"abc\"\n\"xyz\"\n"); } + + // Test that writing a view with less than four columns is possible. + { + MaterializedViewWriter::writeViewToDisk( + testIndexBase_, "testView5", + qlv().parseAndPlanQuery("SELECT * { ?s ?p ?o }")); + MaterializedView view{testIndexBase_, "testView5"}; + EXPECT_THAT(columnNames(view), + ::testing::ElementsAreArray( + std::vector{V{"?s"}, V{"?p"}, V{"?o"}, V{"?_empty_0"}})); + } } // _____________________________________________________________________________ TEST_F(MaterializedViewsTest, InvalidInputToWriter) { - AD_EXPECT_THROW_WITH_MESSAGE( - MaterializedViewWriter::writeViewToDisk( - testIndexBase_, "testView1", - qlv().parseAndPlanQuery("SELECT * { ?s ?p ?o }")), - ::testing::HasSubstr("Currently the query used to write a materialized " - "view needs to have at least four columns")); AD_EXPECT_THROW_WITH_MESSAGE( MaterializedViewWriter::writeViewToDisk( testIndexBase_, "Something Out!of~the.ordinary", From 63973a9061b7ef86bca928df02b418985831cc2c Mon Sep 17 00:00:00 2001 From: ullingerc Date: Mon, 12 Jan 2026 14:37:09 +0100 Subject: [PATCH 03/64] logging --- src/engine/MaterializedViews.cpp | 
11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/engine/MaterializedViews.cpp b/src/engine/MaterializedViews.cpp index 69e4656991..52ded971f3 100644 --- a/src/engine/MaterializedViews.cpp +++ b/src/engine/MaterializedViews.cpp @@ -92,6 +92,7 @@ MaterializedViewWriter::getIdTableColumnNamesAndPermutation() const { auto targetVarsAndCols = qet_->selectedVariablesToColumnIndices(parsedQuery_.selectClause()); + const size_t numCols = targetVarsAndCols.size(); // Column information for the columns selected by the user's query. auto result = ::ranges::to( @@ -102,8 +103,14 @@ MaterializedViewWriter::getIdTableColumnNamesAndPermutation() const { })); // Add dummy columns such that the view has at least four columns in total. - for (uint8_t i = 0; i < 4 - targetVarsAndCols.size(); ++i) { - result.push_back({Variable{absl::StrCat("?_empty_", i)}, std::nullopt}); + if (numCols < 4) { + AD_LOG_INFO << "The query to write the materialized view '" << name_ + << "' selects only " << numCols << " column(s). " << 4 - numCols + << " empty column(s) will be appended." << std::endl; + + for (uint8_t i = 0; i < 4 - numCols; ++i) { + result.push_back({Variable{absl::StrCat("?_empty_", i)}, std::nullopt}); + } } return result; From fdcf7b3408191a13527fc993926067d51113de76 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Mon, 12 Jan 2026 14:37:12 +0100 Subject: [PATCH 04/64] docs --- src/engine/MaterializedViews.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/engine/MaterializedViews.h b/src/engine/MaterializedViews.h index 7fcefd5cb5..1a0126a91e 100644 --- a/src/engine/MaterializedViews.h +++ b/src/engine/MaterializedViews.h @@ -47,7 +47,9 @@ class MaterializedViewWriter { ad_utility::MemorySize memoryLimit_; ad_utility::AllocatorWithLimit allocator_; - // The correctly ordered column names of the view. + // The correctly ordered column names of the view. 
These may include variables + // of the form `?_empty_N` if the query to write the view has less than four + // columns. std::vector columnNames_; // The columns of the `IdTable`s we get when executing the query can be in From 8032b7f723371b6f07cba0cec3bc83ec889dde6d Mon Sep 17 00:00:00 2001 From: ullingerc Date: Mon, 12 Jan 2026 14:42:50 +0100 Subject: [PATCH 05/64] test logging --- test/MaterializedViewsTest.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/MaterializedViewsTest.cpp b/test/MaterializedViewsTest.cpp index e9d8f705f0..575a394acc 100644 --- a/test/MaterializedViewsTest.cpp +++ b/test/MaterializedViewsTest.cpp @@ -343,6 +343,7 @@ TEST_F(MaterializedViewsTest, ColumnPermutation) { // Test that writing a view with less than four columns is possible. { + clearLog(); MaterializedViewWriter::writeViewToDisk( testIndexBase_, "testView5", qlv().parseAndPlanQuery("SELECT * { ?s ?p ?o }")); @@ -350,6 +351,7 @@ TEST_F(MaterializedViewsTest, ColumnPermutation) { EXPECT_THAT(columnNames(view), ::testing::ElementsAreArray( std::vector{V{"?s"}, V{"?p"}, V{"?o"}, V{"?_empty_0"}})); + EXPECT_THAT(log_.str(), ::testing::HasSubstr("1 empty column")); } } From 94359556c6df2f10d56da5f9dfc3c5b9d855cf78 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Mon, 12 Jan 2026 17:12:37 +0100 Subject: [PATCH 06/64] stuff --- src/engine/MaterializedViews.cpp | 45 +++++++++++++++----------------- src/engine/MaterializedViews.h | 12 +++++---- src/engine/QueryPlanner.cpp | 3 ++- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/src/engine/MaterializedViews.cpp b/src/engine/MaterializedViews.cpp index d87d46238a..aa6948160f 100644 --- a/src/engine/MaterializedViews.cpp +++ b/src/engine/MaterializedViews.cpp @@ -343,24 +343,21 @@ std::shared_ptr MaterializedView::permutation() const { // _____________________________________________________________________________ void MaterializedViewsManager::loadView(const std::string& name) const { - { - auto lock = 
loadedViews_.wlock(); - if (lock->contains(name)) { - return; - } - lock->insert({name, std::make_shared(onDiskBase_, name)}); - } - // TODO - if (name == "geom") { - auto lock = joinPatterns_.wlock(); - auto x = ad_utility::triple_component::Iri::fromIriref( - ""); - auto y = ad_utility::triple_component::Iri::fromIriref( - ""); - lock->insert( - {JoinPattern{SingleChain{x, y}}, loadedViews_.rlock()->at(name)}); - } -}; + {auto lock = loadedViews_.wlock(); +if (lock->contains(name)) { + return; +} +lock->insert({name, std::make_shared(onDiskBase_, name)}); +} +// TODO +// if (name == "geom") { +// auto lock = joinPatterns_.wlock(); +// std::string x = (""); +// std::string y = (""); +// lock->insert({SingleChain{x, y}, loadedViews_.rlock()->at(name)}); +// } +} +; // _____________________________________________________________________________ std::shared_ptr MaterializedViewsManager::getView( @@ -568,13 +565,13 @@ std::shared_ptr MaterializedView::makeIndexScan( // _____________________________________________________________________________ std::shared_ptr MaterializedViewsManager::makeIndexScan( - QueryExecutionContext*, const JoinPattern& joinPattern) const { + QueryExecutionContext*, const JoinPattern&) const { auto lock = loadedViews_.rlock(); - if (lock->contains(joinPattern)) { - auto view = lock->at(joinPattern); - // view->makeIndexScan(qec, joinPattern, ...) - // we need join pattern, var names from query,. - } + // if (lock->contains(joinPattern)) { + // auto view = lock->at(joinPattern); + // // view->makeIndexScan(qec, joinPattern, ...) + // // we need join pattern, var names from query,. + // } return nullptr; } diff --git a/src/engine/MaterializedViews.h b/src/engine/MaterializedViews.h index d157cbfcd1..9a84c4af1d 100644 --- a/src/engine/MaterializedViews.h +++ b/src/engine/MaterializedViews.h @@ -226,9 +226,11 @@ class MaterializedView { // struct SingleChain { // ?something ?x . 
- ad_utility::triple_component::Iri predJoinObj_; + std::string predJoinObj_; // ?x ?something_else . - ad_utility::triple_component::Iri predJoinSubj_; + std::string predJoinSubj_; + + QL_DEFINE_DEFAULTED_EQUALITY_OPERATOR_CONSTEXPR(SingleChain); CPP_template(typename H, typename G)( requires ql::concepts::same_as) friend H @@ -258,9 +260,9 @@ class MaterializedViewsManager { ad_utility::HashMap>> loadedViews_; // TODO multiple? - mutable ad_utility::Synchronized< - ad_utility::HashMap>> - joinPatterns_; + // mutable ad_utility::Synchronized< + // ad_utility::HashMap>> + // joinPatterns_; public: MaterializedViewsManager() = default; diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 291fe463c8..81e1058b73 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -2557,7 +2557,8 @@ auto QueryPlanner::createMaterializedViewJoinReplacement( aScan->object() == bScan->subject() && aScan->subject() != bScan->object() && aScan->subject() != aScan->object()) { - SingleChain chain{aScan->predicate().getIri(), bScan->predicate().getIri()}; + // SingleChain chain{aScan->predicate().getIri(), + // bScan->predicate().getIri()}; // _qec->materializedViewsManager().makeIndexScan(QueryExecutionContext // *qec, const parsedQuery::MaterializedViewQuery &viewQuery) AD_LOG_INFO << "SCAN: " From a29a3a1cf7a34bed966321970c2ee4b7ed06fe48 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Tue, 13 Jan 2026 10:41:11 +0100 Subject: [PATCH 07/64] apply Johannes' feedback --- src/engine/MaterializedViews.cpp | 25 ++++++++----------------- src/engine/MaterializedViews.h | 7 +++++-- test/MaterializedViewsTest.cpp | 15 ++++++++++----- 3 files changed, 23 insertions(+), 24 deletions(-) diff --git a/src/engine/MaterializedViews.cpp b/src/engine/MaterializedViews.cpp index 52ded971f3..a5dd7a4538 100644 --- a/src/engine/MaterializedViews.cpp +++ b/src/engine/MaterializedViews.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include #include 
"engine/IndexScan.h" @@ -47,16 +46,13 @@ MaterializedViewWriter::MaterializedViewWriter( qet_ = qet; qec_ = qec; parsedQuery_ = std::move(parsedQuery); - auto columnNamesAndPermutation = getIdTableColumnNamesAndPermutation(); + auto [columnNamesAndPermutation, numAddEmptyColumns] = + getIdTableColumnNamesAndPermutation(); columnNames_ = ::ranges::to>(columnNamesAndPermutation | ql::views::keys); columnPermutation_ = ::ranges::to>( - columnNamesAndPermutation | ql::views::values | - ql::views::filter([](const auto& idx) { return idx.has_value(); }) | - ql::views::transform([](const auto& idx) { return idx.value(); })); - numAddEmptyColumns_ = ::ranges::count_if( - columnNamesAndPermutation, - [](const auto& col) { return col.second.has_value(); }); + columnNamesAndPermutation | ql::views::values); + numAddEmptyColumns_ = numAddEmptyColumns; } // _____________________________________________________________________________ @@ -95,7 +91,7 @@ MaterializedViewWriter::getIdTableColumnNamesAndPermutation() const { const size_t numCols = targetVarsAndCols.size(); // Column information for the columns selected by the user's query. - auto result = ::ranges::to( + auto existingCols = ::ranges::to>( targetVarsAndCols | ql::views::transform([](const auto& opt) { AD_CONTRACT_CHECK(opt.has_value()); return ColumnNameAndIndex{opt.value().variable_, @@ -103,17 +99,15 @@ MaterializedViewWriter::getIdTableColumnNamesAndPermutation() const { })); // Add dummy columns such that the view has at least four columns in total. + uint8_t numAddEmptyCols = 0; if (numCols < 4) { AD_LOG_INFO << "The query to write the materialized view '" << name_ << "' selects only " << numCols << " column(s). " << 4 - numCols << " empty column(s) will be appended." 
<< std::endl; - - for (uint8_t i = 0; i < 4 - numCols; ++i) { - result.push_back({Variable{absl::StrCat("?_empty_", i)}, std::nullopt}); - } + numAddEmptyCols = 4 - numCols; } - return result; + return {std::move(existingCols), numAddEmptyCols}; } // _____________________________________________________________________________ @@ -343,9 +337,6 @@ MaterializedView::MaterializedView(std::string onDiskBase, std::string name) // Make variable to column map auto columnNames = viewInfoJson.at("columns").get>(); - AD_CORRECTNESS_CHECK( - columnNames.size() >= 4, - "Expected at least four columns in materialized view metadata"); for (const auto& [index, columnName] : ::ranges::views::enumerate(columnNames)) { varToColMap_.insert({Variable{columnName}, diff --git a/src/engine/MaterializedViews.h b/src/engine/MaterializedViews.h index 1a0126a91e..c0097dc323 100644 --- a/src/engine/MaterializedViews.h +++ b/src/engine/MaterializedViews.h @@ -88,8 +88,11 @@ class MaterializedViewWriter { // the `QueryExecutionTree` must be permuted to match the requested target // columns and column ordering. This is called in the constructor to populate // `columnNamesAndPermutation_`. - using ColumnNameAndIndex = std::pair>; - using ColumnNamesAndPermutation = std::vector; + using ColumnNameAndIndex = std::pair; + struct ColumnNamesAndPermutation { + std::vector columnNamesAndIndices_; + uint8_t numAddEmptyColumns_; + }; ColumnNamesAndPermutation getIdTableColumnNamesAndPermutation() const; // The number of columns of the view. diff --git a/test/MaterializedViewsTest.cpp b/test/MaterializedViewsTest.cpp index 575a394acc..a6edc99754 100644 --- a/test/MaterializedViewsTest.cpp +++ b/test/MaterializedViewsTest.cpp @@ -341,17 +341,22 @@ TEST_F(MaterializedViewsTest, ColumnPermutation) { EXPECT_EQ(res, "?o\n\"abc\"\n\"xyz\"\n"); } - // Test that writing a view with less than four columns is possible. + // Test that writing and reading from a view with less than four columns is + // possible. 
{ clearLog(); MaterializedViewWriter::writeViewToDisk( testIndexBase_, "testView5", - qlv().parseAndPlanQuery("SELECT * { ?s ?p ?o }")); + qlv().parseAndPlanQuery("SELECT * { ?p ?o }")); MaterializedView view{testIndexBase_, "testView5"}; EXPECT_THAT(columnNames(view), - ::testing::ElementsAreArray( - std::vector{V{"?s"}, V{"?p"}, V{"?o"}, V{"?_empty_0"}})); - EXPECT_THAT(log_.str(), ::testing::HasSubstr("1 empty column")); + ::testing::ElementsAreArray(std::vector{V{"?p"}, V{"?o"}})); + EXPECT_THAT(log_.str(), ::testing::HasSubstr("2 empty column(s)")); + auto res = qlv().query( + "PREFIX view: " + "SELECT * { view:testView5-o ?o }", + ad_utility::MediaType::tsv); + EXPECT_EQ(res, "?o\n\"abc\"\n"); } } From addf9c6c6347ee8199a740f3fa451db252e5d2c3 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Tue, 13 Jan 2026 10:42:40 +0100 Subject: [PATCH 08/64] remove obsolete comment --- src/engine/MaterializedViews.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/engine/MaterializedViews.h b/src/engine/MaterializedViews.h index c0097dc323..55418d2c51 100644 --- a/src/engine/MaterializedViews.h +++ b/src/engine/MaterializedViews.h @@ -47,9 +47,7 @@ class MaterializedViewWriter { ad_utility::MemorySize memoryLimit_; ad_utility::AllocatorWithLimit allocator_; - // The correctly ordered column names of the view. These may include variables - // of the form `?_empty_N` if the query to write the view has less than four - // columns. + // The correctly ordered column names of the view. 
std::vector columnNames_; // The columns of the `IdTable`s we get when executing the query can be in From 1a75011bfda6200c1afcf9bf0800773de50b8f7c Mon Sep 17 00:00:00 2001 From: ullingerc Date: Tue, 13 Jan 2026 15:59:43 +0100 Subject: [PATCH 09/64] write join pattern --- src/engine/IndexScan.h | 4 +++ src/engine/MaterializedViews.cpp | 52 +++++++++++++++++++++++++++++++- src/engine/MaterializedViews.h | 3 ++ 3 files changed, 58 insertions(+), 1 deletion(-) diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h index ed3f318a2e..4ad2e213e3 100644 --- a/src/engine/IndexScan.h +++ b/src/engine/IndexScan.h @@ -94,6 +94,10 @@ class IndexScan final : public Operation { std::vector resultSortedOn() const override; + std::array triple() const { + return {subject_, predicate_, object_}; + } + // Set `PrefilterExpression`s and return updated `QueryExecutionTree` pointer // if necessary. std::optional> diff --git a/src/engine/MaterializedViews.cpp b/src/engine/MaterializedViews.cpp index 8239ccdcd6..0bc94cf8ed 100644 --- a/src/engine/MaterializedViews.cpp +++ b/src/engine/MaterializedViews.cpp @@ -14,6 +14,7 @@ #include #include "engine/IndexScan.h" +#include "engine/Join.h" #include "engine/QueryExecutionContext.h" #include "engine/QueryExecutionTree.h" #include "engine/VariableToColumnMap.h" @@ -110,6 +111,54 @@ MaterializedViewWriter::getIdTableColumnNamesAndPermutation() const { return {std::move(existingCols), numAddEmptyCols}; } +// _____________________________________________________________________________ +std::string MaterializedViewWriter::detectJoinPattern() const { + auto op = qet_->getRootOperation(); + auto join = std::dynamic_pointer_cast(op); + if (join == nullptr) { + return ""; + } + const auto& children = join->getChildren(); + AD_CORRECTNESS_CHECK(children.size() == 2); + auto getIndexScan = [](QueryExecutionTree* ptr) { + return std::dynamic_pointer_cast(ptr->getRootOperation()); + }; + auto left = getIndexScan(children.at(0)); + auto right = 
getIndexScan(children.at(1)); + if (left == nullptr || right == nullptr) { + return ""; + } + + // we are looking for the pattern: + // ?a ?b . + // ?b ?c . + + auto check = [](std::shared_ptr a, + std::shared_ptr b) -> std::string { + auto [s1, p1, o1] = a->triple(); + auto [s2, p2, o2] = b->triple(); + + if (s1.isVariable() && s1 != s2 && s1 != o1 && p1.isIri() && + o1.isVariable() && o1 == s2 && p2.isIri() && o2.isVariable() && + s1 != o2 && o1 != o2) { + return absl::StrCat(p1.toString(), " CHAIN ", p2.toString()); + // TODO remember variables + } + return ""; + }; + + auto lr = check(left, right); + if (!lr.empty()) { + return lr; + } + auto rl = check(right, left); + if (!rl.empty()) { + return rl; + } + + return ""; +} + // _____________________________________________________________________________ void MaterializedViewWriter::permuteIdTableAndCheckNoLocalVocabEntries( IdTable& block) const { @@ -281,7 +330,8 @@ void MaterializedViewWriter::writeViewMetadata() const { {"columns", (columnNames_ | ql::views::transform([](const Variable& v) { return v.name(); }) | - ::ranges::to>())}}; + ::ranges::to>())}, + {"joinPattern", detectJoinPattern()}}; ad_utility::makeOfstream(getFilenameBase() + ".viewinfo.json") << viewInfo.dump() << std::endl; } diff --git a/src/engine/MaterializedViews.h b/src/engine/MaterializedViews.h index fbc3ed47b1..0b2c8866ff 100644 --- a/src/engine/MaterializedViews.h +++ b/src/engine/MaterializedViews.h @@ -126,6 +126,9 @@ class MaterializedViewWriter { // `CompressedRelationWriter`. Returns the permutation metadata. IndexMetaDataMmap writePermutation(RangeOfIdTables sortedBlocksSPO) const; + // TODO + std::string detectJoinPattern() const; + // Helper for `computeResultAndWritePermutation`: Writes the metadata JSON // files with column names and ordering to disk. 
void writeViewMetadata() const; From adbb5db7147b48ec3a8b96b205ccb7d10fe6ecb4 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Wed, 14 Jan 2026 14:34:26 +0100 Subject: [PATCH 10/64] more experiments --- src/engine/CMakeLists.txt | 2 +- src/engine/MaterializedViews.cpp | 49 +++++++++----- src/engine/MaterializedViews.h | 11 ++- src/engine/MaterializedViewsQueryAnalysis.cpp | 67 +++++++++++++++++++ src/engine/MaterializedViewsQueryAnalysis.h | 44 ++++++++++++ 5 files changed, 155 insertions(+), 18 deletions(-) create mode 100644 src/engine/MaterializedViewsQueryAnalysis.cpp create mode 100644 src/engine/MaterializedViewsQueryAnalysis.h diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt index 7e575a5cca..2f463c4a7d 100644 --- a/src/engine/CMakeLists.txt +++ b/src/engine/CMakeLists.txt @@ -18,7 +18,7 @@ add_library(engine QueryExecutionContext.cpp ExistsJoin.cpp SparqlProtocol.cpp ParsedRequestBuilder.cpp NeutralOptional.cpp Load.cpp StripColumns.cpp NamedResultCache.cpp ExplicitIdTableOperation.cpp StringMapping.cpp MaterializedViews.cpp - PermutationSelector.cpp) + PermutationSelector.cpp MaterializedViewsQueryAnalysis.cpp) qlever_target_link_libraries(engine util index parser global sparqlExpressions SortPerformanceEstimator Boost::iostreams s2 spatialjoin-dev pb_util pb_util_geo) diff --git a/src/engine/MaterializedViews.cpp b/src/engine/MaterializedViews.cpp index 0bc94cf8ed..836fa72689 100644 --- a/src/engine/MaterializedViews.cpp +++ b/src/engine/MaterializedViews.cpp @@ -113,6 +113,14 @@ MaterializedViewWriter::getIdTableColumnNamesAndPermutation() const { // _____________________________________________________________________________ std::string MaterializedViewWriter::detectJoinPattern() const { + // AD_LOG_INFO << parsedQuery_._rootGraphPattern._graphPatterns.at(0) + // .getBasic() + // ._triples.at(0) + // .asString() + // << std::endl; + // auto parsedQuery = SparqlParser::parseQuery( + // &index_.getImpl().encodedIriManager(), 
std::move(query), {}); + auto op = qet_->getRootOperation(); auto join = std::dynamic_pointer_cast(op); if (join == nullptr) { @@ -331,7 +339,9 @@ void MaterializedViewWriter::writeViewMetadata() const { return v.name(); }) | ::ranges::to>())}, - {"joinPattern", detectJoinPattern()}}; + {"query", parsedQuery_._originalString}, + // {"joinPattern", detectJoinPattern()} + }; ad_utility::makeOfstream(getFilenameBase() + ".viewinfo.json") << viewInfo.dump() << std::endl; } @@ -340,7 +350,8 @@ void MaterializedViewWriter::writeViewMetadata() const { void MaterializedViewWriter::computeResultAndWritePermutation() const { // Run query and sort the result externally (only if necessary) AD_LOG_INFO << "Computing result for materialized view query " << name_ - << "..." << std::endl; + << ":\n" + << parsedQuery_._originalString << std::endl; auto result = qet_->getResult(true); Sorter spoSorter{getFilenameBase() + ".spo-sorter.dat", numCols(), @@ -394,6 +405,9 @@ MaterializedView::MaterializedView(std::string onDiskBase, std::string name) ColumnIndexAndTypeInfo::PossiblyUndefined}}); } + // Restore original query string. 
+ originalQuery_ = viewInfoJson.at("query").get(); + // Read permutation permutation_->loadFromDisk(filename, false); AD_CORRECTNESS_CHECK(permutation_->isLoaded()); @@ -407,21 +421,24 @@ std::shared_ptr MaterializedView::permutation() const { // _____________________________________________________________________________ void MaterializedViewsManager::loadView(const std::string& name) const { - {auto lock = loadedViews_.wlock(); -if (lock->contains(name)) { - return; -} -lock->insert({name, std::make_shared(onDiskBase_, name)}); -} -// TODO -// if (name == "geom") { -// auto lock = joinPatterns_.wlock(); -// std::string x = (""); -// std::string y = (""); -// lock->insert({SingleChain{x, y}, loadedViews_.rlock()->at(name)}); -// } + auto lock = loadedViews_.wlock(); + auto patternLock = queryPatternCache_.wlock(); + if (lock->contains(name)) { + return; + } + auto view = std::make_shared(onDiskBase_, name); + lock->insert({name, view}); + // Analyzing the view when loading instead of (de)serializing an analysis + // result has the benefit that query analysis can be extended without needing + // to rewrite views. 
+ patternLock->analyzeView(view); + // if (name == "geom") { + // auto lock = joinPatterns_.wlock(); + // std::string x = (""); + // std::string y = (""); + // lock->insert({SingleChain{x, y}, loadedViews_.rlock()->at(name)}); + // } } -; // _____________________________________________________________________________ std::shared_ptr MaterializedViewsManager::getView( diff --git a/src/engine/MaterializedViews.h b/src/engine/MaterializedViews.h index 0b2c8866ff..449d0093c9 100644 --- a/src/engine/MaterializedViews.h +++ b/src/engine/MaterializedViews.h @@ -7,6 +7,7 @@ #ifndef QLEVER_SRC_ENGINE_MATERIALIZEDVIEWS_H_ #define QLEVER_SRC_ENGINE_MATERIALIZEDVIEWS_H_ +#include "engine/MaterializedViewsQueryAnalysis.h" #include "engine/VariableToColumnMap.h" #include "engine/idTable/CompressedExternalIdTable.h" #include "index/DeltaTriples.h" @@ -18,6 +19,7 @@ #include "parser/SparqlTriple.h" #include "rdfTypes/Iri.h" #include "util/HashMap.h" +#include "util/Synchronized.h" // Forward declarations class QueryExecutionContext; @@ -27,7 +29,7 @@ class IndexScan; // For the future, materialized views save their version. If we change something // about the way materialized views are stored, we can break the existing ones // cleanly without breaking the entire index format. -static constexpr size_t MATERIALIZED_VIEWS_VERSION = 1; +static constexpr size_t MATERIALIZED_VIEWS_VERSION = 2; // The `MaterializedViewWriter` can be used to write a new materialized view to // disk, given an already planned query. The query will be executed lazily and @@ -162,6 +164,7 @@ class MaterializedView { Permutation::Enum::SPO, ad_utility::makeUnlimitedAllocator())}; VariableToColumnMap varToColMap_; std::shared_ptr locatedTriplesState_; + std::string originalQuery_; using AdditionalScanColumns = SparqlTripleSimple::AdditionalScanColumns; @@ -183,6 +186,9 @@ class MaterializedView { return varToColMap_; } + // Get the original query string used for writing the view. 
+ const std::string& originalQuery() const { return originalQuery_; } + // Return the combined filename from the index' `onDiskBase` and the name of // the view. Note that this function does not check for validity or existence. static std::string getFilenameBase(std::string_view onDiskBase, @@ -275,6 +281,9 @@ class MaterializedViewsManager { // mutable ad_utility::Synchronized< // ad_utility::HashMap>> // joinPatterns_; + mutable ad_utility::Synchronized< + materializedViewsQueryAnalysis::QueryPatternCache> + queryPatternCache_; public: MaterializedViewsManager() = default; diff --git a/src/engine/MaterializedViewsQueryAnalysis.cpp b/src/engine/MaterializedViewsQueryAnalysis.cpp new file mode 100644 index 0000000000..bad46cb5ef --- /dev/null +++ b/src/engine/MaterializedViewsQueryAnalysis.cpp @@ -0,0 +1,67 @@ +// Copyright 2026 The QLever Authors, in particular: +// +// 2026 Christoph Ullinger , UFR +// +// UFR = University of Freiburg, Chair of Algorithms and Data Structures + +#include "engine/MaterializedViewsQueryAnalysis.h" + +#include + +#include "engine/MaterializedViews.h" +#include "parser/GraphPatternOperation.h" +#include "parser/SparqlParser.h" + +namespace materializedViewsQueryAnalysis { + +// _____________________________________________________________________________ +void QueryPatternCache::analyzeView(ViewPtr view) { + AD_LOG_INFO << view->name() << std::endl; + auto q = view->originalQuery(); + EncodedIriManager e; // TODO currently we dont use this + auto parsed = SparqlParser::parseQuery(&e, q, {}); + + // parsed._rootGraphPattern._graphPatterns.at(0) + // .getBasic() + // ._triples.at(0) + // .asString() + auto graphPatterns = ::ranges::to( + parsed._rootGraphPattern._graphPatterns | + ql::views::filter([](const auto& pattern) { + // it should be safe to ignore certain kinds of graph patterns like + // BIND, VALUES, OPTIONAL (where the values of the other cols dont + // change and no rows are omitted, only possibly repeated) + return 
!std::holds_alternative(pattern) && + !std::holds_alternative(pattern) && + !std::holds_alternative(pattern); + })); + if (graphPatterns.size() != 1) { + return; + } + const auto& graphPattern = graphPatterns.at(0); + if (!std::holds_alternative(graphPattern)) { + return; + } + const auto& triples = graphPattern.getBasic()._triples; + if (triples.size() == 0) { + return; + } + // TODO Property path is stored as a single predicate here + AD_LOG_INFO << triples.size() << std::endl; + if (triples.size() == 2) { + // Could be chain + } + // Predicate in view + for (const auto& triple : triples) { + auto predicate = triple.getSimplePredicate(); + if (predicate.has_value()) { + if (!predicateInView_.contains(predicate.value())) { + predicateInView_[predicate.value()] = {}; + } + predicateInView_[predicate.value()].push_back(view); + } + } + // TODO other +} + +} // namespace materializedViewsQueryAnalysis diff --git a/src/engine/MaterializedViewsQueryAnalysis.h b/src/engine/MaterializedViewsQueryAnalysis.h new file mode 100644 index 0000000000..87fa16362d --- /dev/null +++ b/src/engine/MaterializedViewsQueryAnalysis.h @@ -0,0 +1,44 @@ +// Copyright 2026 The QLever Authors, in particular: +// +// 2026 Christoph Ullinger , UFR +// +// UFR = University of Freiburg, Chair of Algorithms and Data Structures + +#ifndef QLEVER_SRC_ENGINE_MATERIALIZEDVIEWSQUERYANALYSIS_H_ +#define QLEVER_SRC_ENGINE_MATERIALIZEDVIEWSQUERYANALYSIS_H_ + +#include "rdfTypes/Variable.h" +#include "util/HashMap.h" + +class MaterializedView; + +namespace materializedViewsQueryAnalysis { + +// +using ViewPtr = std::shared_ptr; + +// +using ChainedPredicates = std::pair; +struct ChainInfo { + Variable subject_; + Variable chain_; + Variable object_; + ViewPtr view_; +}; + +class QueryPatternCache { + // Simple chains can be found by direct access into a hash map. 
+ ad_utility::HashMap> + simpleChainCache_; + + // cache for predicates + ad_utility::HashMap> predicateInView_; + + // TODO cache for stars + public: + void analyzeView(ViewPtr view); +}; + +} // namespace materializedViewsQueryAnalysis + +#endif // QLEVER_SRC_ENGINE_MATERIALIZEDVIEWSQUERYANALYSIS_H_ From b3ee50810cc8a77362055016938b167885768f41 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Mon, 19 Jan 2026 12:41:34 +0100 Subject: [PATCH 11/64] first working draft --- src/engine/MaterializedViews.cpp | 54 ++++- src/engine/MaterializedViews.h | 11 +- src/engine/MaterializedViewsQueryAnalysis.cpp | 195 +++++++++++++++--- src/engine/MaterializedViewsQueryAnalysis.h | 42 +++- src/engine/QueryPlanner.cpp | 92 +++++---- src/parser/MaterializedViewQuery.cpp | 6 + src/parser/MaterializedViewQuery.h | 6 +- 7 files changed, 337 insertions(+), 69 deletions(-) diff --git a/src/engine/MaterializedViews.cpp b/src/engine/MaterializedViews.cpp index 836fa72689..56a1bfbf43 100644 --- a/src/engine/MaterializedViews.cpp +++ b/src/engine/MaterializedViews.cpp @@ -15,6 +15,7 @@ #include "engine/IndexScan.h" #include "engine/Join.h" +#include "engine/MaterializedViewsQueryAnalysis.h" #include "engine/QueryExecutionContext.h" #include "engine/QueryExecutionTree.h" #include "engine/VariableToColumnMap.h" @@ -431,7 +432,10 @@ void MaterializedViewsManager::loadView(const std::string& name) const { // Analyzing the view when loading instead of (de)serializing an analysis // result has the benefit that query analysis can be extended without needing // to rewrite views. - patternLock->analyzeView(view); + if (patternLock->analyzeView(view)) { + AD_LOG_INFO << "The materialized view '" << name + << "' was added to the query pattern cache." 
<< std::endl; + } // if (name == "geom") { // auto lock = joinPatterns_.wlock(); // std::string x = (""); @@ -647,7 +651,7 @@ std::shared_ptr MaterializedView::makeIndexScan( // _____________________________________________________________________________ std::shared_ptr MaterializedViewsManager::makeIndexScan( QueryExecutionContext*, const JoinPattern&) const { - auto lock = loadedViews_.rlock(); + // auto lock = loadedViews_.rlock(); // if (lock->contains(joinPattern)) { // auto view = lock->at(joinPattern); // // view->makeIndexScan(qec, joinPattern, ...) @@ -656,6 +660,52 @@ std::shared_ptr MaterializedViewsManager::makeIndexScan( return nullptr; } +// _____________________________________________________________________________ +std::shared_ptr +MaterializedViewsManager::makeSingleChainReplacementIndexScan( + QueryExecutionContext* qec, std::shared_ptr left, + std::shared_ptr right) const { + auto lock = loadedViews_.rlock(); + auto patternLock = queryPatternCache_.rlock(); + AD_LOG_INFO << "makeSingleChainReplacementIndexScan." << std::endl; + auto res = + [&]() -> std::optional { + if (auto c = patternLock->checkSimpleChain(left, right)) { + return c; + } + if (auto c = patternLock->checkSimpleChain(right, left)) { + return c; + } + return std::nullopt; + }(); + + if (!res.has_value()) { + return nullptr; + } + if (res.value().chainInfos_.size() == 0) { + return nullptr; + } + // TODO we should maybe consider all the possible views (could have different + // sorting) + const auto& [subj, chain, obj, view] = res.value().chainInfos_.at(0); + if (!res.value().subject_.isVariable() && + view->variableToColumnMap().at(subj).columnIndex_ != 0) { + // subject of chain is fixed, but subject is not first column of + // materialized view + AD_LOG_INFO + << "We could use view for join but column ordering doesn't match." 
+ << std::endl; + return nullptr; + } + parsedQuery::MaterializedViewQuery::RequestedColumns cols{ + {subj, res.value().subject_}, + {chain, res.value().chain_}, + {obj, res.value().object_}}; + AD_LOG_INFO << "return scan." << std::endl; + return view->makeIndexScan( + qec, parsedQuery::MaterializedViewQuery{view->name(), std::move(cols)}); +} + // _____________________________________________________________________________ std::shared_ptr MaterializedViewsManager::makeIndexScan( QueryExecutionContext* qec, diff --git a/src/engine/MaterializedViews.h b/src/engine/MaterializedViews.h index 449d0093c9..b63d5b1059 100644 --- a/src/engine/MaterializedViews.h +++ b/src/engine/MaterializedViews.h @@ -266,7 +266,11 @@ struct SingleChain { // sorted // // TODO hash // }; -using JoinPattern = std::variant; +// using JoinPattern = std::variant; + +struct JoinPattern { + // TODO only simple chain for now +}; // The `MaterializedViewsManager` is part of the `QueryExecutionContext` and is // used to manage the currently loaded `MaterializedViews` in a `Server` or @@ -313,6 +317,11 @@ class MaterializedViewsManager { // TODO . If no matching -> return nullptr. std::shared_ptr makeIndexScan( QueryExecutionContext* qec, const JoinPattern& joinPattern) const; + + // TODO . If no matching -> return nullptr. 
+ std::shared_ptr makeSingleChainReplacementIndexScan( + QueryExecutionContext* qec, std::shared_ptr left, + std::shared_ptr right) const; }; #endif // QLEVER_SRC_ENGINE_MATERIALIZEDVIEWS_H_ diff --git a/src/engine/MaterializedViewsQueryAnalysis.cpp b/src/engine/MaterializedViewsQueryAnalysis.cpp index bad46cb5ef..bc94be70c0 100644 --- a/src/engine/MaterializedViewsQueryAnalysis.cpp +++ b/src/engine/MaterializedViewsQueryAnalysis.cpp @@ -6,8 +6,10 @@ #include "engine/MaterializedViewsQueryAnalysis.h" +#include #include +#include "engine/IndexScan.h" #include "engine/MaterializedViews.h" #include "parser/GraphPatternOperation.h" #include "parser/SparqlParser.h" @@ -15,53 +17,198 @@ namespace materializedViewsQueryAnalysis { // _____________________________________________________________________________ -void QueryPatternCache::analyzeView(ViewPtr view) { +ad_utility::HashSet getVariablesPresentInBasicGraphPatterns( + const std::vector& graphPatterns) { + ad_utility::HashSet vars; + for (const auto& graphPattern : graphPatterns) { + if (!std::holds_alternative(graphPattern)) { + continue; + } + for (const auto& triple : graphPattern.getBasic()._triples) { + if (triple.s_.isVariable()) { + vars.insert(triple.s_.getVariable()); + } + if (triple.o_.isVariable()) { + vars.insert(triple.o_.getVariable()); + } + if (auto p = triple.getPredicateVariable()) { + vars.insert(p.value()); + } + } + } + return vars; +} + +// _____________________________________________________________________________ +bool BasicGraphPatternsInvariantTo::operator()( + const parsedQuery::Optional&) const { + // TODO + return false; +} + +// _____________________________________________________________________________ +bool BasicGraphPatternsInvariantTo::operator()( + const parsedQuery::Bind& bind) const { + return !variables_.contains(bind._target); +} + +// _____________________________________________________________________________ +bool BasicGraphPatternsInvariantTo::operator()( + const 
parsedQuery::Values& values) const { + return !std::ranges::any_of( + values._inlineValues._variables, + [this](const auto& var) { return variables_.contains(var); }); +} + +// _____________________________________________________________________________ +std::optional QueryPatternCache::checkSimpleChain( + std::shared_ptr left, std::shared_ptr right) const { + if (!left || !right || !left->predicate().isIri() || + !right->predicate().isIri()) { + return std::nullopt; + } + if (left->object() == right->subject() && + left->subject() != right->object() && left->subject() != left->object() && + right->subject() != right->object() && left->object().isVariable() && + right->object().isVariable()) { + materializedViewsQueryAnalysis::ChainedPredicates preds{ + left->predicate().getIri().toStringRepresentation(), + right->predicate().getIri().toStringRepresentation()}; + if (simpleChainCache_.contains(preds)) { + return UserQueryChain{left->subject(), left->object().getVariable(), + right->object().getVariable(), + simpleChainCache_.at(preds)}; + } + } + return std::nullopt; +} + +// _____________________________________________________________________________ +bool QueryPatternCache::analyzeSimpleChain(ViewPtr view, const SparqlTriple& a, + const SparqlTriple& b) { + // Check predicates. + auto aPred = a.getSimplePredicate(); + if (!aPred.has_value()) { + return false; + } + auto bPred = b.getSimplePredicate(); + if (!bPred.has_value()) { + return false; + } + + // Check variables. + if (!a.s_.isVariable()) { + return false; + } + auto aSubj = a.s_.getVariable(); + + if (!a.o_.isVariable() || a.o_.getVariable() == aSubj) { + return false; + } + auto chainVar = a.o_.getVariable(); + + if (!b.s_.isVariable() || b.s_.getVariable() != chainVar) { + return false; + } + + if (!b.o_.isVariable() || b.o_.getVariable() == chainVar || + b.o_.getVariable() == aSubj) { + return false; + } + auto bObj = b.o_.getVariable(); + + // Insert chain. 
+ ChainedPredicates preds{aPred.value(), bPred.value()}; + AD_LOG_INFO << view->name() << " --> " << aPred.value() << " " + << bPred.value() << " " << aSubj.name() << " " << chainVar.name() + << " " << bObj.name() << std::endl; + if (!simpleChainCache_.contains(preds)) { + simpleChainCache_[preds] = {}; + } + simpleChainCache_[preds].push_back( + ChainInfo{std::move(aSubj), std::move(chainVar), std::move(bObj), view}); + return true; +} + +// _____________________________________________________________________________ +bool QueryPatternCache::analyzeView(ViewPtr view) { AD_LOG_INFO << view->name() << std::endl; auto q = view->originalQuery(); EncodedIriManager e; // TODO currently we dont use this auto parsed = SparqlParser::parseQuery(&e, q, {}); + // TODO do we want to report the reason for non-optimizable queries? + // parsed._rootGraphPattern._graphPatterns.at(0) // .getBasic() // ._triples.at(0) // .asString() - auto graphPatterns = ::ranges::to( - parsed._rootGraphPattern._graphPatterns | - ql::views::filter([](const auto& pattern) { - // it should be safe to ignore certain kinds of graph patterns like - // BIND, VALUES, OPTIONAL (where the values of the other cols dont - // change and no rows are omitted, only possibly repeated) - return !std::holds_alternative(pattern) && - !std::holds_alternative(pattern) && - !std::holds_alternative(pattern); - })); - if (graphPatterns.size() != 1) { - return; + // auto graphPatterns = ::ranges::to( + // parsed._rootGraphPattern._graphPatterns | + // ql::views::filter([](const auto& pattern) { + // // it should be safe to ignore certain kinds of graph patterns like + // // BIND, VALUES, OPTIONAL (where the values of the other cols dont + // // change and no rows are omitted, only possibly repeated) + // // TODO this only works if they contain no variables from the other + // // triples + // return !std::holds_alternative(pattern) && + // !std::holds_alternative(pattern) && + // !std::holds_alternative(pattern); + // })); 
+ const auto& graphPatterns = parsed._rootGraphPattern._graphPatterns; + BasicGraphPatternsInvariantTo invariantCheck{ + getVariablesPresentInBasicGraphPatterns(graphPatterns)}; + // Filter out graph patterns that do not change the result of the basic graph + // pattern analyzed + // TODO (deduplication necessary when reading , the vars should not be in the + // first three) + auto graphPatternsFiltered = + ::ranges::to(parsed._rootGraphPattern._graphPatterns | + ql::views::filter([&](const auto& pattern) { + return !std::visit(invariantCheck, pattern); + })); + if (graphPatternsFiltered.size() != 1) { + return false; } - const auto& graphPattern = graphPatterns.at(0); + const auto& graphPattern = graphPatternsFiltered.at(0); if (!std::holds_alternative(graphPattern)) { - return; + return false; } const auto& triples = graphPattern.getBasic()._triples; if (triples.size() == 0) { - return; + return false; } // TODO Property path is stored as a single predicate here AD_LOG_INFO << triples.size() << std::endl; + bool patternFound = false; + if (triples.size() == 2) { // Could be chain + const auto& a = triples.at(0); + const auto& b = triples.at(1); + if (!analyzeSimpleChain(view, a, b)) { + patternFound = patternFound || analyzeSimpleChain(view, b, a); + } else { + patternFound = true; + } } - // Predicate in view - for (const auto& triple : triples) { - auto predicate = triple.getSimplePredicate(); - if (predicate.has_value()) { - if (!predicateInView_.contains(predicate.value())) { - predicateInView_[predicate.value()] = {}; + + // TODO other patterns + + // Predicate in view - only if any pattern is detected + if (patternFound) { + for (const auto& triple : triples) { + auto predicate = triple.getSimplePredicate(); + if (predicate.has_value()) { + if (!predicateInView_.contains(predicate.value())) { + predicateInView_[predicate.value()] = {}; + } + predicateInView_[predicate.value()].push_back(view); } - predicateInView_[predicate.value()].push_back(view); } } - 
// TODO other + + return patternFound; } } // namespace materializedViewsQueryAnalysis diff --git a/src/engine/MaterializedViewsQueryAnalysis.h b/src/engine/MaterializedViewsQueryAnalysis.h index 87fa16362d..b32cb22194 100644 --- a/src/engine/MaterializedViewsQueryAnalysis.h +++ b/src/engine/MaterializedViewsQueryAnalysis.h @@ -7,10 +7,15 @@ #ifndef QLEVER_SRC_ENGINE_MATERIALIZEDVIEWSQUERYANALYSIS_H_ #define QLEVER_SRC_ENGINE_MATERIALIZEDVIEWSQUERYANALYSIS_H_ +#include "parser/GraphPatternOperation.h" +#include "parser/SparqlTriple.h" +#include "parser/TripleComponent.h" #include "rdfTypes/Variable.h" #include "util/HashMap.h" +#include "util/TypeTraits.h" class MaterializedView; +class IndexScan; namespace materializedViewsQueryAnalysis { @@ -26,6 +31,33 @@ struct ChainInfo { ViewPtr view_; }; +// +ad_utility::HashSet getVariablesPresentInBasicGraphPatterns( + const std::vector& graphPatterns); + +// +struct BasicGraphPatternsInvariantTo { + ad_utility::HashSet variables_; + + bool operator()(const parsedQuery::Optional& optional) const; + bool operator()(const parsedQuery::Bind& bind) const; + bool operator()(const parsedQuery::Values& values) const; + + CPP_template(typename T)(requires( + !ad_utility::SimilarToAny)) bool + operator()(const T&) const { + return false; + } +}; + +struct UserQueryChain { + TripleComponent subject_; // Allow fixing the subject of the chain + Variable chain_; + Variable object_; + const std::vector& chainInfos_; +}; + class QueryPatternCache { // Simple chains can be found by direct access into a hash map. 
ad_utility::HashMap> @@ -36,7 +68,15 @@ class QueryPatternCache { // TODO cache for stars public: - void analyzeView(ViewPtr view); + bool analyzeView(ViewPtr view); + + std::optional checkSimpleChain( + std::shared_ptr left, std::shared_ptr right) const; + + private: + // checks only one direction, so call with a-b and b-a + bool analyzeSimpleChain(ViewPtr view, const SparqlTriple& a, + const SparqlTriple& b); }; } // namespace materializedViewsQueryAnalysis diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 81e1058b73..29ca73b776 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -2540,51 +2540,63 @@ auto QueryPlanner::createMaterializedViewJoinReplacement( return std::dynamic_pointer_cast(tree._qet->getRootOperation()); }; auto aScan = isIndexScan(a); - auto bScan = isIndexScan(b); - if (!aScan || !bScan) { + if (!aScan) { return std::nullopt; } - - // TODO - if (!aScan->predicate().isIri() || !bScan->predicate().isIri()) { + auto bScan = isIndexScan(b); + if (!bScan) { return std::nullopt; } - // TODO andersrum - std::string x = ""; - std::string y = ""; - if (aScan->predicate().getIri().toStringRepresentation() == x && - bScan->predicate().getIri().toStringRepresentation() == y && - aScan->object() == bScan->subject() && - aScan->subject() != bScan->object() && - aScan->subject() != aScan->object()) { - // SingleChain chain{aScan->predicate().getIri(), - // bScan->predicate().getIri()}; - // _qec->materializedViewsManager().makeIndexScan(QueryExecutionContext - // *qec, const parsedQuery::MaterializedViewQuery &viewQuery) - AD_LOG_INFO << "SCAN: " - << aScan->predicate().getIri().toStringRepresentation() - << " CHAIN JOIN " - << bScan->predicate().getIri().toStringRepresentation() - << std::endl; - parsedQuery::MaterializedViewQuery q{ - ad_utility::triple_component::Iri::fromIriref( - "")}; - q.addParameter(SparqlTriple{ - ad_utility::triple_component::Iri::fromIriref(""), - 
ad_utility::triple_component::Iri::fromIriref(""), - aScan->subject()}); - q.addParameter(SparqlTriple{ - ad_utility::triple_component::Iri::fromIriref(""), - ad_utility::triple_component::Iri::fromIriref(""), - aScan->object()}); - q.addParameter(SparqlTriple{ - ad_utility::triple_component::Iri::fromIriref(""), - ad_utility::triple_component::Iri::fromIriref(""), - bScan->object()}); - return getMaterializedViewIndexScanPlan(q); - } - // + // // TODO + // if (!aScan->predicate().isIri() || !bScan->predicate().isIri()) { + // return std::nullopt; + // } + // // TODO andersrum + // // std::string x = ""; + // // std::string y = ""; + // if ( // if (aScan->predicate().getIri().toStringRepresentation() == x && + // // bScan->predicate().getIri().toStringRepresentation() == y && + // aScan->object() == bScan->subject() && + // aScan->subject() != bScan->object() && + // aScan->subject() != aScan->object() && + // bScan->subject() != bScan->object()) { + // // SingleChain chain{aScan->predicate().getIri(), + // // bScan->predicate().getIri()}; + // // _qec->materializedViewsManager().makeIndexScan(QueryExecutionContext + // // *qec, const parsedQuery::MaterializedViewQuery &viewQuery) + // AD_LOG_INFO << "SCAN: " + // << aScan->predicate().getIri().toStringRepresentation() + // << " CHAIN JOIN " + // << bScan->predicate().getIri().toStringRepresentation() + // << std::endl; + // parsedQuery::MaterializedViewQuery q{ + // ad_utility::triple_component::Iri::fromIriref( + // "")}; + // q.addParameter(SparqlTriple{ + // ad_utility::triple_component::Iri::fromIriref(""), + // ad_utility::triple_component::Iri::fromIriref(""), + // aScan->subject()}); + // q.addParameter(SparqlTriple{ + // ad_utility::triple_component::Iri::fromIriref(""), + // ad_utility::triple_component::Iri::fromIriref(""), + // aScan->object()}); + // q.addParameter(SparqlTriple{ + // ad_utility::triple_component::Iri::fromIriref(""), + // ad_utility::triple_component::Iri::fromIriref(""), + // 
bScan->object()}); + // return getMaterializedViewIndexScanPlan(q); + // } + // // + auto replacement = + _qec->materializedViewsManager().makeSingleChainReplacementIndexScan( + _qec, aScan, bScan); + if (replacement != nullptr) { + auto plan = makeSubtreePlan(replacement); + // This is equivalent to a join. + mergeSubtreePlanIds(plan, a, b); + return plan; + } return std::nullopt; } diff --git a/src/parser/MaterializedViewQuery.cpp b/src/parser/MaterializedViewQuery.cpp index 7ad56493e9..2ee31d2a4d 100644 --- a/src/parser/MaterializedViewQuery.cpp +++ b/src/parser/MaterializedViewQuery.cpp @@ -92,6 +92,12 @@ MaterializedViewQuery::MaterializedViewQuery(const SparqlTriple& triple) { addRequestedColumn(requestedColumn, simpleTriple.o_); } +// _____________________________________________________________________________ +MaterializedViewQuery::MaterializedViewQuery(std::string name, + RequestedColumns requestedColumns) + : viewName_{std::move(name)}, + requestedColumns_{std::move(requestedColumns)} {}; + // _____________________________________________________________________________ ad_utility::HashSet MaterializedViewQuery::getVarsToKeep() const { ad_utility::HashSet varsToKeep; diff --git a/src/parser/MaterializedViewQuery.h b/src/parser/MaterializedViewQuery.h index 4a44b4f9fc..886e239943 100644 --- a/src/parser/MaterializedViewQuery.h +++ b/src/parser/MaterializedViewQuery.h @@ -47,7 +47,8 @@ struct MaterializedViewQuery : MagicServiceQuery { // column names in the query result or literals/IRIs to restrict the column // on. This can be used for filtering the results and reading any number of // payload columns from the materialized view. - ad_utility::HashMap requestedColumns_; + using RequestedColumns = ad_utility::HashMap; + RequestedColumns requestedColumns_; // This constructor takes an IRI consisting of the magic service IRI for // materialized views with the view name as a suffix. 
If this is used, add the @@ -58,6 +59,9 @@ struct MaterializedViewQuery : MagicServiceQuery { // are necessary in this case. explicit MaterializedViewQuery(const SparqlTriple& triple); + // For query rewriting: Initialize directly using name and requested columns. + MaterializedViewQuery(std::string name, RequestedColumns requestedColumns); + void addParameter(const SparqlTriple& triple) override; // Return the variables that should be visible from this read on the From 7071e409b6fec0701292d0755cbc306593b8ec93 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Mon, 19 Jan 2026 12:59:31 +0100 Subject: [PATCH 12/64] Fix build failure --- src/index/IndexImpl.h | 3 +++ test/IndexTest.cpp | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index 2a804d200a..945f98d38e 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -225,6 +225,9 @@ class IndexImpl { const auto& OPS() const { return getPermutationImpl(ops_, "OPS"); } const auto& OSP() const { return getPermutationImpl(osp_, "OSP"); } + // Function only exposed for testing. + auto& SPOForTesting() { return const_cast(SPO()); } + static const IndexImpl& staticGlobalSingletonIndex() { AD_CORRECTNESS_CHECK(globalSingletonIndex_ != nullptr); return *globalSingletonIndex_; diff --git a/test/IndexTest.cpp b/test/IndexTest.cpp index 3f149d9d9e..2311d35db8 100644 --- a/test/IndexTest.cpp +++ b/test/IndexTest.cpp @@ -703,8 +703,8 @@ TEST(IndexImpl, recomputeStatistics) { // Simulate scenario where not all permutations are loaded. if (!loadAllPermutations) { // Overwrite with unloaded permutation. - indexImpl.SPO() = Permutation{Permutation::SPO, - ad_utility::makeUnlimitedAllocator()}; + indexImpl.SPOForTesting() = Permutation{ + Permutation::SPO, ad_utility::makeUnlimitedAllocator()}; // Zero out original values. 
indexImpl.configurationJson_["num-subjects"] = NNAI(0, 0); indexImpl.configurationJson_["num-objects"] = NNAI(0, 0); From e8956f39d4d0d09da99d577612432074b3b95ace Mon Sep 17 00:00:00 2001 From: ullingerc Date: Mon, 19 Jan 2026 15:41:58 +0100 Subject: [PATCH 13/64] comment --- src/engine/MaterializedViewsQueryAnalysis.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/engine/MaterializedViewsQueryAnalysis.cpp b/src/engine/MaterializedViewsQueryAnalysis.cpp index bc94be70c0..533c3257d7 100644 --- a/src/engine/MaterializedViewsQueryAnalysis.cpp +++ b/src/engine/MaterializedViewsQueryAnalysis.cpp @@ -182,6 +182,7 @@ bool QueryPatternCache::analyzeView(ViewPtr view) { AD_LOG_INFO << triples.size() << std::endl; bool patternFound = false; + // TODO what about chain by property path if (triples.size() == 2) { // Could be chain const auto& a = triples.at(0); From 38d2aaf2f1528ba183c0f27ff413829de6be7422 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Tue, 20 Jan 2026 16:53:11 +0100 Subject: [PATCH 14/64] add tests --- test/MaterializedViewsTest.cpp | 55 ++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/test/MaterializedViewsTest.cpp b/test/MaterializedViewsTest.cpp index a6edc99754..2f92427cc4 100644 --- a/test/MaterializedViewsTest.cpp +++ b/test/MaterializedViewsTest.cpp @@ -8,6 +8,7 @@ #include #include "./MaterializedViewsTestHelpers.h" +#include "./QueryPlannerTestHelpers.h" #include "./ServerTestHelpers.h" #include "./util/HttpRequestHelpers.h" #include "engine/IndexScan.h" @@ -740,3 +741,57 @@ TEST_F(MaterializedViewsTestLarge, LazyScan) { EXPECT_EQ(count.getInt(), 2 * numFakeSubjects_); } } + +// _____________________________________________________________________________ +TEST(MaterializedViewsQueryRewriteTest, simpleChain) { + namespace h = queryPlannerTestHelpers; + + // Test dataset and query. + const std::string simpleChain = "SELECT * { ?s ?m . ?m ?o }"; + const std::string simpleChainRenamed = "SELECT * { ?b ?c . 
?a ?b }"; + const std::string simpleChainFixed = "SELECT * { / ?c . }"; + const std::string chainTtl = + " . \n" + " . \n" + " . \n" + " . \n" + " \"abc\" ."; + const std::string onDiskBase = "_materializedViewRewriteChain"; + const std::string viewName = "testViewChain"; + + // Initialized libqlever. + materializedViewsTestHelpers::makeTestIndex(onDiskBase, chainTtl); + auto cleanUp = absl::MakeCleanup( + [&]() { materializedViewsTestHelpers::removeTestIndex(onDiskBase); }); + qlever::EngineConfig config; + config.baseName_ = onDiskBase; + qlever::Qlever qlv{config}; + + // Without the materialized view, a regular join is executed. + h::expect(simpleChain, h::Join(h::IndexScanFromStrings("?s", "", "?m"), + h::IndexScanFromStrings("?m", "", "?o"))); + + // Write a chain structure to the materialized view. + MaterializedViewWriter::writeViewToDisk(onDiskBase, viewName, + qlv.parseAndPlanQuery(simpleChain)); + qlv.loadMaterializedView(viewName); + + // With the materialized view loaded, an index scan on the view is performed + // instead of a regular join. 
+ { + auto [qet, qec, parsed] = qlv.parseAndPlanQuery(simpleChain); + EXPECT_THAT(*qet, h::IndexScanFromStrings("?s", "?m", "?o", + {Permutation::Enum::SPO})); + } + { + auto [qet, qec, parsed] = qlv.parseAndPlanQuery(simpleChainRenamed); + EXPECT_THAT(*qet, h::IndexScanFromStrings("?a", "?b", "?c", + {Permutation::Enum::SPO})); + } + { + auto [qet, qec, parsed] = qlv.parseAndPlanQuery(simpleChainFixed); + EXPECT_THAT( + *qet, h::IndexScanFromStrings("", "?_QLever_internal_variable_qp_0", + "?c", {Permutation::Enum::SPO})); + } +} From a73a174709b35bfde152500987e2cb0b32813caf Mon Sep 17 00:00:00 2001 From: ullingerc Date: Tue, 20 Jan 2026 16:53:23 +0100 Subject: [PATCH 15/64] make stuff more nice --- src/engine/MaterializedViews.cpp | 76 +------------------------------- src/engine/MaterializedViews.h | 41 ----------------- src/engine/QueryPlanner.cpp | 67 ++++++++-------------------- src/engine/QueryPlanner.h | 10 ++++- 4 files changed, 28 insertions(+), 166 deletions(-) diff --git a/src/engine/MaterializedViews.cpp b/src/engine/MaterializedViews.cpp index 56a1bfbf43..b596cd4273 100644 --- a/src/engine/MaterializedViews.cpp +++ b/src/engine/MaterializedViews.cpp @@ -112,62 +112,6 @@ MaterializedViewWriter::getIdTableColumnNamesAndPermutation() const { return {std::move(existingCols), numAddEmptyCols}; } -// _____________________________________________________________________________ -std::string MaterializedViewWriter::detectJoinPattern() const { - // AD_LOG_INFO << parsedQuery_._rootGraphPattern._graphPatterns.at(0) - // .getBasic() - // ._triples.at(0) - // .asString() - // << std::endl; - // auto parsedQuery = SparqlParser::parseQuery( - // &index_.getImpl().encodedIriManager(), std::move(query), {}); - - auto op = qet_->getRootOperation(); - auto join = std::dynamic_pointer_cast(op); - if (join == nullptr) { - return ""; - } - const auto& children = join->getChildren(); - AD_CORRECTNESS_CHECK(children.size() == 2); - auto getIndexScan = [](QueryExecutionTree* 
ptr) { - return std::dynamic_pointer_cast(ptr->getRootOperation()); - }; - auto left = getIndexScan(children.at(0)); - auto right = getIndexScan(children.at(1)); - if (left == nullptr || right == nullptr) { - return ""; - } - - // we are looking for the pattern: - // ?a ?b . - // ?b ?c . - - auto check = [](std::shared_ptr a, - std::shared_ptr b) -> std::string { - auto [s1, p1, o1] = a->triple(); - auto [s2, p2, o2] = b->triple(); - - if (s1.isVariable() && s1 != s2 && s1 != o1 && p1.isIri() && - o1.isVariable() && o1 == s2 && p2.isIri() && o2.isVariable() && - s1 != o2 && o1 != o2) { - return absl::StrCat(p1.toString(), " CHAIN ", p2.toString()); - // TODO remember variables - } - return ""; - }; - - auto lr = check(left, right); - if (!lr.empty()) { - return lr; - } - auto rl = check(right, left); - if (!rl.empty()) { - return rl; - } - - return ""; -} - // _____________________________________________________________________________ void MaterializedViewWriter::permuteIdTableAndCheckNoLocalVocabEntries( IdTable& block) const { @@ -341,7 +285,6 @@ void MaterializedViewWriter::writeViewMetadata() const { }) | ::ranges::to>())}, {"query", parsedQuery_._originalString}, - // {"joinPattern", detectJoinPattern()} }; ad_utility::makeOfstream(getFilenameBase() + ".viewinfo.json") << viewInfo.dump() << std::endl; @@ -436,12 +379,6 @@ void MaterializedViewsManager::loadView(const std::string& name) const { AD_LOG_INFO << "The materialized view '" << name << "' was added to the query pattern cache." 
<< std::endl; } - // if (name == "geom") { - // auto lock = joinPatterns_.wlock(); - // std::string x = (""); - // std::string y = (""); - // lock->insert({SingleChain{x, y}, loadedViews_.rlock()->at(name)}); - // } } // _____________________________________________________________________________ @@ -648,18 +585,6 @@ std::shared_ptr MaterializedView::makeIndexScan( viewQuery.getVarsToKeep()); } -// _____________________________________________________________________________ -std::shared_ptr MaterializedViewsManager::makeIndexScan( - QueryExecutionContext*, const JoinPattern&) const { - // auto lock = loadedViews_.rlock(); - // if (lock->contains(joinPattern)) { - // auto view = lock->at(joinPattern); - // // view->makeIndexScan(qec, joinPattern, ...) - // // we need join pattern, var names from query,. - // } - return nullptr; -} - // _____________________________________________________________________________ std::shared_ptr MaterializedViewsManager::makeSingleChainReplacementIndexScan( @@ -685,6 +610,7 @@ MaterializedViewsManager::makeSingleChainReplacementIndexScan( if (res.value().chainInfos_.size() == 0) { return nullptr; } + // TODO we should maybe consider all the possible views (could have different // sorting) const auto& [subj, chain, obj, view] = res.value().chainInfos_.at(0); diff --git a/src/engine/MaterializedViews.h b/src/engine/MaterializedViews.h index b63d5b1059..d87c24c4da 100644 --- a/src/engine/MaterializedViews.h +++ b/src/engine/MaterializedViews.h @@ -128,9 +128,6 @@ class MaterializedViewWriter { // `CompressedRelationWriter`. Returns the permutation metadata. IndexMetaDataMmap writePermutation(RangeOfIdTables sortedBlocksSPO) const; - // TODO - std::string detectJoinPattern() const; - // Helper for `computeResultAndWritePermutation`: Writes the metadata JSON // files with column names and ordering to disk. 
void writeViewMetadata() const; @@ -241,37 +238,6 @@ class MaterializedView { const parsedQuery::MaterializedViewQuery& viewQuery) const; }; -// -struct SingleChain { - // ?something ?x . - std::string predJoinObj_; - // ?x ?something_else . - std::string predJoinSubj_; - - QL_DEFINE_DEFAULTED_EQUALITY_OPERATOR_CONSTEXPR(SingleChain); - - CPP_template(typename H, typename G)( - requires ql::concepts::same_as) friend H - AbslHashValue(H h, const G& g) { - return H::combine(std::move(h), g.predJoinObj_, g.predJoinSubj_); - } - - // Serialize to JSON - // Deserialize from JSON - // Detect -}; -// Hashing doesn't work for stars if we only want a part of the star... -// struct Star { -// std::vector predicates_; // Should be -// sorted -// // TODO hash -// }; -// using JoinPattern = std::variant; - -struct JoinPattern { - // TODO only simple chain for now -}; - // The `MaterializedViewsManager` is part of the `QueryExecutionContext` and is // used to manage the currently loaded `MaterializedViews` in a `Server` or // `Qlever` instance. @@ -281,10 +247,6 @@ class MaterializedViewsManager { mutable ad_utility::Synchronized< ad_utility::HashMap>> loadedViews_; - // TODO multiple? - // mutable ad_utility::Synchronized< - // ad_utility::HashMap>> - // joinPatterns_; mutable ad_utility::Synchronized< materializedViewsQueryAnalysis::QueryPatternCache> queryPatternCache_; @@ -314,9 +276,6 @@ class MaterializedViewsManager { std::shared_ptr makeIndexScan( QueryExecutionContext* qec, const parsedQuery::MaterializedViewQuery& viewQuery) const; - // TODO . If no matching -> return nullptr. - std::shared_ptr makeIndexScan( - QueryExecutionContext* qec, const JoinPattern& joinPattern) const; // TODO . If no matching -> return nullptr. 
std::shared_ptr makeSingleChainReplacementIndexScan( diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 29ca73b776..fea24fc813 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -2248,9 +2248,11 @@ std::vector QueryPlanner::createJoinCandidates( candidates.push_back(std::move(plan)); } - // TODO this should be replaced by triple graph version below . st we do not - // check the same repeatedly. - if (auto opt = createMaterializedViewJoinReplacement(a, b, jcs)) { + // If the given plans are two index scans and we would join them on a single + // column, we check for a possible replacement with a materialized view. + // TODO Other join types should be checked in + // `optimizeCommutatively` + if (auto opt = createMaterializedViewSimpleJoinReplacement(a, b, jcs)) { candidates.push_back(std::move(opt.value())); } @@ -2533,9 +2535,14 @@ auto QueryPlanner::createJoinWithTransitivePath(const SubtreePlan& a, } // _____________________________________________________________________________ -auto QueryPlanner::createMaterializedViewJoinReplacement( - const SubtreePlan& a, const SubtreePlan& b, const JoinColumns&) const +auto QueryPlanner::createMaterializedViewSimpleJoinReplacement( + const SubtreePlan& a, const SubtreePlan& b, const JoinColumns& jcs) const -> std::optional { + if (jcs.size() != 1) { + return std::nullopt; + } + + // Both children must be index scans for the simple materialized view rewrite. 
auto isIndexScan = [](const auto& tree) -> std::shared_ptr { return std::dynamic_pointer_cast(tree._qet->getRootOperation()); }; @@ -2548,52 +2555,16 @@ auto QueryPlanner::createMaterializedViewJoinReplacement( return std::nullopt; } - // // TODO - // if (!aScan->predicate().isIri() || !bScan->predicate().isIri()) { - // return std::nullopt; - // } - // // TODO andersrum - // // std::string x = ""; - // // std::string y = ""; - // if ( // if (aScan->predicate().getIri().toStringRepresentation() == x && - // // bScan->predicate().getIri().toStringRepresentation() == y && - // aScan->object() == bScan->subject() && - // aScan->subject() != bScan->object() && - // aScan->subject() != aScan->object() && - // bScan->subject() != bScan->object()) { - // // SingleChain chain{aScan->predicate().getIri(), - // // bScan->predicate().getIri()}; - // // _qec->materializedViewsManager().makeIndexScan(QueryExecutionContext - // // *qec, const parsedQuery::MaterializedViewQuery &viewQuery) - // AD_LOG_INFO << "SCAN: " - // << aScan->predicate().getIri().toStringRepresentation() - // << " CHAIN JOIN " - // << bScan->predicate().getIri().toStringRepresentation() - // << std::endl; - // parsedQuery::MaterializedViewQuery q{ - // ad_utility::triple_component::Iri::fromIriref( - // "")}; - // q.addParameter(SparqlTriple{ - // ad_utility::triple_component::Iri::fromIriref(""), - // ad_utility::triple_component::Iri::fromIriref(""), - // aScan->subject()}); - // q.addParameter(SparqlTriple{ - // ad_utility::triple_component::Iri::fromIriref(""), - // ad_utility::triple_component::Iri::fromIriref(""), - // aScan->object()}); - // q.addParameter(SparqlTriple{ - // ad_utility::triple_component::Iri::fromIriref(""), - // ad_utility::triple_component::Iri::fromIriref(""), - // bScan->object()}); - // return getMaterializedViewIndexScanPlan(q); - // } - // // + // Try to construct an alternative join plan using the + // `MaterializedViewsManager`. 
So far only single chain joins (that is + // `?s / ?o` or `?s ?m . ?m ?o`) are supported. auto replacement = _qec->materializedViewsManager().makeSingleChainReplacementIndexScan( _qec, aScan, bScan); if (replacement != nullptr) { auto plan = makeSubtreePlan(replacement); - // This is equivalent to a join. + // This is equivalent to a join between both plans, so we must mark all + // included nodes. mergeSubtreePlanIds(plan, a, b); return plan; } @@ -3379,8 +3350,8 @@ void QueryPlanner::GraphPatternPlanner::visitSubquery( // _______________________________________________________________ void QueryPlanner::GraphPatternPlanner::optimizeCommutatively() { auto tg = planner_.createTripleGraph(&candidateTriples_); - // tg._adjLists; - // TODO somehow add candidates for matviews + // TODO Further optimizations for replacing more complex joins with + // materialized view scans. auto lastRow = planner_ .fillDpTab(tg, rootPattern_->_filters, rootPattern_->textLimits_, candidatePlans_) diff --git a/src/engine/QueryPlanner.h b/src/engine/QueryPlanner.h index 1aeacf7cdf..5aae7d5d13 100644 --- a/src/engine/QueryPlanner.h +++ b/src/engine/QueryPlanner.h @@ -450,8 +450,14 @@ class QueryPlanner { const SubtreePlan& b, const JoinColumns& jcs); - // TODO - std::optional createMaterializedViewJoinReplacement( + // Helper that checks if `a` and `b` are `IndexScan` plans and the + // `MaterializedViewManager` has a replacement for a simple join between them. + // Returns an `IndexScan` plan on the respective materialized view, if + // available. + // TODO Add a second version which detects more complex joins from + // a `BasicGraphPattern` (and plans the non-optimized parts with another + // `GraphPatternPlanner`). 
+ std::optional createMaterializedViewSimpleJoinReplacement( const SubtreePlan& a, const SubtreePlan& b, const JoinColumns& jcs) const; vector getOrderByRow( From 062e408ecec72b2db2500c3ecff625b7de6dbeb6 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Tue, 20 Jan 2026 17:14:35 +0100 Subject: [PATCH 16/64] further clean up --- src/engine/MaterializedViews.cpp | 49 ++++++++++-------- src/engine/MaterializedViewsQueryAnalysis.cpp | 51 +++++++------------ src/engine/MaterializedViewsQueryAnalysis.h | 32 +++++++++--- 3 files changed, 71 insertions(+), 61 deletions(-) diff --git a/src/engine/MaterializedViews.cpp b/src/engine/MaterializedViews.cpp index b596cd4273..f8ed9e9b21 100644 --- a/src/engine/MaterializedViews.cpp +++ b/src/engine/MaterializedViews.cpp @@ -592,42 +592,49 @@ MaterializedViewsManager::makeSingleChainReplacementIndexScan( std::shared_ptr right) const { auto lock = loadedViews_.rlock(); auto patternLock = queryPatternCache_.rlock(); - AD_LOG_INFO << "makeSingleChainReplacementIndexScan." << std::endl; - auto res = + + // Try both possible orderings of the join's children. + auto userQueryChain = [&]() -> std::optional { - if (auto c = patternLock->checkSimpleChain(left, right)) { - return c; + if (auto chain = patternLock->checkSimpleChain(left, right)) { + return chain; } - if (auto c = patternLock->checkSimpleChain(right, left)) { - return c; + if (auto chain = patternLock->checkSimpleChain(right, left)) { + return chain; } return std::nullopt; }(); - if (!res.has_value()) { + // The join between `left` and `right` does not constitue a chain. 
+ if (!userQueryChain.has_value()) { return nullptr; } - if (res.value().chainInfos_.size() == 0) { + if (userQueryChain.value().chainInfos_.size() == 0) { return nullptr; } - // TODO we should maybe consider all the possible views (could have different - // sorting) - const auto& [subj, chain, obj, view] = res.value().chainInfos_.at(0); - if (!res.value().subject_.isVariable() && + // TODO We should consider all the possible views for the + // combination of predicates. They could have different sorting. + const auto& [subj, chain, obj, view] = + userQueryChain.value().chainInfos_.at(0); + + // Ensure the subject is the first column if it is fixed. + if (!userQueryChain.value().subject_.isVariable() && view->variableToColumnMap().at(subj).columnIndex_ != 0) { - // subject of chain is fixed, but subject is not first column of - // materialized view - AD_LOG_INFO - << "We could use view for join but column ordering doesn't match." - << std::endl; + AD_LOG_INFO << "We could use the materialized view '" << view->name() + << "' for the join on '" << chain.name() + << "', however the subject is fixed to '" + << userQueryChain.value().subject_.toRdfLiteral() + << "' and column '" << subj.name() + << "' is not the first column of the view." << std::endl; return nullptr; } + + // Construct requested columns for scan specification. parsedQuery::MaterializedViewQuery::RequestedColumns cols{ - {subj, res.value().subject_}, - {chain, res.value().chain_}, - {obj, res.value().object_}}; - AD_LOG_INFO << "return scan." 
<< std::endl; + {subj, userQueryChain.value().subject_}, + {chain, userQueryChain.value().chain_}, + {obj, userQueryChain.value().object_}}; return view->makeIndexScan( qec, parsedQuery::MaterializedViewQuery{view->name(), std::move(cols)}); } diff --git a/src/engine/MaterializedViewsQueryAnalysis.cpp b/src/engine/MaterializedViewsQueryAnalysis.cpp index 533c3257d7..8bbeb3d1ea 100644 --- a/src/engine/MaterializedViewsQueryAnalysis.cpp +++ b/src/engine/MaterializedViewsQueryAnalysis.cpp @@ -42,7 +42,8 @@ ad_utility::HashSet getVariablesPresentInBasicGraphPatterns( // _____________________________________________________________________________ bool BasicGraphPatternsInvariantTo::operator()( const parsedQuery::Optional&) const { - // TODO + // TODO Analyze if the optional binds values from the outside + // query. return false; } @@ -117,11 +118,8 @@ bool QueryPatternCache::analyzeSimpleChain(ViewPtr view, const SparqlTriple& a, } auto bObj = b.o_.getVariable(); - // Insert chain. + // Insert chain to cache. ChainedPredicates preds{aPred.value(), bPred.value()}; - AD_LOG_INFO << view->name() << " --> " << aPred.value() << " " - << bPred.value() << " " << aSubj.name() << " " << chainVar.name() - << " " << bObj.name() << std::endl; if (!simpleChainCache_.contains(preds)) { simpleChainCache_[preds] = {}; } @@ -134,34 +132,22 @@ bool QueryPatternCache::analyzeSimpleChain(ViewPtr view, const SparqlTriple& a, bool QueryPatternCache::analyzeView(ViewPtr view) { AD_LOG_INFO << view->name() << std::endl; auto q = view->originalQuery(); - EncodedIriManager e; // TODO currently we dont use this + // We do not need the `EncodedIriManager` because we are only interested in + // analyzing the query structure, not in converting its components to + // `ValueId`s. + EncodedIriManager e; auto parsed = SparqlParser::parseQuery(&e, q, {}); - // TODO do we want to report the reason for non-optimizable queries? 
- - // parsed._rootGraphPattern._graphPatterns.at(0) - // .getBasic() - // ._triples.at(0) - // .asString() - // auto graphPatterns = ::ranges::to( - // parsed._rootGraphPattern._graphPatterns | - // ql::views::filter([](const auto& pattern) { - // // it should be safe to ignore certain kinds of graph patterns like - // // BIND, VALUES, OPTIONAL (where the values of the other cols dont - // // change and no rows are omitted, only possibly repeated) - // // TODO this only works if they contain no variables from the other - // // triples - // return !std::holds_alternative(pattern) && - // !std::holds_alternative(pattern) && - // !std::holds_alternative(pattern); - // })); + // TODO Do we want to report the reason for non-optimizable + // queries? + const auto& graphPatterns = parsed._rootGraphPattern._graphPatterns; BasicGraphPatternsInvariantTo invariantCheck{ getVariablesPresentInBasicGraphPatterns(graphPatterns)}; // Filter out graph patterns that do not change the result of the basic graph - // pattern analyzed - // TODO (deduplication necessary when reading , the vars should not be in the - // first three) + // pattern analyzed. + // TODO Deduplication necessary when reading, the variables should + // not be in the first three auto graphPatternsFiltered = ::ranges::to(parsed._rootGraphPattern._graphPatterns | ql::views::filter([&](const auto& pattern) { @@ -174,17 +160,15 @@ bool QueryPatternCache::analyzeView(ViewPtr view) { if (!std::holds_alternative(graphPattern)) { return false; } + // TODO Property path is stored as a single predicate here. const auto& triples = graphPattern.getBasic()._triples; if (triples.size() == 0) { return false; } - // TODO Property path is stored as a single predicate here - AD_LOG_INFO << triples.size() << std::endl; bool patternFound = false; - // TODO what about chain by property path + // TODO Possibly handle chain by property path. 
if (triples.size() == 2) { - // Could be chain const auto& a = triples.at(0); const auto& b = triples.at(1); if (!analyzeSimpleChain(view, a, b)) { @@ -194,9 +178,10 @@ bool QueryPatternCache::analyzeView(ViewPtr view) { } } - // TODO other patterns + // TODO Add support for other patterns, in particular, stars. - // Predicate in view - only if any pattern is detected + // Remember predicates that appear in certain views, only if any pattern is + // detected. if (patternFound) { for (const auto& triple : triples) { auto predicate = triple.getSimplePredicate(); diff --git a/src/engine/MaterializedViewsQueryAnalysis.h b/src/engine/MaterializedViewsQueryAnalysis.h index b32cb22194..72717fd221 100644 --- a/src/engine/MaterializedViewsQueryAnalysis.h +++ b/src/engine/MaterializedViewsQueryAnalysis.h @@ -14,15 +14,17 @@ #include "util/HashMap.h" #include "util/TypeTraits.h" +// Forward declarations to prevent cyclic dependencies. class MaterializedView; class IndexScan; +// _____________________________________________________________________________ namespace materializedViewsQueryAnalysis { -// using ViewPtr = std::shared_ptr; -// +// Key and value types of the cache for simple chains, that is queries of the +// form `?s ?m . ?m ?o`. using ChainedPredicates = std::pair; struct ChainInfo { Variable subject_; @@ -31,11 +33,14 @@ struct ChainInfo { ViewPtr view_; }; -// +// Extract all variables present in a set of graph patterns. Required for +// `BasicGraphPatternsInvariantTo` below. ad_utility::HashSet getVariablesPresentInBasicGraphPatterns( const std::vector& graphPatterns); -// +// Check whether certain graph patterns can be ignored as they do not affect the +// result of a query when we are only interested in the bindings for variables +// from `variables_`. 
struct BasicGraphPatternsInvariantTo { ad_utility::HashSet variables_; @@ -51,30 +56,43 @@ struct BasicGraphPatternsInvariantTo { } }; +// Similar to `ChainInfo`, this struct represents a simple chain, however it may +// bind the subject. struct UserQueryChain { TripleComponent subject_; // Allow fixing the subject of the chain Variable chain_; Variable object_; + // TODO Switch to `shared_ptr` const std::vector& chainInfos_; }; +// Cache data structure for the `MaterializedViewsManager`. This object can be +// used for quickly looking up if a given query can be optimized by making use +// of an exisiting materialized view. class QueryPatternCache { // Simple chains can be found by direct access into a hash map. ad_utility::HashMap> simpleChainCache_; - // cache for predicates + // Cache for predicates appearing in a materialized view. ad_utility::HashMap> predicateInView_; - // TODO cache for stars + // TODO Data structure for join stars. public: + // Given a materialized view, analyze its write query and populate the cache. + // This is called from `MaterializedViewsManager::loadView`. bool analyzeView(ViewPtr view); + // Check if a simple chain on the two `IndexScan`s given can be optimized by + // any loaded materialized views. std::optional checkSimpleChain( std::shared_ptr left, std::shared_ptr right) const; private: - // checks only one direction, so call with a-b and b-a + // Helper for `analyzeView`, that checks for a simple chain. It returns `true` + // iff a simple chain `a->b` is present. + // NOTE: This function only checks only one direction, so it should also be + // called with `a` and `b` switched if it returns `false`. 
bool analyzeSimpleChain(ViewPtr view, const SparqlTriple& a, const SparqlTriple& b); }; From e71b605efc2f85cd2c9064359aaea57d6085f233 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Tue, 20 Jan 2026 17:18:45 +0100 Subject: [PATCH 17/64] fix spelling --- src/engine/MaterializedViews.cpp | 2 +- src/engine/MaterializedViewsQueryAnalysis.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/engine/MaterializedViews.cpp b/src/engine/MaterializedViews.cpp index f8ed9e9b21..623441ed3f 100644 --- a/src/engine/MaterializedViews.cpp +++ b/src/engine/MaterializedViews.cpp @@ -605,7 +605,7 @@ MaterializedViewsManager::makeSingleChainReplacementIndexScan( return std::nullopt; }(); - // The join between `left` and `right` does not constitue a chain. + // The join between `left` and `right` does not constitute a chain. if (!userQueryChain.has_value()) { return nullptr; } diff --git a/src/engine/MaterializedViewsQueryAnalysis.h b/src/engine/MaterializedViewsQueryAnalysis.h index 72717fd221..aac5b563c5 100644 --- a/src/engine/MaterializedViewsQueryAnalysis.h +++ b/src/engine/MaterializedViewsQueryAnalysis.h @@ -68,7 +68,7 @@ struct UserQueryChain { // Cache data structure for the `MaterializedViewsManager`. This object can be // used for quickly looking up if a given query can be optimized by making use -// of an exisiting materialized view. +// of an existing materialized view. class QueryPatternCache { // Simple chains can be found by direct access into a hash map. 
ad_utility::HashMap> From d4c21909a1ec9b44dfa6af6265dcc3c74a441ed4 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Wed, 21 Jan 2026 11:01:14 +0100 Subject: [PATCH 18/64] initialize to zero --- src/engine/MaterializedViews.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/engine/MaterializedViews.cpp b/src/engine/MaterializedViews.cpp index a5dd7a4538..0b9cb4963e 100644 --- a/src/engine/MaterializedViews.cpp +++ b/src/engine/MaterializedViews.cpp @@ -122,6 +122,12 @@ void MaterializedViewWriter::permuteIdTableAndCheckNoLocalVocabEntries( // Add empty columns such that the view has at least four columns. for (uint8_t i = 0; i < numAddEmptyColumns_; ++i) { block.addEmptyColumn(); + // Initialize the new empty column to `UNDEF` (all bits zero) such that it + // can be compressed optimally. + const size_t col = block.numColumns() - 1; + for (size_t row = 0; row < block.numRows(); ++row) { + block.at(row, col) = ValueId::makeUndefined(); + } } // Check that there are no values of type `LocalVocabIndex` in the selected From 3dd5f871ca2b24638dae130f1fcc09acd1ee9dca Mon Sep 17 00:00:00 2001 From: ullingerc Date: Wed, 21 Jan 2026 11:19:02 +0100 Subject: [PATCH 19/64] Save original query string when writing a materialized view --- src/engine/MaterializedViews.cpp | 25 ++++++++++++++++++------- src/engine/MaterializedViews.h | 6 +++++- test/MaterializedViewsTest.cpp | 19 +++++++++++++++++++ 3 files changed, 42 insertions(+), 8 deletions(-) diff --git a/src/engine/MaterializedViews.cpp b/src/engine/MaterializedViews.cpp index ea6f482e70..1ece944bc6 100644 --- a/src/engine/MaterializedViews.cpp +++ b/src/engine/MaterializedViews.cpp @@ -258,29 +258,31 @@ IndexMetaDataMmap MaterializedViewWriter::writePermutation( // _____________________________________________________________________________ void MaterializedViewWriter::writeViewMetadata() const { - // Export column names to view info JSON file + // Export column names to view info JSON file. 
nlohmann::json viewInfo = { {"version", MATERIALIZED_VIEWS_VERSION}, {"columns", (columnNames_ | ql::views::transform([](const Variable& v) { return v.name(); }) | - ::ranges::to>())}}; + ::ranges::to>())}, + {"query", parsedQuery_._originalString}, + }; ad_utility::makeOfstream(getFilenameBase() + ".viewinfo.json") << viewInfo.dump() << std::endl; } // _____________________________________________________________________________ void MaterializedViewWriter::computeResultAndWritePermutation() const { - // Run query and sort the result externally (only if necessary) - AD_LOG_INFO << "Computing result for materialized view query " << name_ - << "..." << std::endl; + // Run query and sort the result externally (only if necessary). + AD_LOG_INFO << "Computing query result for materialized view '" << name_ + << "': " << parsedQuery_._originalString << std::endl; auto result = qet_->getResult(true); Sorter spoSorter{getFilenameBase() + ".spo-sorter.dat", numCols(), memoryLimit_, allocator_}; RangeOfIdTables sortedBlocksSPO = getSortedBlocks(spoSorter, result); - // Write compressed relation to disk + // Write compressed relation to disk. AD_LOG_INFO << "Writing materialized view " << name_ << " to disk ..." << std::endl; auto spoMetaData = writePermutation(std::move(sortedBlocksSPO)); @@ -316,7 +318,13 @@ MaterializedView::MaterializedView(std::string onDiskBase, std::string name) // Check version of view and restore column names auto version = viewInfoJson.at("version").get(); - AD_CORRECTNESS_CHECK(version == MATERIALIZED_VIEWS_VERSION); + if (version != MATERIALIZED_VIEWS_VERSION) { + throw std::runtime_error{absl::StrCat( + "The materialized view '", name_, "' is saved with format version ", + version, ", however this version of QLever expects format version ", + MATERIALIZED_VIEWS_VERSION, + ". 
Please re-write the materialized view.")}; + } // Make variable to column map auto columnNames = viewInfoJson.at("columns").get>(); @@ -330,6 +338,9 @@ MaterializedView::MaterializedView(std::string onDiskBase, std::string name) ColumnIndexAndTypeInfo::PossiblyUndefined}}); } + // Restore original query string. + originalQuery_ = viewInfoJson.at("query").get(); + // Read permutation permutation_->loadFromDisk(filename, false); AD_CORRECTNESS_CHECK(permutation_->isLoaded()); diff --git a/src/engine/MaterializedViews.h b/src/engine/MaterializedViews.h index 5b3ad0082c..16b93bf907 100644 --- a/src/engine/MaterializedViews.h +++ b/src/engine/MaterializedViews.h @@ -26,7 +26,7 @@ class IndexScan; // For the future, materialized views save their version. If we change something // about the way materialized views are stored, we can break the existing ones // cleanly without breaking the entire index format. -static constexpr size_t MATERIALIZED_VIEWS_VERSION = 1; +static constexpr size_t MATERIALIZED_VIEWS_VERSION = 2; // The `MaterializedViewWriter` can be used to write a new materialized view to // disk, given an already planned query. The query will be executed lazily and @@ -149,6 +149,7 @@ class MaterializedView { Permutation::Enum::SPO, ad_utility::makeUnlimitedAllocator())}; VariableToColumnMap varToColMap_; std::shared_ptr locatedTriplesState_; + std::string originalQuery_; using AdditionalScanColumns = SparqlTripleSimple::AdditionalScanColumns; @@ -170,6 +171,9 @@ class MaterializedView { return varToColMap_; } + // Get the original query string used for writing the view. + const std::string& originalQuery() const { return originalQuery_; } + // Return the combined filename from the index' `onDiskBase` and the name of // the view. Note that this function does not check for validity or existence. 
static std::string getFilenameBase(std::string_view onDiskBase, diff --git a/test/MaterializedViewsTest.cpp b/test/MaterializedViewsTest.cpp index 30f20c421e..5c53e3e6e7 100644 --- a/test/MaterializedViewsTest.cpp +++ b/test/MaterializedViewsTest.cpp @@ -376,6 +376,7 @@ TEST_F(MaterializedViewsTest, ManualConfigurations) { EXPECT_EQ(view->name(), "testView1"); EXPECT_EQ(view->permutation()->permutation(), Permutation::Enum::SPO); EXPECT_NE(view->locatedTriplesState(), nullptr); + EXPECT_EQ(view->originalQuery(), simpleWriteQuery_); MaterializedViewsManager managerNoBaseName; AD_EXPECT_THROW_WITH_MESSAGE( @@ -525,6 +526,24 @@ TEST_F(MaterializedViewsTest, ManualConfigurations) { ::testing::UnorderedElementsAre(::testing::Eq(V{"?s"}), ::testing::Eq(V{"?o"}))); } + + // Unsupported format version. + { + auto plan = qlv().parseAndPlanQuery(simpleWriteQuery_); + MaterializedViewWriter::writeViewToDisk(testIndexBase_, "testView5", plan); + { + // Write fake view metadata with unsupported version. 
+ nlohmann::json viewInfo = {{"version", 0}}; + ad_utility::makeOfstream( + "_materializedViewsTestIndex.view.testView5.viewinfo.json") + << viewInfo.dump() << std::endl; + } + AD_EXPECT_THROW_WITH_MESSAGE( + MaterializedView(testIndexBase_, "testView5"), + ::testing::HasSubstr( + "The materialized view 'testView5' is saved with format version " + "0, however this version of QLever expects")); + } } // _____________________________________________________________________________ From e4c1bde64426fb916415b39ef537f28640285f05 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Wed, 21 Jan 2026 13:59:36 +0100 Subject: [PATCH 20/64] apply Johannes' feedback --- src/engine/MaterializedViews.cpp | 7 +++++-- src/engine/MaterializedViews.h | 8 +++++--- test/MaterializedViewsTest.cpp | 3 ++- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/engine/MaterializedViews.cpp b/src/engine/MaterializedViews.cpp index 1ece944bc6..20309b9df5 100644 --- a/src/engine/MaterializedViews.cpp +++ b/src/engine/MaterializedViews.cpp @@ -275,7 +275,8 @@ void MaterializedViewWriter::writeViewMetadata() const { void MaterializedViewWriter::computeResultAndWritePermutation() const { // Run query and sort the result externally (only if necessary). AD_LOG_INFO << "Computing query result for materialized view '" << name_ - << "': " << parsedQuery_._originalString << std::endl; + << "': " << parsedQuery_._originalString.substr(0, 80) << "..." + << std::endl; auto result = qet_->getResult(true); Sorter spoSorter{getFilenameBase() + ".spo-sorter.dat", numCols(), @@ -339,7 +340,9 @@ MaterializedView::MaterializedView(std::string onDiskBase, std::string name) } // Restore original query string. 
- originalQuery_ = viewInfoJson.at("query").get(); + if (viewInfoJson.contains("query")) { + originalQuery_ = viewInfoJson.at("query").get(); + } // Read permutation permutation_->loadFromDisk(filename, false); diff --git a/src/engine/MaterializedViews.h b/src/engine/MaterializedViews.h index 16b93bf907..e28ad53167 100644 --- a/src/engine/MaterializedViews.h +++ b/src/engine/MaterializedViews.h @@ -26,7 +26,7 @@ class IndexScan; // For the future, materialized views save their version. If we change something // about the way materialized views are stored, we can break the existing ones // cleanly without breaking the entire index format. -static constexpr size_t MATERIALIZED_VIEWS_VERSION = 2; +static constexpr size_t MATERIALIZED_VIEWS_VERSION = 1; // The `MaterializedViewWriter` can be used to write a new materialized view to // disk, given an already planned query. The query will be executed lazily and @@ -149,7 +149,7 @@ class MaterializedView { Permutation::Enum::SPO, ad_utility::makeUnlimitedAllocator())}; VariableToColumnMap varToColMap_; std::shared_ptr locatedTriplesState_; - std::string originalQuery_; + std::optional originalQuery_; using AdditionalScanColumns = SparqlTripleSimple::AdditionalScanColumns; @@ -172,7 +172,9 @@ class MaterializedView { } // Get the original query string used for writing the view. - const std::string& originalQuery() const { return originalQuery_; } + const std::optional& originalQuery() const { + return originalQuery_; + } // Return the combined filename from the index' `onDiskBase` and the name of // the view. Note that this function does not check for validity or existence. 
diff --git a/test/MaterializedViewsTest.cpp b/test/MaterializedViewsTest.cpp index 5c53e3e6e7..98d6b668f4 100644 --- a/test/MaterializedViewsTest.cpp +++ b/test/MaterializedViewsTest.cpp @@ -376,7 +376,8 @@ TEST_F(MaterializedViewsTest, ManualConfigurations) { EXPECT_EQ(view->name(), "testView1"); EXPECT_EQ(view->permutation()->permutation(), Permutation::Enum::SPO); EXPECT_NE(view->locatedTriplesState(), nullptr); - EXPECT_EQ(view->originalQuery(), simpleWriteQuery_); + EXPECT_THAT(view->originalQuery(), + ::testing::Optional(::testing::Eq(simpleWriteQuery_))); MaterializedViewsManager managerNoBaseName; AD_EXPECT_THROW_WITH_MESSAGE( From 7db97a5fe7328631014df1ef5e2ec78673a01f38 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Wed, 21 Jan 2026 14:01:46 +0100 Subject: [PATCH 21/64] optional query string --- src/engine/MaterializedViewsQueryAnalysis.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/engine/MaterializedViewsQueryAnalysis.cpp b/src/engine/MaterializedViewsQueryAnalysis.cpp index 8bbeb3d1ea..13e2a720a0 100644 --- a/src/engine/MaterializedViewsQueryAnalysis.cpp +++ b/src/engine/MaterializedViewsQueryAnalysis.cpp @@ -131,12 +131,16 @@ bool QueryPatternCache::analyzeSimpleChain(ViewPtr view, const SparqlTriple& a, // _____________________________________________________________________________ bool QueryPatternCache::analyzeView(ViewPtr view) { AD_LOG_INFO << view->name() << std::endl; - auto q = view->originalQuery(); + const auto& query = view->originalQuery(); + if (!query.has_value()) { + return false; + } + // We do not need the `EncodedIriManager` because we are only interested in // analyzing the query structure, not in converting its components to // `ValueId`s. EncodedIriManager e; - auto parsed = SparqlParser::parseQuery(&e, q, {}); + auto parsed = SparqlParser::parseQuery(&e, query.value(), {}); // TODO Do we want to report the reason for non-optimizable // queries? 
From f48eb711ec70b3ffef154b6321434a6a1d91cd47 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Thu, 22 Jan 2026 11:02:02 +0100 Subject: [PATCH 22/64] small improvements --- src/engine/MaterializedViews.cpp | 9 ++++----- src/engine/MaterializedViews.h | 5 ++++- src/engine/MaterializedViewsQueryAnalysis.cpp | 4 ++-- src/engine/MaterializedViewsQueryAnalysis.h | 12 ++++++------ 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/engine/MaterializedViews.cpp b/src/engine/MaterializedViews.cpp index d02a6855af..d9fc7359ee 100644 --- a/src/engine/MaterializedViews.cpp +++ b/src/engine/MaterializedViews.cpp @@ -620,17 +620,16 @@ MaterializedViewsManager::makeSingleChainReplacementIndexScan( }(); // The join between `left` and `right` does not constitute a chain. - if (!userQueryChain.has_value()) { - return nullptr; - } - if (userQueryChain.value().chainInfos_.size() == 0) { + if (!userQueryChain.has_value() || + userQueryChain.value().chainInfos_ == nullptr || + userQueryChain.value().chainInfos_->size() == 0) { return nullptr; } // TODO We should consider all the possible views for the // combination of predicates. They could have different sorting. const auto& [subj, chain, obj, view] = - userQueryChain.value().chainInfos_.at(0); + userQueryChain.value().chainInfos_->at(0); // Ensure the subject is the first column if it is fixed. if (!userQueryChain.value().subject_.isVariable() && diff --git a/src/engine/MaterializedViews.h b/src/engine/MaterializedViews.h index 1ca2f202c5..c1d6584c2a 100644 --- a/src/engine/MaterializedViews.h +++ b/src/engine/MaterializedViews.h @@ -279,7 +279,10 @@ class MaterializedViewsManager { QueryExecutionContext* qec, const parsedQuery::MaterializedViewQuery& viewQuery) const; - // TODO . If no matching -> return nullptr. + // If the given two `IndexScan`s represent the children of a chain join, that + // is `?s ?m . ?m ?o`, check if a loaded materialized view can + // replace the join operation. 
If no matching view is found, or the join on + // `left` and `right` is not a chain, this function returns `nullptr`. std::shared_ptr makeSingleChainReplacementIndexScan( QueryExecutionContext* qec, std::shared_ptr left, std::shared_ptr right) const; diff --git a/src/engine/MaterializedViewsQueryAnalysis.cpp b/src/engine/MaterializedViewsQueryAnalysis.cpp index 13e2a720a0..bbdd2160f8 100644 --- a/src/engine/MaterializedViewsQueryAnalysis.cpp +++ b/src/engine/MaterializedViewsQueryAnalysis.cpp @@ -121,9 +121,9 @@ bool QueryPatternCache::analyzeSimpleChain(ViewPtr view, const SparqlTriple& a, // Insert chain to cache. ChainedPredicates preds{aPred.value(), bPred.value()}; if (!simpleChainCache_.contains(preds)) { - simpleChainCache_[preds] = {}; + simpleChainCache_[preds] = std::make_shared>(); } - simpleChainCache_[preds].push_back( + simpleChainCache_[preds]->push_back( ChainInfo{std::move(aSubj), std::move(chainVar), std::move(bObj), view}); return true; } diff --git a/src/engine/MaterializedViewsQueryAnalysis.h b/src/engine/MaterializedViewsQueryAnalysis.h index aac5b563c5..4b7bf16389 100644 --- a/src/engine/MaterializedViewsQueryAnalysis.h +++ b/src/engine/MaterializedViewsQueryAnalysis.h @@ -59,11 +59,10 @@ struct BasicGraphPatternsInvariantTo { // Similar to `ChainInfo`, this struct represents a simple chain, however it may // bind the subject. struct UserQueryChain { - TripleComponent subject_; // Allow fixing the subject of the chain + TripleComponent subject_; // Allow fixing the subject of the chain. Variable chain_; Variable object_; - // TODO Switch to `shared_ptr` - const std::vector& chainInfos_; + std::shared_ptr> chainInfos_; }; // Cache data structure for the `MaterializedViewsManager`. This object can be @@ -71,7 +70,8 @@ struct UserQueryChain { // of an existing materialized view. class QueryPatternCache { // Simple chains can be found by direct access into a hash map. 
- ad_utility::HashMap> + ad_utility::HashMap>> simpleChainCache_; // Cache for predicates appearing in a materialized view. @@ -91,8 +91,8 @@ class QueryPatternCache { private: // Helper for `analyzeView`, that checks for a simple chain. It returns `true` // iff a simple chain `a->b` is present. - // NOTE: This function only checks only one direction, so it should also be - // called with `a` and `b` switched if it returns `false`. + // NOTE: This function only checks one direction, so it should also be called + // with `a` and `b` switched if it returns `false`. bool analyzeSimpleChain(ViewPtr view, const SparqlTriple& a, const SparqlTriple& b); }; From 712e8086d70aaf12611b3cd4419a42beb8da0d5b Mon Sep 17 00:00:00 2001 From: ullingerc Date: Thu, 22 Jan 2026 11:02:44 +0100 Subject: [PATCH 23/64] remove old --- src/engine/IndexScan.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h index 4ad2e213e3..ed3f318a2e 100644 --- a/src/engine/IndexScan.h +++ b/src/engine/IndexScan.h @@ -94,10 +94,6 @@ class IndexScan final : public Operation { std::vector resultSortedOn() const override; - std::array triple() const { - return {subject_, predicate_, object_}; - } - // Set `PrefilterExpression`s and return updated `QueryExecutionTree` pointer // if necessary. std::optional> From d143325c6a52fd70df425158dfa6c82e89defb48 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Thu, 22 Jan 2026 11:18:54 +0100 Subject: [PATCH 24/64] warn unbound vars --- src/engine/MaterializedViews.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/engine/MaterializedViews.cpp b/src/engine/MaterializedViews.cpp index d9fc7359ee..bb6051b8d1 100644 --- a/src/engine/MaterializedViews.cpp +++ b/src/engine/MaterializedViews.cpp @@ -95,7 +95,9 @@ MaterializedViewWriter::getIdTableColumnNamesAndPermutation() const { // Column information for the columns selected by the user's query. 
auto existingCols = ::ranges::to>( targetVarsAndCols | ql::views::transform([](const auto& opt) { - AD_CONTRACT_CHECK(opt.has_value()); + AD_CONTRACT_CHECK( + opt.has_value(), + "Please ensure that all variables in your SELECT are bound."); return ColumnNameAndIndex{opt.value().variable_, opt.value().columnIndex_}; })); From 48110cd421598bfbd22f8648cd0f10ea17105ab3 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Tue, 27 Jan 2026 15:39:19 +0100 Subject: [PATCH 25/64] lots of work for refactoring the query planning part --- src/engine/MaterializedViews.cpp | 103 ++++++------- src/engine/MaterializedViews.h | 13 +- src/engine/MaterializedViewsQueryAnalysis.cpp | 122 +++++++++++++++- src/engine/MaterializedViewsQueryAnalysis.h | 23 ++- src/engine/QueryPlanner.cpp | 136 +++++++++++------- src/engine/QueryPlanner.h | 24 ++-- src/parser/PropertyPath.cpp | 12 ++ src/parser/PropertyPath.h | 7 + 8 files changed, 315 insertions(+), 125 deletions(-) diff --git a/src/engine/MaterializedViews.cpp b/src/engine/MaterializedViews.cpp index bb6051b8d1..7ab136cb5f 100644 --- a/src/engine/MaterializedViews.cpp +++ b/src/engine/MaterializedViews.cpp @@ -602,56 +602,59 @@ std::shared_ptr MaterializedView::makeIndexScan( } // _____________________________________________________________________________ -std::shared_ptr -MaterializedViewsManager::makeSingleChainReplacementIndexScan( - QueryExecutionContext* qec, std::shared_ptr left, - std::shared_ptr right) const { - auto lock = loadedViews_.rlock(); - auto patternLock = queryPatternCache_.rlock(); - - // Try both possible orderings of the join's children. - auto userQueryChain = - [&]() -> std::optional { - if (auto chain = patternLock->checkSimpleChain(left, right)) { - return chain; - } - if (auto chain = patternLock->checkSimpleChain(right, left)) { - return chain; - } - return std::nullopt; - }(); - - // The join between `left` and `right` does not constitute a chain. 
- if (!userQueryChain.has_value() || - userQueryChain.value().chainInfos_ == nullptr || - userQueryChain.value().chainInfos_->size() == 0) { - return nullptr; - } - - // TODO We should consider all the possible views for the - // combination of predicates. They could have different sorting. - const auto& [subj, chain, obj, view] = - userQueryChain.value().chainInfos_->at(0); - - // Ensure the subject is the first column if it is fixed. - if (!userQueryChain.value().subject_.isVariable() && - view->variableToColumnMap().at(subj).columnIndex_ != 0) { - AD_LOG_INFO << "We could use the materialized view '" << view->name() - << "' for the join on '" << chain.name() - << "', however the subject is fixed to '" - << userQueryChain.value().subject_.toRdfLiteral() - << "' and column '" << subj.name() - << "' is not the first column of the view." << std::endl; - return nullptr; - } - - // Construct requested columns for scan specification. - parsedQuery::MaterializedViewQuery::RequestedColumns cols{ - {subj, userQueryChain.value().subject_}, - {chain, userQueryChain.value().chain_}, - {obj, userQueryChain.value().object_}}; - return view->makeIndexScan( - qec, parsedQuery::MaterializedViewQuery{view->name(), std::move(cols)}); +std::vector +MaterializedViewsManager::makeJoinReplacementIndexScans( + QueryExecutionContext* qec, + const parsedQuery::BasicGraphPattern& triples) const { + return queryPatternCache_.rlock()->makeJoinReplacementIndexScans(qec, + triples); + /* auto lock = loadedViews_.rlock(); + auto patternLock = queryPatternCache_.rlock(); + + // Try both possible orderings of the join's children. + auto userQueryChain = + [&]() -> std::optional { + if (auto chain = patternLock->checkSimpleChain(left, right)) { + return chain; + } + if (auto chain = patternLock->checkSimpleChain(right, left)) { + return chain; + } + return std::nullopt; + }(); + + // The join between `left` and `right` does not constitute a chain. 
+ if (!userQueryChain.has_value() || + userQueryChain.value().chainInfos_ == nullptr || + userQueryChain.value().chainInfos_->size() == 0) { + return nullptr; + } + + // TODO We should consider all the possible views for the + // combination of predicates. They could have different sorting. + const auto& [subj, chain, obj, view] = + userQueryChain.value().chainInfos_->at(0); + + // Ensure the subject is the first column if it is fixed. + if (!userQueryChain.value().subject_.isVariable() && + view->variableToColumnMap().at(subj).columnIndex_ != 0) { + AD_LOG_INFO << "We could use the materialized view '" << view->name() + << "' for the join on '" << chain.name() + << "', however the subject is fixed to '" + << userQueryChain.value().subject_.toRdfLiteral() + << "' and column '" << subj.name() + << "' is not the first column of the view." << std::endl; + return nullptr; + } + + // Construct requested columns for scan specification. + parsedQuery::MaterializedViewQuery::RequestedColumns cols{ + {subj, userQueryChain.value().subject_}, + {chain, userQueryChain.value().chain_}, + {obj, userQueryChain.value().object_}}; + return view->makeIndexScan( + qec, parsedQuery::MaterializedViewQuery{view->name(), + std::move(cols)});*/ } // _____________________________________________________________________________ diff --git a/src/engine/MaterializedViews.h b/src/engine/MaterializedViews.h index 91314de6a8..00ef23b6fa 100644 --- a/src/engine/MaterializedViews.h +++ b/src/engine/MaterializedViews.h @@ -14,6 +14,7 @@ #include "index/ExternalSortFunctors.h" #include "index/Permutation.h" #include "libqlever/QleverTypes.h" +#include "parser/GraphPatternOperation.h" #include "parser/MaterializedViewQuery.h" #include "parser/ParsedQuery.h" #include "parser/SparqlTriple.h" @@ -279,13 +280,11 @@ class MaterializedViewsManager { QueryExecutionContext* qec, const parsedQuery::MaterializedViewQuery& viewQuery) const; - // If the given two `IndexScan`s represent the children of a chain 
join, that - // is `?s ?m . ?m ?o`, check if a loaded materialized view can - // replace the join operation. If no matching view is found, or the join on - // `left` and `right` is not a chain, this function returns `nullptr`. - std::shared_ptr makeSingleChainReplacementIndexScan( - QueryExecutionContext* qec, std::shared_ptr left, - std::shared_ptr right) const; + // + std::vector + makeJoinReplacementIndexScans( + QueryExecutionContext* qec, + const parsedQuery::BasicGraphPattern& triples) const; }; #endif // QLEVER_SRC_ENGINE_MATERIALIZEDVIEWS_H_ diff --git a/src/engine/MaterializedViewsQueryAnalysis.cpp b/src/engine/MaterializedViewsQueryAnalysis.cpp index bbdd2160f8..c023aaf108 100644 --- a/src/engine/MaterializedViewsQueryAnalysis.cpp +++ b/src/engine/MaterializedViewsQueryAnalysis.cpp @@ -12,6 +12,7 @@ #include "engine/IndexScan.h" #include "engine/MaterializedViews.h" #include "parser/GraphPatternOperation.h" +#include "parser/PropertyPath.h" #include "parser/SparqlParser.h" namespace materializedViewsQueryAnalysis { @@ -62,9 +63,104 @@ bool BasicGraphPatternsInvariantTo::operator()( } // _____________________________________________________________________________ -std::optional QueryPatternCache::checkSimpleChain( - std::shared_ptr left, std::shared_ptr right) const { - if (!left || !right || !left->predicate().isIri() || +std::vector +QueryPatternCache::makeJoinReplacementIndexScans( + QueryExecutionContext* qec, + const parsedQuery::BasicGraphPattern& triples) const { + // TODO now we have to deal with (a) subset + (b) property path not rewritten + // to join so far + + std::vector result; + + // Find all single chains + + ad_utility::HashMap> chainLeft; + ad_utility::HashMap> chainRight; + + for (const auto& [i, triple] : ::ranges::views::enumerate(triples._triples)) { + if (std::holds_alternative(triple.p_)) { + const auto& path = std::get(triple.p_); + if (path.isIri()) { + const auto& iri = path.getIri().toStringRepresentation(); + // If no view 
that we know of contains this predicate so we can ignore + // this triple altogether. + if (!predicateInView_.contains(iri)) { + continue; + } + if (triple.s_.isVariable()) { + chainRight[triple.s_.getVariable()].push_back(i); + } + if (triple.o_.isVariable()) { + chainLeft[triple.o_.getVariable()].push_back(i); + } + } else if (path.isSequence()) { + // CHECK THIS - This doesnt seem to occur in practice + AD_THROW("This should not happen"); + /*const auto& seq = path.getSequence(); + if (seq.size() == 2 && seq.at(0).isIri() && seq.at(1).isIri() && + triple.o_.isVariable()) { + const auto& chainIriLeft = + seq.at(0).getIri().toStringRepresentation(); + const auto& chainIriRight = + seq.at(1).getIri().toStringRepresentation(); + // TODO use std::string_view version to avoid copy - `is_transparent` + // hash for hash map + ChainedPredicates key{chainIriLeft, chainIriRight}; + if (auto it = simpleChainCache_.find(key); + it != simpleChainCache_.end()) { + auto& chainInfoPtr = it->second; + for (const auto& chainInfo : *chainInfoPtr) { + result.push_back({makeScanForSingleChain(qec, chainInfo, + triple.s_, std::nullopt, + triple.o_.getVariable()), + {static_cast(i)}}); + } + } + }*/ + } + } + } + + for (const auto& [varLeft, triplesLeft] : chainLeft) { + if (!chainRight.contains(varLeft)) { + continue; + } + for (auto tripleIdxRight : chainRight.at(varLeft)) { + for (auto tripleIdxLeft : triplesLeft) { + const auto& left = triples._triples.at(tripleIdxLeft); + const auto& right = triples._triples.at(tripleIdxRight); + + const auto& leftP = std::get(left.p_); + const auto& rightP = std::get(right.p_); + + if (!leftP.isIri() || !rightP.isIri()) { + continue; + } + + const auto& chainIriLeft = leftP.getIri().toStringRepresentation(); + const auto& chainIriRight = rightP.getIri().toStringRepresentation(); + + // TODO use std::string_view version to avoid copy - `is_transparent` + // hash for hash map + ChainedPredicates key{chainIriLeft, chainIriRight}; + if (auto it = 
simpleChainCache_.find(key); + it != simpleChainCache_.end()) { + auto& chainInfoPtr = it->second; + for (const auto& chainInfo : *chainInfoPtr) { + result.push_back( + {makeScanForSingleChain(qec, chainInfo, left.s_, std::nullopt, + right.o_.getVariable()), + {tripleIdxLeft, tripleIdxRight}}); + } + } + } + } + } + + // auto temp = triples._triples.at(0); + // auto& x = std::get(temp.p_).getSequence().at(0).isIri(); + + /*if (!left || !right || !left->predicate().isIri() || !right->predicate().isIri()) { return std::nullopt; } @@ -80,8 +176,26 @@ std::optional QueryPatternCache::checkSimpleChain( right->object().getVariable(), simpleChainCache_.at(preds)}; } + } + return std::nullopt; + */ + AD_LOG_INFO << "MV PLANS: " << result.size() << std::endl; + return result; +} + +// _____________________________________________________________________________ +std::shared_ptr QueryPatternCache::makeScanForSingleChain( + QueryExecutionContext* qec, ChainInfo cached, + const TripleComponent& subject, const std::optional& chain, + const Variable& object) const { + const auto& [cSubj, cChain, cObj, view] = cached; + parsedQuery::MaterializedViewQuery::RequestedColumns cols{{cSubj, subject}, + {cObj, object}}; + if (chain.has_value()) { + cols.insert({cChain, chain.value()}); } - return std::nullopt; + return view->makeIndexScan( + qec, parsedQuery::MaterializedViewQuery{view->name(), std::move(cols)}); } // _____________________________________________________________________________ diff --git a/src/engine/MaterializedViewsQueryAnalysis.h b/src/engine/MaterializedViewsQueryAnalysis.h index 4b7bf16389..9f0c1427a7 100644 --- a/src/engine/MaterializedViewsQueryAnalysis.h +++ b/src/engine/MaterializedViewsQueryAnalysis.h @@ -26,6 +26,8 @@ using ViewPtr = std::shared_ptr; // Key and value types of the cache for simple chains, that is queries of the // form `?s ?m . ?m ?o`. 
using ChainedPredicates = std::pair; +using ChainedPredicatesForLookup = + std::pair; struct ChainInfo { Variable subject_; Variable chain_; @@ -65,6 +67,14 @@ struct UserQueryChain { std::shared_ptr> chainInfos_; }; +// +struct MaterializedViewJoinReplacement { + std::shared_ptr indexScan_; + std::vector coveredTriples_; + + size_t numJoins() const { return coveredTriples_.size() - 1; } +}; + // Cache data structure for the `MaterializedViewsManager`. This object can be // used for quickly looking up if a given query can be optimized by making use // of an existing materialized view. @@ -83,10 +93,15 @@ class QueryPatternCache { // This is called from `MaterializedViewsManager::loadView`. bool analyzeView(ViewPtr view); - // Check if a simple chain on the two `IndexScan`s given can be optimized by - // any loaded materialized views. - std::optional checkSimpleChain( - std::shared_ptr left, std::shared_ptr right) const; + // + std::vector makeJoinReplacementIndexScans( + QueryExecutionContext* qec, + const parsedQuery::BasicGraphPattern& triples) const; + + std::shared_ptr makeScanForSingleChain( + QueryExecutionContext* qec, ChainInfo cached, + const TripleComponent& subject, const std::optional& chain, + const Variable& object) const; private: // Helper for `analyzeView`, that checks for a simple chain. It returns `true` diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index fea24fc813..9b99fb965f 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -1511,8 +1511,9 @@ size_t QueryPlanner::findUniqueNodeIds( ql::views::transform(&SubtreePlan::_idsOfIncludedNodes); // Check that all the `_idsOfIncludedNodes` are one-hot encodings of a single // value, i.e. they have exactly one bit set. 
- AD_CORRECTNESS_CHECK(ql::ranges::all_of( - nodeIds, [](auto nodeId) { return absl::popcount(nodeId) == 1; })); + // TODO no longer true (for replacement plans) + // AD_CORRECTNESS_CHECK(ql::ranges::all_of( + // nodeIds, [](auto nodeId) { return absl::popcount(nodeId) == 1; })); ql::ranges::copy(nodeIds, std::inserter(uniqueNodeIds, uniqueNodeIds.end())); return uniqueNodeIds.size(); } @@ -1522,7 +1523,8 @@ std::vector QueryPlanner::runDynamicProgrammingOnConnectedComponent( std::vector connectedComponent, const FiltersAndOptionalSubstitutes& filters, - const TextLimitVec& textLimits, const TripleGraph& tg) const { + const TextLimitVec& textLimits, const TripleGraph& tg, + const ReplacementPlans& replacementPlans) const { vector> dpTab; // find the unique number of nodes in the current connected component // (there might be duplicates because we already have multiple candidates @@ -1544,6 +1546,13 @@ QueryPlanner::runDynamicProgrammingOnConnectedComponent( // As we only passed in connected components, we expect the result to always // be nonempty. AD_CORRECTNESS_CHECK(!dpTab[k - 1].empty()); + // If we have replacement plans for this level, we add them now, s.t. the + // next level can make use of them. 
+ if (replacementPlans.size() > k - 1) { + const auto& newPlans = replacementPlans[k - 1]; + dpTab[k - 1].reserve(dpTab[k - 1].size() + newPlans.size()); + dpTab[k - 1].insert(dpTab[k - 1].end(), newPlans.begin(), newPlans.end()); + } } auto& result = dpTab.back(); applyFiltersIfPossible(result, @@ -1628,7 +1637,8 @@ size_t QueryPlanner::countSubgraphs(std::vector graph, std::vector QueryPlanner::runGreedyPlanningOnConnectedComponent( std::vector connectedComponent, const FiltersAndOptionalSubstitutes& filters, - const TextLimitVec& textLimits, const TripleGraph& tg) const { + const TextLimitVec& textLimits, const TripleGraph& tg, + const ReplacementPlans&) const { applyFiltersIfPossible(connectedComponent, filters); applyTextLimitsIfPossible(connectedComponent, textLimits, true); @@ -1731,7 +1741,8 @@ QueryPlanner::FiltersAndOptionalSubstitutes QueryPlanner::seedFilterSubstitutes( // _____________________________________________________________________________ std::vector> QueryPlanner::fillDpTab( const QueryPlanner::TripleGraph& tg, vector filters, - TextLimitMap& textLimits, const vector>& children) { + TextLimitMap& textLimits, const vector>& children, + const ReplacementPlans& replacementPlans) { auto [initialPlans, additionalFilters] = seedWithScansAndText(tg, children, textLimits); ql::ranges::move(additionalFilters, std::back_inserter(filters)); @@ -1755,9 +1766,29 @@ std::vector> QueryPlanner::fillDpTab( TextLimitVec textLimitVec(textLimits.begin(), textLimits.end()); for (auto& component : components | ql::views::values) { std::vector g; + uint64_t coveredNodes = 0; for (const auto& plan : component) { g.push_back(&plan); + coveredNodes |= plan._idsOfIncludedNodes; + } + + // TODO this could be hash-map based if we would return the indices in the + // create helper and pass them as part of replacementPlans + ReplacementPlans applicableReplacementPlans; + // size_t numApplicableReplacementPlans = 0; + for (auto& rPlans : replacementPlans) { + std::vector 
applicable; + for (auto& plan : rPlans) { + // (a & b) == a -> a is subset of b + if ((plan._idsOfIncludedNodes & coveredNodes) == + plan._idsOfIncludedNodes) { + applicable.push_back(std::move(plan)); + // ++numApplicableReplacementPlans; + } + } + applicableReplacementPlans.push_back(std::move(applicable)); } + const size_t budget = getRuntimeParameter<&RuntimeParameters::queryPlanningBudget_>(); bool useGreedyPlanning = countSubgraphs(g, filters, budget) > budget; @@ -1766,12 +1797,33 @@ std::vector> QueryPlanner::fillDpTab( << "Using the greedy query planner for a large connected component" << std::endl; } + auto impl = useGreedyPlanning ? &QueryPlanner::runGreedyPlanningOnConnectedComponent : &QueryPlanner::runDynamicProgrammingOnConnectedComponent; + // TODO greedy: once with replacementPlans of all levels on level 0 and all + // covered nodes removed ; once without replacementPlans -> use better + // result + if (useGreedyPlanning) { + // Remove covered ones + for (const auto& a : applicableReplacementPlans) { + for (const auto& p : a) { + std::erase_if(component, [&p](const auto& c) { + return (p._idsOfIncludedNodes & c._idsOfIncludedNodes) != 0; + }); + } + } + // Insert replacements. TODO replacements themselves should not contain + // each other + for (const auto& a : applicableReplacementPlans) { + for (const auto& p : a) { + component.push_back(p); + } + } + } lastDpRowFromComponents.push_back( std::invoke(impl, this, std::move(component), filtersAndOptSubstitutes, - textLimitVec, tg)); + textLimitVec, tg, applicableReplacementPlans)); checkCancellation(); } size_t numConnectedComponents = lastDpRowFromComponents.size(); @@ -2252,9 +2304,9 @@ std::vector QueryPlanner::createJoinCandidates( // column, we check for a possible replacement with a materialized view. 
// TODO Other join types should be checked in // `optimizeCommutatively` - if (auto opt = createMaterializedViewSimpleJoinReplacement(a, b, jcs)) { - candidates.push_back(std::move(opt.value())); - } + // if (auto opt = createMaterializedViewSimpleJoinReplacement(a, b, jcs)) { + // candidates.push_back(std::move(opt.value())); + // } // "NORMAL" CASE: // The join class takes care of sorting the subtrees if necessary @@ -2535,40 +2587,27 @@ auto QueryPlanner::createJoinWithTransitivePath(const SubtreePlan& a, } // _____________________________________________________________________________ -auto QueryPlanner::createMaterializedViewSimpleJoinReplacement( - const SubtreePlan& a, const SubtreePlan& b, const JoinColumns& jcs) const - -> std::optional { - if (jcs.size() != 1) { - return std::nullopt; +auto QueryPlanner::createMaterializedViewJoinReplacements( + const parsedQuery::BasicGraphPattern& triples) const -> ReplacementPlans { + ReplacementPlans plans; + auto scans = _qec->materializedViewsManager().makeJoinReplacementIndexScans( + _qec, triples); + plans.reserve(triples._triples.size()); + + for (const auto& [scan, coveredTriples] : scans) { + auto plan = makeSubtreePlan(scan); + // This is equivalent to a join between the covered triples, so we must mark + // all included nodes. + for (auto tripleIdx : coveredTriples) { + plan._idsOfIncludedNodes |= (1ULL << tripleIdx); + } + // Empty vectors of replacement plans for smaller numbers of triples. + for (size_t i = plans.size(); i < coveredTriples.size(); ++i) { + plans.push_back({}); + } + plans.at(coveredTriples.size() - 1).push_back(std::move(plan)); } - - // Both children must be index scans for the simple materialized view rewrite. 
- auto isIndexScan = [](const auto& tree) -> std::shared_ptr { - return std::dynamic_pointer_cast(tree._qet->getRootOperation()); - }; - auto aScan = isIndexScan(a); - if (!aScan) { - return std::nullopt; - } - auto bScan = isIndexScan(b); - if (!bScan) { - return std::nullopt; - } - - // Try to construct an alternative join plan using the - // `MaterializedViewsManager`. So far only single chain joins (that is - // `?s / ?o` or `?s ?m . ?m ?o`) are supported. - auto replacement = - _qec->materializedViewsManager().makeSingleChainReplacementIndexScan( - _qec, aScan, bScan); - if (replacement != nullptr) { - auto plan = makeSubtreePlan(replacement); - // This is equivalent to a join between both plans, so we must mark all - // included nodes. - mergeSubtreePlanIds(plan, a, b); - return plan; - } - return std::nullopt; + return plans; } // ______________________________________________________________________________________ @@ -3349,13 +3388,14 @@ void QueryPlanner::GraphPatternPlanner::visitSubquery( // _______________________________________________________________ void QueryPlanner::GraphPatternPlanner::optimizeCommutatively() { + auto replacementPlans = + planner_.createMaterializedViewJoinReplacements(candidateTriples_); auto tg = planner_.createTripleGraph(&candidateTriples_); - // TODO Further optimizations for replacing more complex joins with - // materialized view scans. 
- auto lastRow = planner_ - .fillDpTab(tg, rootPattern_->_filters, - rootPattern_->textLimits_, candidatePlans_) - .back(); + auto lastRow = + planner_ + .fillDpTab(tg, rootPattern_->_filters, rootPattern_->textLimits_, + candidatePlans_, replacementPlans) + .back(); candidateTriples_._triples.clear(); candidatePlans_.clear(); candidatePlans_.push_back(std::move(lastRow)); diff --git a/src/engine/QueryPlanner.h b/src/engine/QueryPlanner.h index 5aae7d5d13..7c8c68f65a 100644 --- a/src/engine/QueryPlanner.h +++ b/src/engine/QueryPlanner.h @@ -450,15 +450,12 @@ class QueryPlanner { const SubtreePlan& b, const JoinColumns& jcs); - // Helper that checks if `a` and `b` are `IndexScan` plans and the - // `MaterializedViewManager` has a replacement for a simple join between them. - // Returns an `IndexScan` plan on the respective materialized view, if - // available. - // TODO Add a second version which detects more complex joins from - // a `BasicGraphPattern` (and plans the non-optimized parts with another - // `GraphPatternPlanner`). - std::optional createMaterializedViewSimpleJoinReplacement( - const SubtreePlan& a, const SubtreePlan& b, const JoinColumns& jcs) const; + // + using ReplacementPlans = std::vector>; + + // Helper that ... + ReplacementPlans createMaterializedViewJoinReplacements( + const parsedQuery::BasicGraphPattern& triples) const; vector getOrderByRow( const ParsedQuery& pq, @@ -585,7 +582,8 @@ class QueryPlanner { */ vector> fillDpTab( const TripleGraph& graph, std::vector fs, - TextLimitMap& textLimits, const vector>& children); + TextLimitMap& textLimits, const vector>& children, + const ReplacementPlans& replacementPlans); // Internal subroutine of `fillDpTab` that only works on a single connected // component of the input. 
Throws if the subtrees in the `connectedComponent` @@ -594,7 +592,8 @@ class QueryPlanner { runDynamicProgrammingOnConnectedComponent( std::vector connectedComponent, const FiltersAndOptionalSubstitutes& filters, - const TextLimitVec& textLimits, const TripleGraph& tg) const; + const TextLimitVec& textLimits, const TripleGraph& tg, + const ReplacementPlans& replacementPlans) const; // Same as `runDynamicProgrammingOnConnectedComponent`, but uses a greedy // algorithm that always greedily chooses the smallest result of the possible @@ -602,7 +601,8 @@ class QueryPlanner { std::vector runGreedyPlanningOnConnectedComponent( std::vector connectedComponent, const FiltersAndOptionalSubstitutes& filters, - const TextLimitVec& textLimits, const TripleGraph& tg) const; + const TextLimitVec& textLimits, const TripleGraph& tg, + const ReplacementPlans& replacementPlans) const; // Return the number of connected subgraphs is the `graph`, or `budget + 1`, // if the number of subgraphs is `> budget`. This is used to analyze the diff --git a/src/parser/PropertyPath.cpp b/src/parser/PropertyPath.cpp index cccfad61fe..b405a70ccc 100644 --- a/src/parser/PropertyPath.cpp +++ b/src/parser/PropertyPath.cpp @@ -130,6 +130,18 @@ bool PropertyPath::isIri() const { return std::holds_alternative(path_); } +// _____________________________________________________________________________ +const std::vector& PropertyPath::getSequence() const { + AD_CONTRACT_CHECK(isSequence()); + return std::get(path_).children_; +} + +// _____________________________________________________________________________ +bool PropertyPath::isSequence() const { + return std::holds_alternative(path_) && + std::get(path_).modifier_ == Modifier::SEQUENCE; +} + // _____________________________________________________________________________ std::optional> PropertyPath::getChildOfInvertedPath() const { diff --git a/src/parser/PropertyPath.h b/src/parser/PropertyPath.h index 08fa199863..0daab58efe 100644 --- 
a/src/parser/PropertyPath.h +++ b/src/parser/PropertyPath.h @@ -134,6 +134,13 @@ class PropertyPath { // otherwise. bool isIri() const; + // If the path is a sequence, return the children (that is, the parts of the + // sequence). If the path is not a sequence this will throw. + const std::vector& getSequence() const; + + // Check if the path is a sequence. + bool isSequence() const; + // If the path is a modified path with an inverse modifier, return the pointer // to its only child. Otherwise, return nullptr. std::optional> From 7798db85034b53b18db81e9631241de7a7f31e8c Mon Sep 17 00:00:00 2001 From: ullingerc Date: Tue, 27 Jan 2026 15:43:05 +0100 Subject: [PATCH 26/64] bug fix --- src/engine/MaterializedViewsQueryAnalysis.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/engine/MaterializedViewsQueryAnalysis.cpp b/src/engine/MaterializedViewsQueryAnalysis.cpp index c023aaf108..4e9e6edfb1 100644 --- a/src/engine/MaterializedViewsQueryAnalysis.cpp +++ b/src/engine/MaterializedViewsQueryAnalysis.cpp @@ -148,7 +148,7 @@ QueryPatternCache::makeJoinReplacementIndexScans( auto& chainInfoPtr = it->second; for (const auto& chainInfo : *chainInfoPtr) { result.push_back( - {makeScanForSingleChain(qec, chainInfo, left.s_, std::nullopt, + {makeScanForSingleChain(qec, chainInfo, left.s_, varLeft, right.o_.getVariable()), {tripleIdxLeft, tripleIdxRight}}); } From a2f9c3f99a95c2feb8517cdbd0ebfb00626b6821 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Tue, 27 Jan 2026 16:37:23 +0100 Subject: [PATCH 27/64] improvements + clean up --- src/engine/MaterializedViewsQueryAnalysis.cpp | 61 +++++++------------ src/engine/MaterializedViewsQueryAnalysis.h | 55 ++++++++++++++--- src/engine/QueryPlanner.cpp | 14 ++--- src/engine/QueryPlanner.h | 9 ++- 4 files changed, 80 insertions(+), 59 deletions(-) diff --git a/src/engine/MaterializedViewsQueryAnalysis.cpp b/src/engine/MaterializedViewsQueryAnalysis.cpp index 4e9e6edfb1..699df71e00 100644 --- 
a/src/engine/MaterializedViewsQueryAnalysis.cpp +++ b/src/engine/MaterializedViewsQueryAnalysis.cpp @@ -67,14 +67,14 @@ std::vector QueryPatternCache::makeJoinReplacementIndexScans( QueryExecutionContext* qec, const parsedQuery::BasicGraphPattern& triples) const { - // TODO now we have to deal with (a) subset + (b) property path not rewritten - // to join so far - std::vector result; - // Find all single chains - + // All triples of the form `anything ?variable` where `` is covered + // by a materialized view, stored by `?variable` for finding chains. ad_utility::HashMap> chainLeft; + // All triples of the form `?variable ?otherVariable` where `` is + // covered by a materialized view, where `?variable` is different from + // `?otherVariable`, stored by `?variable` for finding chains. ad_utility::HashMap> chainRight; for (const auto& [i, triple] : ::ranges::views::enumerate(triples._triples)) { @@ -87,14 +87,16 @@ QueryPatternCache::makeJoinReplacementIndexScans( if (!predicateInView_.contains(iri)) { continue; } - if (triple.s_.isVariable()) { + if (triple.s_.isVariable() && triple.o_.isVariable()) { + // This triple could be the right side of a chain join. chainRight[triple.s_.getVariable()].push_back(i); } - if (triple.o_.isVariable()) { + if (triple.o_.isVariable() && triple.s_ != triple.o_) { + // This triple could be the left side of a chain join. 
chainLeft[triple.o_.getVariable()].push_back(i); } } else if (path.isSequence()) { - // CHECK THIS - This doesnt seem to occur in practice + // CHECK THIS - This doesn't seem to occur in practice AD_THROW("This should not happen"); /*const auto& seq = path.getSequence(); if (seq.size() == 2 && seq.at(0).isIri() && seq.at(1).isIri() && @@ -140,9 +142,8 @@ QueryPatternCache::makeJoinReplacementIndexScans( const auto& chainIriLeft = leftP.getIri().toStringRepresentation(); const auto& chainIriRight = rightP.getIri().toStringRepresentation(); - // TODO use std::string_view version to avoid copy - `is_transparent` - // hash for hash map - ChainedPredicates key{chainIriLeft, chainIriRight}; + // + ChainedPredicatesForLookup key{chainIriLeft, chainIriRight}; if (auto it = simpleChainCache_.find(key); it != simpleChainCache_.end()) { auto& chainInfoPtr = it->second; @@ -157,42 +158,21 @@ QueryPatternCache::makeJoinReplacementIndexScans( } } - // auto temp = triples._triples.at(0); - // auto& x = std::get(temp.p_).getSequence().at(0).isIri(); - - /*if (!left || !right || !left->predicate().isIri() || - !right->predicate().isIri()) { - return std::nullopt; - } - if (left->object() == right->subject() && - left->subject() != right->object() && left->subject() != left->object() && - right->subject() != right->object() && left->object().isVariable() && - right->object().isVariable()) { - materializedViewsQueryAnalysis::ChainedPredicates preds{ - left->predicate().getIri().toStringRepresentation(), - right->predicate().getIri().toStringRepresentation()}; - if (simpleChainCache_.contains(preds)) { - return UserQueryChain{left->subject(), left->object().getVariable(), - right->object().getVariable(), - simpleChainCache_.at(preds)}; - } - } - return std::nullopt; - */ AD_LOG_INFO << "MV PLANS: " << result.size() << std::endl; return result; } // _____________________________________________________________________________ std::shared_ptr QueryPatternCache::makeScanForSingleChain( - 
QueryExecutionContext* qec, ChainInfo cached, - const TripleComponent& subject, const std::optional& chain, - const Variable& object) const { - const auto& [cSubj, cChain, cObj, view] = cached; - parsedQuery::MaterializedViewQuery::RequestedColumns cols{{cSubj, subject}, - {cObj, object}}; + QueryExecutionContext* qec, ChainInfo cached, TripleComponent subject, + std::optional chain, Variable object) const { + auto& [cSubj, cChain, cObj, view] = cached; + parsedQuery::MaterializedViewQuery::RequestedColumns cols{ + {std::move(cSubj), std::move(subject)}, + {std::move(cObj), std::move(object)}, + }; if (chain.has_value()) { - cols.insert({cChain, chain.value()}); + cols.insert({std::move(cChain), std::move(chain.value())}); } return view->makeIndexScan( qec, parsedQuery::MaterializedViewQuery{view->name(), std::move(cols)}); @@ -233,6 +213,7 @@ bool QueryPatternCache::analyzeSimpleChain(ViewPtr view, const SparqlTriple& a, auto bObj = b.o_.getVariable(); // Insert chain to cache. + // TODO avoid hashing 3x ChainedPredicates preds{aPred.value(), bPred.value()}; if (!simpleChainCache_.contains(preds)) { simpleChainCache_[preds] = std::make_shared>(); diff --git a/src/engine/MaterializedViewsQueryAnalysis.h b/src/engine/MaterializedViewsQueryAnalysis.h index 9f0c1427a7..6c6875177c 100644 --- a/src/engine/MaterializedViewsQueryAnalysis.h +++ b/src/engine/MaterializedViewsQueryAnalysis.h @@ -18,6 +18,46 @@ class MaterializedView; class IndexScan; +// _____________________________________________________________________________ +namespace ad_utility::detail { + +using StringPair = std::pair; +using StringViewPair = std::pair; + +// _____________________________________________________________________________ +struct StringPairHash { + // Allows looking up values from a hash map with `StringPair` keys also with + // `StringViewPair`. 
+ using is_transparent = void; + + size_t operator()(const StringPair& p) const { + return absl::HashOf(p.first, p.second); + } + + size_t operator()(const StringViewPair& p) const { + return absl::HashOf(p.first, p.second); + } +}; + +// _____________________________________________________________________________ +struct StringPairEq { + using is_transparent = void; + + bool operator()(const StringPair& a, const StringPair& b) const { + return a == b; + } + + bool operator()(const StringPair& a, const StringViewPair& b) const { + return a.first == b.first && a.second == b.second; + } + + bool operator()(const StringViewPair& a, const StringPair& b) const { + return b.first == a.first && b.second == a.second; + } +}; + +} // namespace ad_utility::detail + // _____________________________________________________________________________ namespace materializedViewsQueryAnalysis { @@ -25,9 +65,8 @@ using ViewPtr = std::shared_ptr; // Key and value types of the cache for simple chains, that is queries of the // form `?s ?m . ?m ?o`. -using ChainedPredicates = std::pair; -using ChainedPredicatesForLookup = - std::pair; +using ChainedPredicates = ad_utility::detail::StringPair; +using ChainedPredicatesForLookup = ad_utility::detail::StringViewPair; struct ChainInfo { Variable subject_; Variable chain_; @@ -80,8 +119,9 @@ struct MaterializedViewJoinReplacement { // of an existing materialized view. class QueryPatternCache { // Simple chains can be found by direct access into a hash map. - ad_utility::HashMap>> + ad_utility::HashMap< + ChainedPredicates, std::shared_ptr>, + ad_utility::detail::StringPairHash, ad_utility::detail::StringPairEq> simpleChainCache_; // Cache for predicates appearing in a materialized view. 
@@ -99,9 +139,8 @@ class QueryPatternCache { const parsedQuery::BasicGraphPattern& triples) const; std::shared_ptr makeScanForSingleChain( - QueryExecutionContext* qec, ChainInfo cached, - const TripleComponent& subject, const std::optional& chain, - const Variable& object) const; + QueryExecutionContext* qec, ChainInfo cached, TripleComponent subject, + std::optional chain, Variable object) const; private: // Helper for `analyzeView`, that checks for a simple chain. It returns `true` diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 9b99fb965f..46c7c2171c 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -2300,14 +2300,6 @@ std::vector QueryPlanner::createJoinCandidates( candidates.push_back(std::move(plan)); } - // If the given plans are two index scans and we would join them on a single - // column, we check for a possible replacement with a materialized view. - // TODO Other join types should be checked in - // `optimizeCommutatively` - // if (auto opt = createMaterializedViewSimpleJoinReplacement(a, b, jcs)) { - // candidates.push_back(std::move(opt.value())); - // } - // "NORMAL" CASE: // The join class takes care of sorting the subtrees if necessary SubtreePlan plan = @@ -2590,10 +2582,16 @@ auto QueryPlanner::createJoinWithTransitivePath(const SubtreePlan& a, auto QueryPlanner::createMaterializedViewJoinReplacements( const parsedQuery::BasicGraphPattern& triples) const -> ReplacementPlans { ReplacementPlans plans; + // The `MaterializedViewsManager` provides `IndexScan` instances for all the + // subsets of `triples` it can rewrite. The individual results do not cover + // all items of `triples`, instead each has a vector of triple indices it + // covers. auto scans = _qec->materializedViewsManager().makeJoinReplacementIndexScans( _qec, triples); plans.reserve(triples._triples.size()); + // Convert all the `IndexScan`s to `SubtreePlan`s with the appropriate ids + // set. 
for (const auto& [scan, coveredTriples] : scans) { auto plan = makeSubtreePlan(scan); // This is equivalent to a join between the covered triples, so we must mark diff --git a/src/engine/QueryPlanner.h b/src/engine/QueryPlanner.h index 7c8c68f65a..8c9595493f 100644 --- a/src/engine/QueryPlanner.h +++ b/src/engine/QueryPlanner.h @@ -450,10 +450,13 @@ class QueryPlanner { const SubtreePlan& b, const JoinColumns& jcs); - // + // Helper that generates `IndexScan` query plans on materialized views if they + // can be used to avoid joins between some of the `triples`. The resulting + // plans for part of the `triples` are given in a vector of query planning + // rounds in which they should be added to the planner. For example, at index + // 1 there is a vector of query plans that should be added in round 1 of the + // dynamic programming algorithm. using ReplacementPlans = std::vector>; - - // Helper that ... ReplacementPlans createMaterializedViewJoinReplacements( const parsedQuery::BasicGraphPattern& triples) const; From babaa7bca6ad19cf88267d019dc457e29daac6ec Mon Sep 17 00:00:00 2001 From: ullingerc Date: Tue, 27 Jan 2026 17:01:50 +0100 Subject: [PATCH 28/64] more work --- src/engine/MaterializedViews.cpp | 49 +------------ src/engine/MaterializedViews.h | 11 ++- src/engine/MaterializedViewsQueryAnalysis.cpp | 72 ++++++++----------- src/engine/QueryPlanner.cpp | 21 +++--- src/engine/QueryPlanner.h | 3 +- 5 files changed, 55 insertions(+), 101 deletions(-) diff --git a/src/engine/MaterializedViews.cpp b/src/engine/MaterializedViews.cpp index 7ab136cb5f..a93edf87ce 100644 --- a/src/engine/MaterializedViews.cpp +++ b/src/engine/MaterializedViews.cpp @@ -602,59 +602,12 @@ std::shared_ptr MaterializedView::makeIndexScan( } // _____________________________________________________________________________ -std::vector +std::vector MaterializedViewsManager::makeJoinReplacementIndexScans( QueryExecutionContext* qec, const parsedQuery::BasicGraphPattern& triples) const { return 
queryPatternCache_.rlock()->makeJoinReplacementIndexScans(qec, triples); - /* auto lock = loadedViews_.rlock(); - auto patternLock = queryPatternCache_.rlock(); - - // Try both possible orderings of the join's children. - auto userQueryChain = - [&]() -> std::optional { - if (auto chain = patternLock->checkSimpleChain(left, right)) { - return chain; - } - if (auto chain = patternLock->checkSimpleChain(right, left)) { - return chain; - } - return std::nullopt; - }(); - - // The join between `left` and `right` does not constitute a chain. - if (!userQueryChain.has_value() || - userQueryChain.value().chainInfos_ == nullptr || - userQueryChain.value().chainInfos_->size() == 0) { - return nullptr; - } - - // TODO We should consider all the possible views for the - // combination of predicates. They could have different sorting. - const auto& [subj, chain, obj, view] = - userQueryChain.value().chainInfos_->at(0); - - // Ensure the subject is the first column if it is fixed. - if (!userQueryChain.value().subject_.isVariable() && - view->variableToColumnMap().at(subj).columnIndex_ != 0) { - AD_LOG_INFO << "We could use the materialized view '" << view->name() - << "' for the join on '" << chain.name() - << "', however the subject is fixed to '" - << userQueryChain.value().subject_.toRdfLiteral() - << "' and column '" << subj.name() - << "' is not the first column of the view." << std::endl; - return nullptr; - } - - // Construct requested columns for scan specification. 
- parsedQuery::MaterializedViewQuery::RequestedColumns cols{ - {subj, userQueryChain.value().subject_}, - {chain, userQueryChain.value().chain_}, - {obj, userQueryChain.value().object_}}; - return view->makeIndexScan( - qec, parsedQuery::MaterializedViewQuery{view->name(), - std::move(cols)});*/ } // _____________________________________________________________________________ diff --git a/src/engine/MaterializedViews.h b/src/engine/MaterializedViews.h index 00ef23b6fa..10a502300f 100644 --- a/src/engine/MaterializedViews.h +++ b/src/engine/MaterializedViews.h @@ -241,6 +241,9 @@ class MaterializedView { const parsedQuery::MaterializedViewQuery& viewQuery) const; }; +// Shorthand for query rewriting helper class. +using materializedViewsQueryAnalysis::MaterializedViewJoinReplacement; + // The `MaterializedViewsManager` is part of the `QueryExecutionContext` and is // used to manage the currently loaded `MaterializedViews` in a `Server` or // `Qlever` instance. @@ -280,9 +283,11 @@ class MaterializedViewsManager { QueryExecutionContext* qec, const parsedQuery::MaterializedViewQuery& viewQuery) const; - // - std::vector - makeJoinReplacementIndexScans( + // Given a set of triples, check if some join operations that would be + // required when evaluating them can be replaced by scans on materialized + // views that are currently loaded. This is implemented using the + // `queryPatternCache_`. + std::vector makeJoinReplacementIndexScans( QueryExecutionContext* qec, const parsedQuery::BasicGraphPattern& triples) const; }; diff --git a/src/engine/MaterializedViewsQueryAnalysis.cpp b/src/engine/MaterializedViewsQueryAnalysis.cpp index 699df71e00..ad8fc423ae 100644 --- a/src/engine/MaterializedViewsQueryAnalysis.cpp +++ b/src/engine/MaterializedViewsQueryAnalysis.cpp @@ -72,12 +72,16 @@ QueryPatternCache::makeJoinReplacementIndexScans( // All triples of the form `anything ?variable` where `` is covered // by a materialized view, stored by `?variable` for finding chains. 
ad_utility::HashMap> chainLeft; + // All triples of the form `?variable ?otherVariable` where `` is // covered by a materialized view, where `?variable` is different from // `?otherVariable`, stored by `?variable` for finding chains. ad_utility::HashMap> chainRight; - for (const auto& [i, triple] : ::ranges::views::enumerate(triples._triples)) { + // TODO Optimizations for stars. + + for (const auto& [tripleIdx, triple] : + ::ranges::views::enumerate(triples._triples)) { if (std::holds_alternative(triple.p_)) { const auto& path = std::get(triple.p_); if (path.isIri()) { @@ -89,49 +93,37 @@ QueryPatternCache::makeJoinReplacementIndexScans( } if (triple.s_.isVariable() && triple.o_.isVariable()) { // This triple could be the right side of a chain join. - chainRight[triple.s_.getVariable()].push_back(i); + chainRight[triple.s_.getVariable()].push_back(tripleIdx); } if (triple.o_.isVariable() && triple.s_ != triple.o_) { // This triple could be the left side of a chain join. - chainLeft[triple.o_.getVariable()].push_back(i); + chainLeft[triple.o_.getVariable()].push_back(tripleIdx); } } else if (path.isSequence()) { - // CHECK THIS - This doesn't seem to occur in practice - AD_THROW("This should not happen"); - /*const auto& seq = path.getSequence(); - if (seq.size() == 2 && seq.at(0).isIri() && seq.at(1).isIri() && - triple.o_.isVariable()) { - const auto& chainIriLeft = - seq.at(0).getIri().toStringRepresentation(); - const auto& chainIriRight = - seq.at(1).getIri().toStringRepresentation(); - // TODO use std::string_view version to avoid copy - `is_transparent` - // hash for hash map - ChainedPredicates key{chainIriLeft, chainIriRight}; - if (auto it = simpleChainCache_.find(key); - it != simpleChainCache_.end()) { - auto& chainInfoPtr = it->second; - for (const auto& chainInfo : *chainInfoPtr) { - result.push_back({makeScanForSingleChain(qec, chainInfo, - triple.s_, std::nullopt, - triple.o_.getVariable()), - {static_cast(i)}}); - } - } - }*/ + AD_THROW( + 
"Sequence property paths are expected to be replaced by joins " + "during earlier stages of query planning."); } } } + // Using the information collected by the pass over all triples, assemble all + // chains that can potentially be rewritten. for (const auto& [varLeft, triplesLeft] : chainLeft) { + // No triples for the right side on the same variable have been collected. if (!chainRight.contains(varLeft)) { continue; } + + // Iterate over all chains present and check if they can be rewritten to a + // view scan. for (auto tripleIdxRight : chainRight.at(varLeft)) { for (auto tripleIdxLeft : triplesLeft) { const auto& left = triples._triples.at(tripleIdxLeft); const auto& right = triples._triples.at(tripleIdxRight); + // We have already checked that this holds the correct alternative + // above. const auto& leftP = std::get(left.p_); const auto& rightP = std::get(right.p_); @@ -139,15 +131,18 @@ QueryPatternCache::makeJoinReplacementIndexScans( continue; } - const auto& chainIriLeft = leftP.getIri().toStringRepresentation(); - const auto& chainIriRight = rightP.getIri().toStringRepresentation(); + // Lookup key based on `std::string_view` avoids copying the IRIs. + ChainedPredicatesForLookup key{ + leftP.getIri().toStringRepresentation(), + rightP.getIri().toStringRepresentation()}; - // - ChainedPredicatesForLookup key{chainIriLeft, chainIriRight}; + // Lookup if there are matching views. There could potentially be + // multiple (e.g. with different sorting). if (auto it = simpleChainCache_.find(key); it != simpleChainCache_.end()) { - auto& chainInfoPtr = it->second; - for (const auto& chainInfo : *chainInfoPtr) { + for (const auto& chainInfo : *(it->second)) { + // We have found a materialized view for this chain. Construct an + // `IndexScan`. 
result.push_back( {makeScanForSingleChain(qec, chainInfo, left.s_, varLeft, right.o_.getVariable()), @@ -158,7 +153,6 @@ QueryPatternCache::makeJoinReplacementIndexScans( } } - AD_LOG_INFO << "MV PLANS: " << result.size() << std::endl; return result; } @@ -166,13 +160,13 @@ QueryPatternCache::makeJoinReplacementIndexScans( std::shared_ptr QueryPatternCache::makeScanForSingleChain( QueryExecutionContext* qec, ChainInfo cached, TripleComponent subject, std::optional chain, Variable object) const { - auto& [cSubj, cChain, cObj, view] = cached; + auto& [cSubject, cChainVar, cObject, view] = cached; parsedQuery::MaterializedViewQuery::RequestedColumns cols{ - {std::move(cSubj), std::move(subject)}, - {std::move(cObj), std::move(object)}, + {std::move(cSubject), std::move(subject)}, + {std::move(cObject), std::move(object)}, }; if (chain.has_value()) { - cols.insert({std::move(cChain), std::move(chain.value())}); + cols.insert({std::move(cChainVar), std::move(chain.value())}); } return view->makeIndexScan( qec, parsedQuery::MaterializedViewQuery{view->name(), std::move(cols)}); @@ -225,7 +219,6 @@ bool QueryPatternCache::analyzeSimpleChain(ViewPtr view, const SparqlTriple& a, // _____________________________________________________________________________ bool QueryPatternCache::analyzeView(ViewPtr view) { - AD_LOG_INFO << view->name() << std::endl; const auto& query = view->originalQuery(); if (!query.has_value()) { return false; @@ -285,9 +278,6 @@ bool QueryPatternCache::analyzeView(ViewPtr view) { for (const auto& triple : triples) { auto predicate = triple.getSimplePredicate(); if (predicate.has_value()) { - if (!predicateInView_.contains(predicate.value())) { - predicateInView_[predicate.value()] = {}; - } predicateInView_[predicate.value()].push_back(view); } } diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 46c7c2171c..2c27bd3a7e 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -1505,15 +1505,18 @@ void 
QueryPlanner::applyTextLimitsIfPossible(vector& row, // _____________________________________________________________________________ size_t QueryPlanner::findUniqueNodeIds( - const std::vector& connectedComponent) { + const std::vector& connectedComponent, + bool allowReplacementPlans) { ad_utility::HashSet uniqueNodeIds; auto nodeIds = connectedComponent | ql::views::transform(&SubtreePlan::_idsOfIncludedNodes); // Check that all the `_idsOfIncludedNodes` are one-hot encodings of a single - // value, i.e. they have exactly one bit set. - // TODO no longer true (for replacement plans) - // AD_CORRECTNESS_CHECK(ql::ranges::all_of( - // nodeIds, [](auto nodeId) { return absl::popcount(nodeId) == 1; })); + // value, i.e. they have exactly one bit set. If replacement plans are allowed + // this constraint is not applicable. + AD_CORRECTNESS_CHECK(allowReplacementPlans || + ql::ranges::all_of(nodeIds, [](auto nodeId) { + return absl::popcount(nodeId) == 1; + })); ql::ranges::copy(nodeIds, std::inserter(uniqueNodeIds, uniqueNodeIds.end())); return uniqueNodeIds.size(); } @@ -1530,7 +1533,7 @@ QueryPlanner::runDynamicProgrammingOnConnectedComponent( // (there might be duplicates because we already have multiple candidates // for each index scan with different permutations. dpTab.push_back(std::move(connectedComponent)); - size_t numSeeds = findUniqueNodeIds(dpTab.back()); + size_t numSeeds = findUniqueNodeIds(dpTab.back(), false); for (size_t k = 2; k <= numSeeds; ++k) { AD_LOG_TRACE << "Producing plans that unite " << k << " triples." 
@@ -1638,11 +1641,12 @@ std::vector QueryPlanner::runGreedyPlanningOnConnectedComponent( std::vector connectedComponent, const FiltersAndOptionalSubstitutes& filters, const TextLimitVec& textLimits, const TripleGraph& tg, - const ReplacementPlans&) const { + const ReplacementPlans& replacementPlans) const { applyFiltersIfPossible(connectedComponent, filters); applyTextLimitsIfPossible(connectedComponent, textLimits, true); - const size_t numSeeds = findUniqueNodeIds(connectedComponent); + const size_t numSeeds = + findUniqueNodeIds(connectedComponent, !replacementPlans.empty()); if (numSeeds <= 1) { // Only 0 or 1 nodes in the input, nothing to plan. return connectedComponent; @@ -3386,6 +3390,7 @@ void QueryPlanner::GraphPatternPlanner::visitSubquery( // _______________________________________________________________ void QueryPlanner::GraphPatternPlanner::optimizeCommutatively() { + // TODO Add runtime parameter to disable join rewriting. auto replacementPlans = planner_.createMaterializedViewJoinReplacements(candidateTriples_); auto tg = planner_.createTripleGraph(&candidateTriples_); diff --git a/src/engine/QueryPlanner.h b/src/engine/QueryPlanner.h index 8c9595493f..b77a4c3c72 100644 --- a/src/engine/QueryPlanner.h +++ b/src/engine/QueryPlanner.h @@ -734,7 +734,8 @@ class QueryPlanner { static size_t findSmallestExecutionTree( const std::vector& lastRow); static size_t findUniqueNodeIds( - const std::vector& connectedComponent); + const std::vector& connectedComponent, + bool allowReplacementPlans = false); /// if this Planner is not associated with a queryExecutionContext we are only /// in the unit test mode From 3ee6103cadb3a0b0614c4f357a2802e8a44cad21 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Tue, 27 Jan 2026 17:41:00 +0100 Subject: [PATCH 29/64] lots of stuff --- src/engine/MaterializedViewsQueryAnalysis.cpp | 45 --------- src/engine/MaterializedViewsQueryAnalysis.h | 91 +++---------------- src/engine/QueryPlanner.cpp | 12 +-- 
src/parser/CMakeLists.txt | 1 + src/parser/GraphPatternAnalysis.cpp | 33 +++++++ src/parser/GraphPatternAnalysis.h | 45 +++++++++ src/parser/GraphPatternOperation.cpp | 30 ++++++ src/parser/GraphPatternOperation.h | 12 +++ src/util/StringPairHashMap.h | 68 ++++++++++++++ 9 files changed, 202 insertions(+), 135 deletions(-) create mode 100644 src/parser/GraphPatternAnalysis.cpp create mode 100644 src/parser/GraphPatternAnalysis.h create mode 100644 src/util/StringPairHashMap.h diff --git a/src/engine/MaterializedViewsQueryAnalysis.cpp b/src/engine/MaterializedViewsQueryAnalysis.cpp index ad8fc423ae..d7ee78d7c4 100644 --- a/src/engine/MaterializedViewsQueryAnalysis.cpp +++ b/src/engine/MaterializedViewsQueryAnalysis.cpp @@ -17,51 +17,6 @@ namespace materializedViewsQueryAnalysis { -// _____________________________________________________________________________ -ad_utility::HashSet getVariablesPresentInBasicGraphPatterns( - const std::vector& graphPatterns) { - ad_utility::HashSet vars; - for (const auto& graphPattern : graphPatterns) { - if (!std::holds_alternative(graphPattern)) { - continue; - } - for (const auto& triple : graphPattern.getBasic()._triples) { - if (triple.s_.isVariable()) { - vars.insert(triple.s_.getVariable()); - } - if (triple.o_.isVariable()) { - vars.insert(triple.o_.getVariable()); - } - if (auto p = triple.getPredicateVariable()) { - vars.insert(p.value()); - } - } - } - return vars; -} - -// _____________________________________________________________________________ -bool BasicGraphPatternsInvariantTo::operator()( - const parsedQuery::Optional&) const { - // TODO Analyze if the optional binds values from the outside - // query. 
- return false; -} - -// _____________________________________________________________________________ -bool BasicGraphPatternsInvariantTo::operator()( - const parsedQuery::Bind& bind) const { - return !variables_.contains(bind._target); -} - -// _____________________________________________________________________________ -bool BasicGraphPatternsInvariantTo::operator()( - const parsedQuery::Values& values) const { - return !std::ranges::any_of( - values._inlineValues._variables, - [this](const auto& var) { return variables_.contains(var); }); -} - // _____________________________________________________________________________ std::vector QueryPatternCache::makeJoinReplacementIndexScans( diff --git a/src/engine/MaterializedViewsQueryAnalysis.h b/src/engine/MaterializedViewsQueryAnalysis.h index 6c6875177c..a8ab9dfe80 100644 --- a/src/engine/MaterializedViewsQueryAnalysis.h +++ b/src/engine/MaterializedViewsQueryAnalysis.h @@ -7,61 +7,23 @@ #ifndef QLEVER_SRC_ENGINE_MATERIALIZEDVIEWSQUERYANALYSIS_H_ #define QLEVER_SRC_ENGINE_MATERIALIZEDVIEWSQUERYANALYSIS_H_ +#include "parser/GraphPatternAnalysis.h" #include "parser/GraphPatternOperation.h" #include "parser/SparqlTriple.h" #include "parser/TripleComponent.h" #include "rdfTypes/Variable.h" -#include "util/HashMap.h" +#include "util/StringPairHashMap.h" #include "util/TypeTraits.h" // Forward declarations to prevent cyclic dependencies. class MaterializedView; class IndexScan; -// _____________________________________________________________________________ -namespace ad_utility::detail { - -using StringPair = std::pair; -using StringViewPair = std::pair; - -// _____________________________________________________________________________ -struct StringPairHash { - // Allows looking up values from a hash map with `StringPair` keys also with - // `StringViewPair`. 
- using is_transparent = void; - - size_t operator()(const StringPair& p) const { - return absl::HashOf(p.first, p.second); - } - - size_t operator()(const StringViewPair& p) const { - return absl::HashOf(p.first, p.second); - } -}; - -// _____________________________________________________________________________ -struct StringPairEq { - using is_transparent = void; - - bool operator()(const StringPair& a, const StringPair& b) const { - return a == b; - } - - bool operator()(const StringPair& a, const StringViewPair& b) const { - return a.first == b.first && a.second == b.second; - } - - bool operator()(const StringViewPair& a, const StringPair& b) const { - return b.first == a.first && b.second == a.second; - } -}; - -} // namespace ad_utility::detail - // _____________________________________________________________________________ namespace materializedViewsQueryAnalysis { using ViewPtr = std::shared_ptr; +using graphPatternAnalysis::BasicGraphPatternsInvariantTo; // Key and value types of the cache for simple chains, that is queries of the // form `?s ?m . ?m ?o`. @@ -73,40 +35,11 @@ struct ChainInfo { Variable object_; ViewPtr view_; }; +using SimpleChainCache = + ad_utility::StringPairHashMap>>; -// Extract all variables present in a set of graph patterns. Required for -// `BasicGraphPatternsInvariantTo` below. -ad_utility::HashSet getVariablesPresentInBasicGraphPatterns( - const std::vector& graphPatterns); - -// Check whether certain graph patterns can be ignored as they do not affect the -// result of a query when we are only interested in the bindings for variables -// from `variables_`. 
-struct BasicGraphPatternsInvariantTo { - ad_utility::HashSet variables_; - - bool operator()(const parsedQuery::Optional& optional) const; - bool operator()(const parsedQuery::Bind& bind) const; - bool operator()(const parsedQuery::Values& values) const; - - CPP_template(typename T)(requires( - !ad_utility::SimilarToAny)) bool - operator()(const T&) const { - return false; - } -}; - -// Similar to `ChainInfo`, this struct represents a simple chain, however it may -// bind the subject. -struct UserQueryChain { - TripleComponent subject_; // Allow fixing the subject of the chain. - Variable chain_; - Variable object_; - std::shared_ptr> chainInfos_; -}; - -// +// Helper class that represents a possible join replacement and indicates the +// subset of triples it handles. struct MaterializedViewJoinReplacement { std::shared_ptr indexScan_; std::vector coveredTriples_; @@ -119,10 +52,7 @@ struct MaterializedViewJoinReplacement { // of an existing materialized view. class QueryPatternCache { // Simple chains can be found by direct access into a hash map. - ad_utility::HashMap< - ChainedPredicates, std::shared_ptr>, - ad_utility::detail::StringPairHash, ad_utility::detail::StringPairEq> - simpleChainCache_; + SimpleChainCache simpleChainCache_; // Cache for predicates appearing in a materialized view. ad_utility::HashMap> predicateInView_; @@ -133,11 +63,14 @@ class QueryPatternCache { // This is called from `MaterializedViewsManager::loadView`. bool analyzeView(ViewPtr view); - // + // Given a set of triples, check if a subset of necessary join operations can + // be replaced by scans on materialized views. std::vector makeJoinReplacementIndexScans( QueryExecutionContext* qec, const parsedQuery::BasicGraphPattern& triples) const; + // Construct an `IndexScan` for a single chain join given the necessary + // information from both the materialized view and the user's query. 
std::shared_ptr makeScanForSingleChain( QueryExecutionContext* qec, ChainInfo cached, TripleComponent subject, std::optional chain, Variable object) const; diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 2c27bd3a7e..165538ed5b 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -3122,17 +3122,7 @@ void QueryPlanner::GraphPatternPlanner::visitBasicGraphPattern( const parsedQuery::BasicGraphPattern& v) { // A basic graph patterns consists only of triples. First collect all // the bound variables. - for (const SparqlTriple& t : v._triples) { - if (t.s_.isVariable()) { - boundVariables_.insert(t.s_.getVariable()); - } - if (auto predicate = t.getPredicateVariable()) { - boundVariables_.insert(predicate.value()); - } - if (t.o_.isVariable()) { - boundVariables_.insert(t.o_.getVariable()); - } - } + v.collectAllContainedVariables(boundVariables_); // Then collect the triples. Transform each triple with a property path to // an equivalent form without property path (using `seedFromPropertyPath`). 
diff --git a/src/parser/CMakeLists.txt b/src/parser/CMakeLists.txt index 8c76f41716..1629ea784d 100644 --- a/src/parser/CMakeLists.txt +++ b/src/parser/CMakeLists.txt @@ -30,5 +30,6 @@ add_library(parser Quads.cpp UpdateTriples.cpp MaterializedViewQuery.cpp + GraphPatternAnalysis.cpp ) qlever_target_link_libraries(parser sparqlParser parserData sparqlExpressions rdfEscaping global re2::re2 util engine index rdfTypes) diff --git a/src/parser/GraphPatternAnalysis.cpp b/src/parser/GraphPatternAnalysis.cpp new file mode 100644 index 0000000000..d8f736ff4b --- /dev/null +++ b/src/parser/GraphPatternAnalysis.cpp @@ -0,0 +1,33 @@ +// Copyright 2026 The QLever Authors, in particular: +// +// 2026 Christoph Ullinger , UFR +// +// UFR = University of Freiburg, Chair of Algorithms and Data Structures + +#include "parser/GraphPatternAnalysis.h" + +namespace graphPatternAnalysis { + +// _____________________________________________________________________________ +bool BasicGraphPatternsInvariantTo::operator()( + const parsedQuery::Optional&) const { + // TODO Analyze if the optional binds values from the outside + // query. 
+ return false; +} + +// _____________________________________________________________________________ +bool BasicGraphPatternsInvariantTo::operator()( + const parsedQuery::Bind& bind) const { + return !variables_.contains(bind._target); +} + +// _____________________________________________________________________________ +bool BasicGraphPatternsInvariantTo::operator()( + const parsedQuery::Values& values) const { + return !std::ranges::any_of( + values._inlineValues._variables, + [this](const auto& var) { return variables_.contains(var); }); +} + +} // namespace graphPatternAnalysis diff --git a/src/parser/GraphPatternAnalysis.h b/src/parser/GraphPatternAnalysis.h new file mode 100644 index 0000000000..953d2ede9a --- /dev/null +++ b/src/parser/GraphPatternAnalysis.h @@ -0,0 +1,45 @@ +// Copyright 2026 The QLever Authors, in particular: +// +// 2026 Christoph Ullinger , UFR +// +// UFR = University of Freiburg, Chair of Algorithms and Data Structures + +#ifndef QLEVER_SRC_PARSER_GRAPHPATTERNANALYSIS_H_ +#define QLEVER_SRC_PARSER_GRAPHPATTERNANALYSIS_H_ + +#include "parser/GraphPatternOperation.h" + +// This module contains helpers for analyzing the structure of graph patterns. + +// _____________________________________________________________________________ +namespace graphPatternAnalysis { + +// Check whether certain graph patterns can be ignored when we are only +// interested in the bindings for variables from `variables_` as they do not +// affect the result of a query that only selects `variables_`. This is +// currently used for the `MaterializedViewsManager`'s `QueryPatternCache`. +// +// NOTE: This does not guarantee completeness, so it might return `false` even +// though we could be invariant to a `GraphPatternOperation`. +// +// NOTE: The selected query result is expected to be deduplicated, otherwise the +// result indicated by this helper is not correct. 
+struct BasicGraphPatternsInvariantTo { + ad_utility::HashSet variables_; + + bool operator()(const parsedQuery::Optional& optional) const; + bool operator()(const parsedQuery::Bind& bind) const; + bool operator()(const parsedQuery::Values& values) const; + + CPP_template(typename T)(requires( + // TODO Whitelist + !ad_utility::SimilarToAny)) bool operator()(const T&) + const { + return false; + } +}; + +} // namespace graphPatternAnalysis + +#endif // QLEVER_SRC_PARSER_GRAPHPATTERNANALYSIS_H_ diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index b92e2eb46d..0abc32cdd7 100644 --- a/src/parser/GraphPatternOperation.cpp +++ b/src/parser/GraphPatternOperation.cpp @@ -81,4 +81,34 @@ void BasicGraphPattern::appendTriples(BasicGraphPattern other) { auto inner = _expression.getDescriptor(); return "BIND (" + inner + " AS " + _target.name() + ")"; } + +// ____________________________________________________________________________ +void BasicGraphPattern::collectAllContainedVariables( + ad_utility::HashSet& vars) const { + for (const SparqlTriple& t : _triples) { + if (t.s_.isVariable()) { + vars.insert(t.s_.getVariable()); + } + if (auto predicate = t.getPredicateVariable()) { + vars.insert(predicate.value()); + } + if (t.o_.isVariable()) { + vars.insert(t.o_.getVariable()); + } + } +} + +// _____________________________________________________________________________ +ad_utility::HashSet getVariablesPresentInBasicGraphPatterns( + const std::vector& graphPatterns) { + ad_utility::HashSet vars; + for (const auto& graphPattern : graphPatterns) { + if (!std::holds_alternative(graphPattern)) { + continue; + } + graphPattern.getBasic().collectAllContainedVariables(vars); + } + return vars; +} + } // namespace parsedQuery diff --git a/src/parser/GraphPatternOperation.h b/src/parser/GraphPatternOperation.h index 4f16be3f9a..041e357616 100644 --- a/src/parser/GraphPatternOperation.h +++ b/src/parser/GraphPatternOperation.h @@ -80,8 +80,20 
@@ struct BasicGraphPattern { std::vector _triples; /// Append the triples from `other` to this `BasicGraphPattern` void appendTriples(BasicGraphPattern other); + + // Collect all the `Variable`s present in this `BasicGraphPattern` and add + // them to a `HashSet`. + void collectAllContainedVariables(ad_utility::HashSet& vars) const; }; +// Extract all variables present in a set of `BasicGraphPatterns` contained in +// `GraphPatternOperation`s. +// +// IMPORTANT: This function does not consider variables that are contained in +// other types of `GraphPatternOperation`s. +ad_utility::HashSet getVariablesPresentInBasicGraphPatterns( + const std::vector& graphPatterns); + /// A `Values` clause struct Values { SparqlValues _inlineValues; diff --git a/src/util/StringPairHashMap.h b/src/util/StringPairHashMap.h new file mode 100644 index 0000000000..0b8ff98965 --- /dev/null +++ b/src/util/StringPairHashMap.h @@ -0,0 +1,68 @@ +// Copyright 2026 The QLever Authors, in particular: +// +// 2026 Christoph Ullinger , UFR +// +// UFR = University of Freiburg, Chair of Algorithms and Data Structures + +#ifndef QLEVER_SRC_UTIL_STRINGPAIRHASHMAP_H_ +#define QLEVER_SRC_UTIL_STRINGPAIRHASHMAP_H_ + +#include "util/HashMap.h" + +// This module provides a modified version of `ad_utility::HashMap` that uses +// pairs of strings as keys. Unlike the default hash map it allows looking up +// values with pairs of string views as keys. This is implemented using custom +// hash and equality operators. + +// _____________________________________________________________________________ +namespace ad_utility { + +// _____________________________________________________________________________ +namespace detail { + +using StringPair = std::pair; +using StringViewPair = std::pair; + +// _____________________________________________________________________________ +struct StringPairHash { + // Allows looking up values from a hash map with `StringPair` keys also with + // `StringViewPair`. 
+ using is_transparent = void; + + size_t operator()(const StringPair& p) const { + return absl::HashOf(p.first, p.second); + } + + size_t operator()(const StringViewPair& p) const { + return absl::HashOf(p.first, p.second); + } +}; + +// _____________________________________________________________________________ +struct StringPairEq { + using is_transparent = void; + + bool operator()(const StringPair& a, const StringPair& b) const { + return a == b; + } + + bool operator()(const StringPair& a, const StringViewPair& b) const { + return a.first == b.first && a.second == b.second; + } + + bool operator()(const StringViewPair& a, const StringPair& b) const { + return b.first == a.first && b.second == a.second; + } +}; + +} // namespace detail + +template +using StringPairHashMap = + ad_utility::HashMap; + +} // namespace ad_utility + +#endif // QLEVER_SRC_UTIL_STRINGPAIRHASHMAP_H_ From 1e3271f478e911381fec1d7882d3c7969f32c0bc Mon Sep 17 00:00:00 2001 From: ullingerc Date: Tue, 27 Jan 2026 17:52:49 +0100 Subject: [PATCH 30/64] fix some comments --- src/engine/QueryPlanner.cpp | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 165538ed5b..c8b234502e 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -1776,18 +1776,16 @@ std::vector> QueryPlanner::fillDpTab( coveredNodes |= plan._idsOfIncludedNodes; } - // TODO this could be hash-map based if we would return the indices in the - // create helper and pass them as part of replacementPlans + // TODO This could be hash-map based if we would return the + // indices in the create helper and pass them as part of `replacementPlans`. 
ReplacementPlans applicableReplacementPlans; - // size_t numApplicableReplacementPlans = 0; for (auto& rPlans : replacementPlans) { std::vector applicable; for (auto& plan : rPlans) { - // (a & b) == a -> a is subset of b + // Nodes covered by plan are a subset of the covered nodes. if ((plan._idsOfIncludedNodes & coveredNodes) == plan._idsOfIncludedNodes) { applicable.push_back(std::move(plan)); - // ++numApplicableReplacementPlans; } } applicableReplacementPlans.push_back(std::move(applicable)); @@ -1805,11 +1803,10 @@ std::vector> QueryPlanner::fillDpTab( auto impl = useGreedyPlanning ? &QueryPlanner::runGreedyPlanningOnConnectedComponent : &QueryPlanner::runDynamicProgrammingOnConnectedComponent; - // TODO greedy: once with replacementPlans of all levels on level 0 and all - // covered nodes removed ; once without replacementPlans -> use better - // result + // TODO Run greedy also once without replacementPlans -> use + // better result if (useGreedyPlanning) { - // Remove covered ones + // Remove covered ones. for (const auto& a : applicableReplacementPlans) { for (const auto& p : a) { std::erase_if(component, [&p](const auto& c) { @@ -1817,8 +1814,9 @@ std::vector> QueryPlanner::fillDpTab( }); } } - // Insert replacements. TODO replacements themselves should not contain - // each other + // Insert replacements. + // TODO How can we ensure that replacements themselves do not + // contain each other. This will be probably relevant for stars later. 
for (const auto& a : applicableReplacementPlans) { for (const auto& p : a) { component.push_back(p); From f05db5bfdaa6467a5a2ccde7d580928b8f7d2305 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Wed, 28 Jan 2026 14:16:54 +0100 Subject: [PATCH 31/64] apply some feedback --- src/engine/MaterializedViewsQueryAnalysis.cpp | 8 ++++---- src/parser/GraphPatternAnalysis.h | 13 +++++++++---- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/engine/MaterializedViewsQueryAnalysis.cpp b/src/engine/MaterializedViewsQueryAnalysis.cpp index d7ee78d7c4..6e4d3698f5 100644 --- a/src/engine/MaterializedViewsQueryAnalysis.cpp +++ b/src/engine/MaterializedViewsQueryAnalysis.cpp @@ -162,12 +162,12 @@ bool QueryPatternCache::analyzeSimpleChain(ViewPtr view, const SparqlTriple& a, auto bObj = b.o_.getVariable(); // Insert chain to cache. - // TODO avoid hashing 3x ChainedPredicates preds{aPred.value(), bPred.value()}; - if (!simpleChainCache_.contains(preds)) { - simpleChainCache_[preds] = std::make_shared>(); + auto [it, wasNew] = simpleChainCache_.try_emplace(preds, nullptr); + if (it->second == nullptr) { + it->second = std::make_shared>(); } - simpleChainCache_[preds]->push_back( + it->second->push_back( ChainInfo{std::move(aSubj), std::move(chainVar), std::move(bObj), view}); return true; } diff --git a/src/parser/GraphPatternAnalysis.h b/src/parser/GraphPatternAnalysis.h index 953d2ede9a..a3a99abb81 100644 --- a/src/parser/GraphPatternAnalysis.h +++ b/src/parser/GraphPatternAnalysis.h @@ -32,10 +32,15 @@ struct BasicGraphPatternsInvariantTo { bool operator()(const parsedQuery::Values& values) const; CPP_template(typename T)(requires( - // TODO Whitelist - !ad_utility::SimilarToAny)) bool operator()(const T&) - const { + ad_utility::SimilarToAny< + T, parsedQuery::Union, parsedQuery::Subquery, parsedQuery::TransPath, + parsedQuery::BasicGraphPattern, parsedQuery::Service, + parsedQuery::PathQuery, parsedQuery::SpatialQuery, + parsedQuery::TextSearchQuery, 
parsedQuery::Minus, + parsedQuery::GroupGraphPattern, parsedQuery::Describe, + parsedQuery::Load, parsedQuery::NamedCachedResult, + parsedQuery::MaterializedViewQuery>)) bool + operator()(const T&) const { return false; } }; From ffb7e11032d14566563ce6eb2f9b57e3a0386006 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Wed, 28 Jan 2026 14:22:47 +0100 Subject: [PATCH 32/64] clean up --- src/engine/MaterializedViews.h | 1 - src/engine/MaterializedViewsQueryAnalysis.cpp | 17 ++++++++++------- src/engine/MaterializedViewsQueryAnalysis.h | 3 ++- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/engine/MaterializedViews.h b/src/engine/MaterializedViews.h index 10a502300f..b74cb0f9f8 100644 --- a/src/engine/MaterializedViews.h +++ b/src/engine/MaterializedViews.h @@ -18,7 +18,6 @@ #include "parser/MaterializedViewQuery.h" #include "parser/ParsedQuery.h" #include "parser/SparqlTriple.h" -#include "rdfTypes/Iri.h" #include "util/HashMap.h" #include "util/Synchronized.h" diff --git a/src/engine/MaterializedViewsQueryAnalysis.cpp b/src/engine/MaterializedViewsQueryAnalysis.cpp index 6e4d3698f5..09fdd62163 100644 --- a/src/engine/MaterializedViewsQueryAnalysis.cpp +++ b/src/engine/MaterializedViewsQueryAnalysis.cpp @@ -46,13 +46,16 @@ QueryPatternCache::makeJoinReplacementIndexScans( if (!predicateInView_.contains(iri)) { continue; } - if (triple.s_.isVariable() && triple.o_.isVariable()) { - // This triple could be the right side of a chain join. - chainRight[triple.s_.getVariable()].push_back(tripleIdx); - } - if (triple.o_.isVariable() && triple.s_ != triple.o_) { - // This triple could be the left side of a chain join. - chainLeft[triple.o_.getVariable()].push_back(tripleIdx); + // Check for potential join chain triple. + if (triple.o_.isVariable()) { + if (triple.s_.isVariable()) { + // This triple could be the right side of a chain join. 
+ chainRight[triple.s_.getVariable()].push_back(tripleIdx); + } + if (triple.s_ != triple.o_) { + // This triple could be the left side of a chain join. + chainLeft[triple.o_.getVariable()].push_back(tripleIdx); + } } } else if (path.isSequence()) { AD_THROW( diff --git a/src/engine/MaterializedViewsQueryAnalysis.h b/src/engine/MaterializedViewsQueryAnalysis.h index a8ab9dfe80..4a31438c19 100644 --- a/src/engine/MaterializedViewsQueryAnalysis.h +++ b/src/engine/MaterializedViewsQueryAnalysis.h @@ -13,7 +13,6 @@ #include "parser/TripleComponent.h" #include "rdfTypes/Variable.h" #include "util/StringPairHashMap.h" -#include "util/TypeTraits.h" // Forward declarations to prevent cyclic dependencies. class MaterializedView; @@ -44,6 +43,7 @@ struct MaterializedViewJoinReplacement { std::shared_ptr indexScan_; std::vector coveredTriples_; + // ___________________________________________________________________________ size_t numJoins() const { return coveredTriples_.size() - 1; } }; @@ -51,6 +51,7 @@ struct MaterializedViewJoinReplacement { // used for quickly looking up if a given query can be optimized by making use // of an existing materialized view. class QueryPatternCache { + private: // Simple chains can be found by direct access into a hash map. SimpleChainCache simpleChainCache_; From 7a23840a28855ce80a7644a10fa7bea34c8c80f8 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Thu, 29 Jan 2026 12:16:11 +0100 Subject: [PATCH 33/64] notes and improvements --- src/engine/QueryPlanner.cpp | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index c8b234502e..1dc416ef51 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -1778,6 +1778,7 @@ std::vector> QueryPlanner::fillDpTab( // TODO This could be hash-map based if we would return the // indices in the create helper and pass them as part of `replacementPlans`. 
+ bool hasApplicableReplacementPlans = false; ReplacementPlans applicableReplacementPlans; for (auto& rPlans : replacementPlans) { std::vector applicable; @@ -1786,6 +1787,7 @@ std::vector> QueryPlanner::fillDpTab( if ((plan._idsOfIncludedNodes & coveredNodes) == plan._idsOfIncludedNodes) { applicable.push_back(std::move(plan)); + hasApplicableReplacementPlans = true; } } applicableReplacementPlans.push_back(std::move(applicable)); @@ -1803,10 +1805,23 @@ std::vector> QueryPlanner::fillDpTab( auto impl = useGreedyPlanning ? &QueryPlanner::runGreedyPlanningOnConnectedComponent : &QueryPlanner::runDynamicProgrammingOnConnectedComponent; - // TODO Run greedy also once without replacementPlans -> use - // better result - if (useGreedyPlanning) { - // Remove covered ones. + + std::vector lastDpRow; + + auto addCandidates = [&lastDpRow](std::vector candidates) { + std::move(candidates.begin(), candidates.end(), + std::back_inserter(lastDpRow)); + }; + + if (useGreedyPlanning && hasApplicableReplacementPlans) { + // Plan once with a copy of `components` and without replacements to have + // a baseline plan. This plan may be better than the replacement if a + // certain sorting is required, that the replacement doesn't provide. + addCandidates(std::invoke(impl, this, component, filtersAndOptSubstitutes, + textLimitVec, tg, ReplacementPlans{})); + + // Remove nodes from the `components` that are covered by replacement + // plans. for (const auto& a : applicableReplacementPlans) { for (const auto& p : a) { std::erase_if(component, [&p](const auto& c) { @@ -1814,18 +1829,24 @@ std::vector> QueryPlanner::fillDpTab( }); } } - // Insert replacements. + // Insert replacement plans as initial plans. // TODO How can we ensure that replacements themselves do not // contain each other. This will be probably relevant for stars later. + // Overlapping replacement plans create empty candidates (e.g. + // ?x geo:hasGeometry ?m . ?m geo:asWKT ?g . 
?m geo:asWKT ?g2 ) -> + // Assertion error, greedy can't find a plan because it has only chain + // ?x->?m->?g and ?x->?m->?g2, but no scan ?m+?g or ?m+?g2. for (const auto& a : applicableReplacementPlans) { for (const auto& p : a) { component.push_back(p); } } } - lastDpRowFromComponents.push_back( - std::invoke(impl, this, std::move(component), filtersAndOptSubstitutes, - textLimitVec, tg, applicableReplacementPlans)); + + addCandidates(std::invoke(impl, this, std::move(component), + filtersAndOptSubstitutes, textLimitVec, tg, + applicableReplacementPlans)); + lastDpRowFromComponents.push_back(std::move(lastDpRow)); checkCancellation(); } size_t numConnectedComponents = lastDpRowFromComponents.size(); From ddfce01b64cf400711975be3738ae4d6ec2fbdea Mon Sep 17 00:00:00 2001 From: ullingerc Date: Thu, 29 Jan 2026 14:37:53 +0100 Subject: [PATCH 34/64] Modularize and fix greedy planning --- src/engine/QueryPlanner.cpp | 113 +++++++++++++++++++++++------------- src/engine/QueryPlanner.h | 30 +++++++++- 2 files changed, 102 insertions(+), 41 deletions(-) diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 1dc416ef51..67748bd377 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -1746,7 +1746,7 @@ QueryPlanner::FiltersAndOptionalSubstitutes QueryPlanner::seedFilterSubstitutes( std::vector> QueryPlanner::fillDpTab( const QueryPlanner::TripleGraph& tg, vector filters, TextLimitMap& textLimits, const vector>& children, - const ReplacementPlans& replacementPlans) { + ReplacementPlans replacementPlans) { auto [initialPlans, additionalFilters] = seedWithScansAndText(tg, children, textLimits); ql::ranges::move(additionalFilters, std::back_inserter(filters)); @@ -1776,23 +1776,6 @@ std::vector> QueryPlanner::fillDpTab( coveredNodes |= plan._idsOfIncludedNodes; } - // TODO This could be hash-map based if we would return the - // indices in the create helper and pass them as part of `replacementPlans`. 
- bool hasApplicableReplacementPlans = false; - ReplacementPlans applicableReplacementPlans; - for (auto& rPlans : replacementPlans) { - std::vector applicable; - for (auto& plan : rPlans) { - // Nodes covered by plan are a subset of the covered nodes. - if ((plan._idsOfIncludedNodes & coveredNodes) == - plan._idsOfIncludedNodes) { - applicable.push_back(std::move(plan)); - hasApplicableReplacementPlans = true; - } - } - applicableReplacementPlans.push_back(std::move(applicable)); - } - const size_t budget = getRuntimeParameter<&RuntimeParameters::queryPlanningBudget_>(); bool useGreedyPlanning = countSubgraphs(g, filters, budget) > budget; @@ -1802,6 +1785,10 @@ std::vector> QueryPlanner::fillDpTab( << std::endl; } + auto [applicableReplacementPlans, hasApplicableReplacementPlans] = + findApplicableReplacementPlans(replacementPlans, coveredNodes, + useGreedyPlanning); + auto impl = useGreedyPlanning ? &QueryPlanner::runGreedyPlanningOnConnectedComponent : &QueryPlanner::runDynamicProgrammingOnConnectedComponent; @@ -1820,27 +1807,10 @@ std::vector> QueryPlanner::fillDpTab( addCandidates(std::invoke(impl, this, component, filtersAndOptSubstitutes, textLimitVec, tg, ReplacementPlans{})); - // Remove nodes from the `components` that are covered by replacement - // plans. - for (const auto& a : applicableReplacementPlans) { - for (const auto& p : a) { - std::erase_if(component, [&p](const auto& c) { - return (p._idsOfIncludedNodes & c._idsOfIncludedNodes) != 0; - }); - } - } - // Insert replacement plans as initial plans. - // TODO How can we ensure that replacements themselves do not - // contain each other. This will be probably relevant for stars later. - // Overlapping replacement plans create empty candidates (e.g. - // ?x geo:hasGeometry ?m . ?m geo:asWKT ?g . ?m geo:asWKT ?g2 ) -> - // Assertion error, greedy can't find a plan because it has only chain - // ?x->?m->?g and ?x->?m->?g2, but no scan ?m+?g or ?m+?g2. 
- for (const auto& a : applicableReplacementPlans) { - for (const auto& p : a) { - component.push_back(p); - } - } + // Then remove the plans for the nodes covered by replacement plans and + // insert the replacement plans. + useReplacementPlansForGreedyPlanner(applicableReplacementPlans, + component); } addCandidates(std::invoke(impl, this, std::move(component), @@ -3406,7 +3376,7 @@ void QueryPlanner::GraphPatternPlanner::optimizeCommutatively() { auto lastRow = planner_ .fillDpTab(tg, rootPattern_->_filters, rootPattern_->textLimits_, - candidatePlans_, replacementPlans) + candidatePlans_, std::move(replacementPlans)) .back(); candidateTriples_._triples.clear(); candidatePlans_.clear(); @@ -3424,3 +3394,66 @@ void QueryPlanner::GraphPatternPlanner::visitDescribe( candidatePlans_.push_back(std::vector{std::move(describeOp)}); planner_.checkCancellation(); } + +// _______________________________________________________________ +std::pair +QueryPlanner::findApplicableReplacementPlans( + ReplacementPlans& allReplacementPlans, uint64_t coveredNodeIds, + bool useGreedyPlanning) { + // TODO This could be hash-map based if we would return the + // indices in the create helper and pass them as part of `replacementPlans`. + bool hasApplicableReplacementPlans = false; + ReplacementPlans applicableReplacementPlans; + for (auto& rPlans : allReplacementPlans) { + std::vector applicable; + for (auto& plan : rPlans) { + // Nodes covered by plan must be a subset of the covered nodes. + if ((plan._idsOfIncludedNodes & coveredNodeIds) == + plan._idsOfIncludedNodes) { + applicable.push_back(std::move(plan)); + hasApplicableReplacementPlans = true; + } + } + applicableReplacementPlans.push_back(std::move(applicable)); + } + + // Filter the plans to be disjunctive for greedy planning. This is done in + // reversed order of the number of triples they cover, s.t. plans covering + // more triples are preferred over smaller ones. 
+ if (useGreedyPlanning) { + uint64_t nodesCoveredByReplacementPlans = 0; + for (auto& plans : applicableReplacementPlans | ql::views::reverse) { + std::erase_if(plans, [&](SubtreePlan& plan) { + bool res = + (plan._idsOfIncludedNodes & nodesCoveredByReplacementPlans) != 0; + nodesCoveredByReplacementPlans = + nodesCoveredByReplacementPlans | plan._idsOfIncludedNodes; + return res; + }); + } + } + + return {std::move(applicableReplacementPlans), hasApplicableReplacementPlans}; +} + +// _______________________________________________________________ +void QueryPlanner::useReplacementPlansForGreedyPlanner( + ReplacementPlans& applicableReplacementPlans, + std::vector& connectedComponent) { + // Remove nodes from the `connectedComponent` that are covered by replacement + // plans. + for (const auto& plans : applicableReplacementPlans) { + for (const auto& plan : plans) { + std::erase_if(connectedComponent, [&plan](const auto& c) { + return (plan._idsOfIncludedNodes & c._idsOfIncludedNodes) != 0; + }); + } + } + + // Insert replacement plans as leaf plans. + for (auto& plans : applicableReplacementPlans) { + for (auto& plan : plans) { + connectedComponent.push_back(std::move(plan)); + } + } +} diff --git a/src/engine/QueryPlanner.h b/src/engine/QueryPlanner.h index b77a4c3c72..062d46d293 100644 --- a/src/engine/QueryPlanner.h +++ b/src/engine/QueryPlanner.h @@ -586,7 +586,7 @@ class QueryPlanner { vector> fillDpTab( const TripleGraph& graph, std::vector fs, TextLimitMap& textLimits, const vector>& children, - const ReplacementPlans& replacementPlans); + ReplacementPlans replacementPlans); // Internal subroutine of `fillDpTab` that only works on a single connected // component of the input. 
Throws if the subtrees in the `connectedComponent` @@ -737,6 +737,34 @@ class QueryPlanner { const std::vector& connectedComponent, bool allowReplacementPlans = false); + // Helper for `fillDpTab` that extracts a subset of possible + // `ReplacementPlans` that is applicable to a connected component given by the + // covered node ids of the component. + // + // If the greedy query planning mode is active, this function guarantees that + // the returned replacement plans are disjunctive with regard to their covered + // node ids. + // + // The function returns the applicable replacement plans and a boolean for + // quickly checking whether any were found. + // + // NOTE: This function is destructive w.r.t. `allReplacementPlans`: the used + // replacement plans are moved out. + static std::pair findApplicableReplacementPlans( + ReplacementPlans& allReplacementPlans, uint64_t coveredNodeIds, + bool useGreedyPlanning); + + // Helper for `fillDpTab` that inserts replacement plans into a connected + // component for greedy query planning. The `IndexScan` plans for triples + // covered by the replacement plans are filtered out, s.t. the greedy planner + // is forced to use the replacement plans. + // + // NOTE: For this to work correctly the nodes covered by the replacement plans + // must be disjunctive. 
+ static void useReplacementPlansForGreedyPlanner( + ReplacementPlans& applicableReplacementPlans, + std::vector& connectedComponent); + /// if this Planner is not associated with a queryExecutionContext we are only /// in the unit test mode bool isInTestMode() const { return _qec == nullptr; } From 94b6c7305fda2765a9d370b6359d28b3e36242e9 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Thu, 29 Jan 2026 14:42:32 +0100 Subject: [PATCH 35/64] remove redundant --- src/engine/QueryPlanner.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 67748bd377..8bcea1cdd2 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -3426,8 +3426,7 @@ QueryPlanner::findApplicableReplacementPlans( std::erase_if(plans, [&](SubtreePlan& plan) { bool res = (plan._idsOfIncludedNodes & nodesCoveredByReplacementPlans) != 0; - nodesCoveredByReplacementPlans = - nodesCoveredByReplacementPlans | plan._idsOfIncludedNodes; + nodesCoveredByReplacementPlans |= plan._idsOfIncludedNodes; return res; }); } From cc39f61b69664ec2811a1eefc073dd772bf13a0b Mon Sep 17 00:00:00 2001 From: ullingerc Date: Thu, 29 Jan 2026 14:50:07 +0100 Subject: [PATCH 36/64] use erase_if backport --- src/engine/QueryPlanner.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 8bcea1cdd2..0b4cbcbe95 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -3423,7 +3423,7 @@ QueryPlanner::findApplicableReplacementPlans( if (useGreedyPlanning) { uint64_t nodesCoveredByReplacementPlans = 0; for (auto& plans : applicableReplacementPlans | ql::views::reverse) { - std::erase_if(plans, [&](SubtreePlan& plan) { + ql::erase_if(plans, [&](SubtreePlan& plan) { bool res = (plan._idsOfIncludedNodes & nodesCoveredByReplacementPlans) != 0; nodesCoveredByReplacementPlans |= plan._idsOfIncludedNodes; @@ -3443,7 +3443,7 @@ void 
QueryPlanner::useReplacementPlansForGreedyPlanner( // plans. for (const auto& plans : applicableReplacementPlans) { for (const auto& plan : plans) { - std::erase_if(connectedComponent, [&plan](const auto& c) { + ql::erase_if(connectedComponent, [&plan](const auto& c) { return (plan._idsOfIncludedNodes & c._idsOfIncludedNodes) != 0; }); } From 37e6012f292ec65522c825343ff46b28144e067d Mon Sep 17 00:00:00 2001 From: ullingerc Date: Thu, 29 Jan 2026 14:52:21 +0100 Subject: [PATCH 37/64] comment --- src/engine/QueryPlanner.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/engine/QueryPlanner.h b/src/engine/QueryPlanner.h index 062d46d293..6389aad406 100644 --- a/src/engine/QueryPlanner.h +++ b/src/engine/QueryPlanner.h @@ -453,9 +453,12 @@ class QueryPlanner { // Helper that generates `IndexScan` query plans on materialized views if they // can be used to avoid joins between some of the `triples`. The resulting // plans for part of the `triples` are given in a vector of query planning - // rounds in which they should be added to the planner. For example, at index - // 1 there is a vector of query plans that should be added in round 1 of the - // dynamic programming algorithm. + // rounds in which they should be added to the planner. + // + // For example, at index 1 there is a vector of query plans that should be + // added in round 1 of the dynamic programming algorithm. For the greedy + // algorithm, the `useReplacementPlansForGreedyPlanner` helper handles the + // necessary steps. 
using ReplacementPlans = std::vector>; ReplacementPlans createMaterializedViewJoinReplacements( const parsedQuery::BasicGraphPattern& triples) const; From 59d6ce912f63951b3c25d6c1352a91299b453ceb Mon Sep 17 00:00:00 2001 From: ullingerc Date: Thu, 29 Jan 2026 15:03:12 +0100 Subject: [PATCH 38/64] Test property path getters --- test/parser/PropertyPathTest.cpp | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/test/parser/PropertyPathTest.cpp b/test/parser/PropertyPathTest.cpp index 0d99b1791b..de2cd749e1 100644 --- a/test/parser/PropertyPathTest.cpp +++ b/test/parser/PropertyPathTest.cpp @@ -288,3 +288,33 @@ TEST(PropertyPath, handlePath) { }), 2); } + +// _____________________________________________________________________________ +TEST(PropertyPath, Getters) { + auto path1 = PropertyPath::fromIri(iri1); + EXPECT_TRUE(path1.isIri()); + EXPECT_FALSE(path1.isSequence()); + EXPECT_EQ(path1.getIri(), iri1); + + auto path2 = PropertyPath::makeInverse(PropertyPath::fromIri(iri1)); + EXPECT_FALSE(path2.isIri()); + EXPECT_FALSE(path2.isSequence()); + + auto path3 = PropertyPath::makeAlternative( + {PropertyPath::fromIri(iri1), PropertyPath::fromIri(iri2)}); + EXPECT_FALSE(path3.isIri()); + EXPECT_FALSE(path3.isSequence()); + + auto path4 = PropertyPath::makeSequence( + {PropertyPath::fromIri(iri1), PropertyPath::fromIri(iri2)}); + EXPECT_FALSE(path4.isIri()); + EXPECT_TRUE(path4.isSequence()); + auto matchIri = [](ad_utility::triple_component::Iri iri) + -> ::testing::Matcher { + return ::testing::AllOf( + ::testing::Property(&PropertyPath::isIri, ::testing::IsTrue()), + ::testing::Property(&PropertyPath::getIri, ::testing::Eq(iri))); + }; + EXPECT_THAT(path4.getSequence(), + ::testing::ElementsAre(matchIri(iri1), matchIri(iri2))); +} From a2a7fe94c03ccea0c422569e0cf5b42e80ee3f8d Mon Sep 17 00:00:00 2001 From: ullingerc Date: Fri, 30 Jan 2026 11:21:14 +0100 Subject: [PATCH 39/64] runtime toggle for query rewriting --- src/ServerMain.cpp | 5 
+++++ src/engine/QueryPlanner.cpp | 8 +++++++- src/global/RuntimeParameters.cpp | 1 + src/global/RuntimeParameters.h | 5 +++++ 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/ServerMain.cpp b/src/ServerMain.cpp index 36b404831d..215a28d1e7 100644 --- a/src/ServerMain.cpp +++ b/src/ServerMain.cpp @@ -176,6 +176,11 @@ int main(int argc, char** argv) { &RuntimeParameters::materializedViewWriterMemory_>(), "Memory limit for sorting rows during the writing of materialized " "views."); + add("enable-materialized-view-query-rewrite", + optionFactory.getProgramOption< + &RuntimeParameters::enableMaterializedViewQueryRewrite_>(), + "If set to true, loaded materialized views will be considered as " + "alternative query plans for certain supported query patterns."); po::variables_map optionsMap; try { diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 0b4cbcbe95..6b5851b5af 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -2575,6 +2575,13 @@ auto QueryPlanner::createJoinWithTransitivePath(const SubtreePlan& a, auto QueryPlanner::createMaterializedViewJoinReplacements( const parsedQuery::BasicGraphPattern& triples) const -> ReplacementPlans { ReplacementPlans plans; + + // Check if the user allows query rewriting. + if (!getRuntimeParameter< + &RuntimeParameters::enableMaterializedViewQueryRewrite_>()) { + return plans; + } + // The `MaterializedViewsManager` provides `IndexScan` instances for all the // subsets of `triples` it can rewrite. The individual results do not cover // all items of `triples`, instead each has a vector of triple indices it @@ -3369,7 +3376,6 @@ void QueryPlanner::GraphPatternPlanner::visitSubquery( // _______________________________________________________________ void QueryPlanner::GraphPatternPlanner::optimizeCommutatively() { - // TODO Add runtime parameter to disable join rewriting. 
auto replacementPlans = planner_.createMaterializedViewJoinReplacements(candidateTriples_); auto tg = planner_.createTripleGraph(&candidateTriples_); diff --git a/src/global/RuntimeParameters.cpp b/src/global/RuntimeParameters.cpp index 5a409a2c32..5915513615 100644 --- a/src/global/RuntimeParameters.cpp +++ b/src/global/RuntimeParameters.cpp @@ -51,6 +51,7 @@ RuntimeParameters::RuntimeParameters() { add(materializedViewWriterMemory_); add(defaultQueryTimeout_); add(sortInMemoryThreshold_); + add(enableMaterializedViewQueryRewrite_); defaultQueryTimeout_.setParameterConstraint( [](std::chrono::seconds value, std::string_view parameterName) { diff --git a/src/global/RuntimeParameters.h b/src/global/RuntimeParameters.h index 67cdfbf3f8..b8679890ab 100644 --- a/src/global/RuntimeParameters.h +++ b/src/global/RuntimeParameters.h @@ -135,6 +135,11 @@ struct RuntimeParameters { MemorySizeParameter sortInMemoryThreshold_{ ad_utility::MemorySize::gigabytes(5), "sort-in-memory-threshold"}; + // If set, the query planner checks if suitable materialized views are loaded + // to substitute more expensive query plans. + Bool enableMaterializedViewQueryRewrite_{ + true, "enable-materialized-view-query-rewrite"}; + // ___________________________________________________________________________ // IMPORTANT NOTE: IF YOU ADD PARAMETERS ABOVE, ALSO REGISTER THEM IN THE // CONSTRUCTOR, S.T. THEY CAN ALSO BE ACCESSED VIA THE RUNTIME INTERFACE. 
From 37cb1a9f8fd78ce1e89126cc9b88f871b0912be2 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Fri, 30 Jan 2026 11:31:14 +0100 Subject: [PATCH 40/64] apply Johannes' feedback for BasicGraphPatternsInvariantTo --- src/parser/GraphPatternAnalysis.cpp | 18 +++++++----------- src/parser/GraphPatternAnalysis.h | 23 +++++++++-------------- 2 files changed, 16 insertions(+), 25 deletions(-) diff --git a/src/parser/GraphPatternAnalysis.cpp b/src/parser/GraphPatternAnalysis.cpp index d8f736ff4b..f2454ceeea 100644 --- a/src/parser/GraphPatternAnalysis.cpp +++ b/src/parser/GraphPatternAnalysis.cpp @@ -8,14 +8,6 @@ namespace graphPatternAnalysis { -// _____________________________________________________________________________ -bool BasicGraphPatternsInvariantTo::operator()( - const parsedQuery::Optional&) const { - // TODO Analyze if the optional binds values from the outside - // query. - return false; -} - // _____________________________________________________________________________ bool BasicGraphPatternsInvariantTo::operator()( const parsedQuery::Bind& bind) const { @@ -25,9 +17,13 @@ bool BasicGraphPatternsInvariantTo::operator()( // _____________________________________________________________________________ bool BasicGraphPatternsInvariantTo::operator()( const parsedQuery::Values& values) const { - return !std::ranges::any_of( - values._inlineValues._variables, - [this](const auto& var) { return variables_.contains(var); }); + return + // The `VALUES` doesn't bind to any of the `variables_`. + !std::ranges::any_of( + values._inlineValues._variables, + [this](const auto& var) { return variables_.contains(var); }) && + // There is exactly one row inside the `VALUES`. 
+ values._inlineValues._values.size() == 1; } } // namespace graphPatternAnalysis diff --git a/src/parser/GraphPatternAnalysis.h b/src/parser/GraphPatternAnalysis.h index a3a99abb81..250c881c40 100644 --- a/src/parser/GraphPatternAnalysis.h +++ b/src/parser/GraphPatternAnalysis.h @@ -21,26 +21,21 @@ namespace graphPatternAnalysis { // // NOTE: This does not guarantee completeness, so it might return `false` even // though we could be invariant to a `GraphPatternOperation`. -// -// NOTE: The selected query result is expected to be deduplicated, otherwise the -// result indicated by this helper is not correct. struct BasicGraphPatternsInvariantTo { ad_utility::HashSet variables_; - bool operator()(const parsedQuery::Optional& optional) const; bool operator()(const parsedQuery::Bind& bind) const; bool operator()(const parsedQuery::Values& values) const; - CPP_template(typename T)(requires( - ad_utility::SimilarToAny< - T, parsedQuery::Union, parsedQuery::Subquery, parsedQuery::TransPath, - parsedQuery::BasicGraphPattern, parsedQuery::Service, - parsedQuery::PathQuery, parsedQuery::SpatialQuery, - parsedQuery::TextSearchQuery, parsedQuery::Minus, - parsedQuery::GroupGraphPattern, parsedQuery::Describe, - parsedQuery::Load, parsedQuery::NamedCachedResult, - parsedQuery::MaterializedViewQuery>)) bool - operator()(const T&) const { + template + bool operator()(const T&) const { + using namespace parsedQuery; + // The presence of any of these operations might remove or duplicate rows. 
+ static_assert( + ad_utility::SimilarToAny< + T, Optional, Union, Subquery, TransPath, BasicGraphPattern, Service, + PathQuery, SpatialQuery, TextSearchQuery, Minus, GroupGraphPattern, + Describe, Load, NamedCachedResult, MaterializedViewQuery>); return false; } }; From c208ff24424311421d0d6b9603f944f76facb7fc Mon Sep 17 00:00:00 2001 From: ullingerc Date: Fri, 30 Jan 2026 11:37:09 +0100 Subject: [PATCH 41/64] some clean up --- src/engine/MaterializedViewsQueryAnalysis.cpp | 29 +++++++++++-------- src/engine/MaterializedViewsQueryAnalysis.h | 6 ++++ 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/src/engine/MaterializedViewsQueryAnalysis.cpp b/src/engine/MaterializedViewsQueryAnalysis.cpp index 09fdd62163..aefb55c827 100644 --- a/src/engine/MaterializedViewsQueryAnalysis.cpp +++ b/src/engine/MaterializedViewsQueryAnalysis.cpp @@ -191,18 +191,7 @@ bool QueryPatternCache::analyzeView(ViewPtr view) { // TODO Do we want to report the reason for non-optimizable // queries? - const auto& graphPatterns = parsed._rootGraphPattern._graphPatterns; - BasicGraphPatternsInvariantTo invariantCheck{ - getVariablesPresentInBasicGraphPatterns(graphPatterns)}; - // Filter out graph patterns that do not change the result of the basic graph - // pattern analyzed. 
- // TODO Deduplication necessary when reading, the variables should - // not be in the first three - auto graphPatternsFiltered = - ::ranges::to(parsed._rootGraphPattern._graphPatterns | - ql::views::filter([&](const auto& pattern) { - return !std::visit(invariantCheck, pattern); - })); + auto graphPatternsFiltered = graphPatternInvariantCheck(parsed); if (graphPatternsFiltered.size() != 1) { return false; } @@ -244,4 +233,20 @@ bool QueryPatternCache::analyzeView(ViewPtr view) { return patternFound; } +// _____________________________________________________________________________ +std::vector +QueryPatternCache::graphPatternInvariantCheck(const ParsedQuery& parsed) { + BasicGraphPatternsInvariantTo invariantCheck{ + getVariablesPresentInBasicGraphPatterns( + parsed._rootGraphPattern._graphPatterns)}; + + // Filter out graph patterns that do not change the result of the basic graph + // pattern analyzed. + return ::ranges::to(parsed._rootGraphPattern._graphPatterns | + ql::views::filter([&](const auto& pattern) { + return !std::visit(invariantCheck, + pattern); + })); +} + } // namespace materializedViewsQueryAnalysis diff --git a/src/engine/MaterializedViewsQueryAnalysis.h b/src/engine/MaterializedViewsQueryAnalysis.h index 4a31438c19..70136a917a 100644 --- a/src/engine/MaterializedViewsQueryAnalysis.h +++ b/src/engine/MaterializedViewsQueryAnalysis.h @@ -83,6 +83,12 @@ class QueryPatternCache { // with `a` and `b` switched if it returns `false`. bool analyzeSimpleChain(ViewPtr view, const SparqlTriple& a, const SparqlTriple& b); + + // Helper that filters the graph patterns of a parsed query using + // `BasicGraphPatternInvariantTo`. For details, see the documentation for this + // helper. 
+ static std::vector + graphPatternInvariantCheck(const ParsedQuery& parsed); }; } // namespace materializedViewsQueryAnalysis From 34194477abdc095c20d9e4e2b119054e0abbc1cc Mon Sep 17 00:00:00 2001 From: ullingerc Date: Fri, 30 Jan 2026 11:42:42 +0100 Subject: [PATCH 42/64] explain ignore --- src/engine/MaterializedViewsQueryAnalysis.cpp | 20 +++++++++++++++++-- src/engine/MaterializedViewsQueryAnalysis.h | 2 +- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/engine/MaterializedViewsQueryAnalysis.cpp b/src/engine/MaterializedViewsQueryAnalysis.cpp index aefb55c827..a0861fcf1e 100644 --- a/src/engine/MaterializedViewsQueryAnalysis.cpp +++ b/src/engine/MaterializedViewsQueryAnalysis.cpp @@ -182,6 +182,13 @@ bool QueryPatternCache::analyzeView(ViewPtr view) { return false; } + auto explainIgnore = [&](const std::string& reason) { + AD_LOG_INFO << "Materialized view '" << view->name() + << "' will not be added to the query pattern cache for query " + "rewriting. Reason: " + << reason << "." << std::endl; + }; + // We do not need the `EncodedIriManager` because we are only interested in // analyzing the query structure, not in converting its components to // `ValueId`s. @@ -191,17 +198,22 @@ bool QueryPatternCache::analyzeView(ViewPtr view) { // TODO Do we want to report the reason for non-optimizable // queries? - auto graphPatternsFiltered = graphPatternInvariantCheck(parsed); + auto graphPatternsFiltered = graphPatternInvariantFilter(parsed); if (graphPatternsFiltered.size() != 1) { + explainIgnore( + "The view has more than one graph pattern (even after skipping ignored " + "patterns)"); return false; } const auto& graphPattern = graphPatternsFiltered.at(0); if (!std::holds_alternative(graphPattern)) { + explainIgnore("The graph pattern is not a basic set of triples"); return false; } // TODO Property path is stored as a single predicate here. 
const auto& triples = graphPattern.getBasic()._triples; if (triples.size() == 0) { + explainIgnore("The query body is empty"); return false; } bool patternFound = false; @@ -230,12 +242,16 @@ bool QueryPatternCache::analyzeView(ViewPtr view) { } } + if (!patternFound) { + explainIgnore("No supported query pattern for rewriting joins was found"); + } + return patternFound; } // _____________________________________________________________________________ std::vector -QueryPatternCache::graphPatternInvariantCheck(const ParsedQuery& parsed) { +QueryPatternCache::graphPatternInvariantFilter(const ParsedQuery& parsed) { BasicGraphPatternsInvariantTo invariantCheck{ getVariablesPresentInBasicGraphPatterns( parsed._rootGraphPattern._graphPatterns)}; diff --git a/src/engine/MaterializedViewsQueryAnalysis.h b/src/engine/MaterializedViewsQueryAnalysis.h index 70136a917a..b8657eb202 100644 --- a/src/engine/MaterializedViewsQueryAnalysis.h +++ b/src/engine/MaterializedViewsQueryAnalysis.h @@ -88,7 +88,7 @@ class QueryPatternCache { // `BasicGraphPatternInvariantTo`. For details, see the documentation for this // helper. 
static std::vector - graphPatternInvariantCheck(const ParsedQuery& parsed); + graphPatternInvariantFilter(const ParsedQuery& parsed); }; } // namespace materializedViewsQueryAnalysis From c914c1cbc07ee07a8a020ad532d0855afc213efe Mon Sep 17 00:00:00 2001 From: ullingerc Date: Fri, 30 Jan 2026 11:44:55 +0100 Subject: [PATCH 43/64] fix ambigious symbol --- src/engine/MaterializedViewsQueryAnalysis.cpp | 3 --- src/parser/GraphPatternAnalysis.h | 10 +++++----- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/engine/MaterializedViewsQueryAnalysis.cpp b/src/engine/MaterializedViewsQueryAnalysis.cpp index a0861fcf1e..68788c13fe 100644 --- a/src/engine/MaterializedViewsQueryAnalysis.cpp +++ b/src/engine/MaterializedViewsQueryAnalysis.cpp @@ -195,9 +195,6 @@ bool QueryPatternCache::analyzeView(ViewPtr view) { EncodedIriManager e; auto parsed = SparqlParser::parseQuery(&e, query.value(), {}); - // TODO Do we want to report the reason for non-optimizable - // queries? - auto graphPatternsFiltered = graphPatternInvariantFilter(parsed); if (graphPatternsFiltered.size() != 1) { explainIgnore( diff --git a/src/parser/GraphPatternAnalysis.h b/src/parser/GraphPatternAnalysis.h index 250c881c40..4d2844e025 100644 --- a/src/parser/GraphPatternAnalysis.h +++ b/src/parser/GraphPatternAnalysis.h @@ -31,11 +31,11 @@ struct BasicGraphPatternsInvariantTo { bool operator()(const T&) const { using namespace parsedQuery; // The presence of any of these operations might remove or duplicate rows. 
- static_assert( - ad_utility::SimilarToAny< - T, Optional, Union, Subquery, TransPath, BasicGraphPattern, Service, - PathQuery, SpatialQuery, TextSearchQuery, Minus, GroupGraphPattern, - Describe, Load, NamedCachedResult, MaterializedViewQuery>); + static_assert(ad_utility::SimilarToAny< + T, Optional, Union, Subquery, TransPath, BasicGraphPattern, + Service, PathQuery, SpatialQuery, TextSearchQuery, Minus, + GroupGraphPattern, parsedQuery::Describe, parsedQuery::Load, + NamedCachedResult, MaterializedViewQuery>); return false; } }; From 454a3798bbf9ef0377b811259d0a29c398a94684 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Fri, 30 Jan 2026 15:06:19 +0100 Subject: [PATCH 44/64] fix --- src/parser/GraphPatternAnalysis.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/parser/GraphPatternAnalysis.h b/src/parser/GraphPatternAnalysis.h index 4d2844e025..13ed8727f8 100644 --- a/src/parser/GraphPatternAnalysis.h +++ b/src/parser/GraphPatternAnalysis.h @@ -29,13 +29,16 @@ struct BasicGraphPatternsInvariantTo { template bool operator()(const T&) const { - using namespace parsedQuery; // The presence of any of these operations might remove or duplicate rows. 
static_assert(ad_utility::SimilarToAny< - T, Optional, Union, Subquery, TransPath, BasicGraphPattern, - Service, PathQuery, SpatialQuery, TextSearchQuery, Minus, - GroupGraphPattern, parsedQuery::Describe, parsedQuery::Load, - NamedCachedResult, MaterializedViewQuery>); + T, parsedQuery::Optional, parsedQuery::Union, + parsedQuery::Subquery, parsedQuery::TransPath, + parsedQuery::BasicGraphPattern, parsedQuery::Service, + parsedQuery::PathQuery, parsedQuery::SpatialQuery, + parsedQuery::TextSearchQuery, parsedQuery::Minus, + parsedQuery::GroupGraphPattern, parsedQuery::Describe, + parsedQuery::Load, parsedQuery::NamedCachedResult, + parsedQuery::MaterializedViewQuery>); return false; } }; From f9eeeaa8b252849e44f70a8ff197706402e522e1 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Fri, 30 Jan 2026 15:51:36 +0100 Subject: [PATCH 45/64] merge fix test --- test/MaterializedViewsTest.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/MaterializedViewsTest.cpp b/test/MaterializedViewsTest.cpp index 3f99420843..bac98e59b1 100644 --- a/test/MaterializedViewsTest.cpp +++ b/test/MaterializedViewsTest.cpp @@ -818,8 +818,9 @@ TEST(MaterializedViewsQueryRewriteTest, simpleChain) { h::IndexScanFromStrings("?m", "", "?o"))); // Write a chain structure to the materialized view. 
- MaterializedViewWriter::writeViewToDisk(onDiskBase, viewName, - qlv.parseAndPlanQuery(simpleChain)); + MaterializedViewsManager manager{onDiskBase}; + manager.writeViewToDisk(onDiskBase, viewName, + qlv.parseAndPlanQuery(simpleChain)); qlv.loadMaterializedView(viewName); // With the materialized view loaded, an index scan on the view is performed From ea3b1b00b5d026108f7139f6ac56e3590f1519f4 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Fri, 30 Jan 2026 16:00:55 +0100 Subject: [PATCH 46/64] next try --- src/engine/MaterializedViews.cpp | 1 + test/MaterializedViewsTest.cpp | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/engine/MaterializedViews.cpp b/src/engine/MaterializedViews.cpp index d2d67a6b5a..3ef3ff416e 100644 --- a/src/engine/MaterializedViews.cpp +++ b/src/engine/MaterializedViews.cpp @@ -399,6 +399,7 @@ void MaterializedViewsManager::unloadViewIfLoaded( const std::string& name) const { // `HashMap::erase` is a no-op for nonexisting keys. loadedViews_.wlock()->erase(name); + // TODO Query pattern cache unload. } // _____________________________________________________________________________ diff --git a/test/MaterializedViewsTest.cpp b/test/MaterializedViewsTest.cpp index bac98e59b1..c023d2af38 100644 --- a/test/MaterializedViewsTest.cpp +++ b/test/MaterializedViewsTest.cpp @@ -819,8 +819,7 @@ TEST(MaterializedViewsQueryRewriteTest, simpleChain) { // Write a chain structure to the materialized view. 
MaterializedViewsManager manager{onDiskBase}; - manager.writeViewToDisk(onDiskBase, viewName, - qlv.parseAndPlanQuery(simpleChain)); + manager.writeViewToDisk(viewName, qlv.parseAndPlanQuery(simpleChain)); qlv.loadMaterializedView(viewName); // With the materialized view loaded, an index scan on the view is performed @@ -841,4 +840,19 @@ TEST(MaterializedViewsQueryRewriteTest, simpleChain) { *qet, h::IndexScanFromStrings("", "?_QLever_internal_variable_qp_0", "?c", {Permutation::Enum::SPO})); } + + // An additional `BIND` is ignored and the view can still be used for query + // rewriting. Also uses a different sorting. + qlever::Qlever qlv2{config}; + const std::string simpleChainRenamedPlusBind = + "SELECT ?b ?c ?a ?x { ?b ?c . ?a ?b . BIND(5 AS ?x) }"; + const std::string viewNameBind = "testViewChainPlusBind"; + manager.writeViewToDisk(viewNameBind, + qlv2.parseAndPlanQuery(simpleChainRenamedPlusBind)); + qlv2.loadMaterializedView(viewNameBind); + { + auto [qet, qec, parsed] = qlv2.parseAndPlanQuery(simpleChain); + EXPECT_THAT(*qet, h::IndexScanFromStrings("?m", "?o", "?s", + {Permutation::Enum::SPO})); + } } From 6319d50d776aa88a07e8d9940826c8987c04f168 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Fri, 30 Jan 2026 16:30:02 +0100 Subject: [PATCH 47/64] extend tests to greedy --- test/MaterializedViewsTest.cpp | 96 +++++++++++++++++------------ test/MaterializedViewsTestHelpers.h | 23 +++++++ 2 files changed, 81 insertions(+), 38 deletions(-) diff --git a/test/MaterializedViewsTest.cpp b/test/MaterializedViewsTest.cpp index c023d2af38..2d08355d32 100644 --- a/test/MaterializedViewsTest.cpp +++ b/test/MaterializedViewsTest.cpp @@ -11,6 +11,7 @@ #include "./QueryPlannerTestHelpers.h" #include "./ServerTestHelpers.h" #include "./util/HttpRequestHelpers.h" +#include "./util/RuntimeParametersTestHelpers.h" #include "engine/IndexScan.h" #include "engine/MaterializedViews.h" #include "engine/QueryExecutionContext.h" @@ -788,20 +789,34 @@ 
TEST_F(MaterializedViewsTestLarge, LazyScan) { } } +// Example queries for testing query rewriting. +constexpr std::string_view simpleChain = "SELECT * { ?s ?m . ?m ?o }"; +constexpr std::string_view simpleChainRenamed = + "SELECT * { ?b ?c . ?a ?b }"; +constexpr std::string_view simpleChainFixed = + "SELECT * { / ?c . }"; +constexpr std::string_view simpleChainPlusJoin = + "SELECT * { ?s / ?o . ?s ?o2 }"; +constexpr std::string_view simpleChainRenamedPlusBind = + "SELECT ?a ?b ?c ?x { ?b ?c . ?a ?b . BIND(5 AS ?x) }"; + // _____________________________________________________________________________ -TEST(MaterializedViewsQueryRewriteTest, simpleChain) { +TEST_P(MaterializedViewsQueryRewriteTest, simpleChain) { namespace h = queryPlannerTestHelpers; + RewriteTestParams p = GetParam(); + auto cleanup = + setRuntimeParameterForTest<&RuntimeParameters::queryPlanningBudget_>( + p.queryPlanningBudget_); + // Test dataset and query. - const std::string simpleChain = "SELECT * { ?s ?m . ?m ?o }"; - const std::string simpleChainRenamed = "SELECT * { ?b ?c . ?a ?b }"; - const std::string simpleChainFixed = "SELECT * { / ?c . }"; const std::string chainTtl = " . \n" " . \n" " . \n" " . \n" - " \"abc\" ."; + " \"abc\" . \n" + " . \n"; const std::string onDiskBase = "_materializedViewRewriteChain"; const std::string viewName = "testViewChain"; @@ -814,45 +829,50 @@ TEST(MaterializedViewsQueryRewriteTest, simpleChain) { qlever::Qlever qlv{config}; // Without the materialized view, a regular join is executed. - h::expect(simpleChain, h::Join(h::IndexScanFromStrings("?s", "", "?m"), - h::IndexScanFromStrings("?m", "", "?o"))); + h::expect(std::string{simpleChain}, + h::Join(h::IndexScanFromStrings("?s", "", "?m"), + h::IndexScanFromStrings("?m", "", "?o"))); // Write a chain structure to the materialized view. 
MaterializedViewsManager manager{onDiskBase}; - manager.writeViewToDisk(viewName, qlv.parseAndPlanQuery(simpleChain)); + manager.writeViewToDisk(viewName, qlv.parseAndPlanQuery(p.writeQuery_)); qlv.loadMaterializedView(viewName); // With the materialized view loaded, an index scan on the view is performed // instead of a regular join. - { - auto [qet, qec, parsed] = qlv.parseAndPlanQuery(simpleChain); - EXPECT_THAT(*qet, h::IndexScanFromStrings("?s", "?m", "?o", - {Permutation::Enum::SPO})); - } - { - auto [qet, qec, parsed] = qlv.parseAndPlanQuery(simpleChainRenamed); - EXPECT_THAT(*qet, h::IndexScanFromStrings("?a", "?b", "?c", - {Permutation::Enum::SPO})); - } - { - auto [qet, qec, parsed] = qlv.parseAndPlanQuery(simpleChainFixed); - EXPECT_THAT( - *qet, h::IndexScanFromStrings("", "?_QLever_internal_variable_qp_0", - "?c", {Permutation::Enum::SPO})); - } + auto qpExpect = [](qlever::Qlever& qlv, const auto& query, + ::testing::Matcher matcher, + source_location sourceLocation = AD_CURRENT_SOURCE_LOC()) { + auto l = generateLocationTrace(sourceLocation); + auto [qet, qec, parsed] = qlv.parseAndPlanQuery(std::string{query}); + EXPECT_THAT(*qet, matcher); + }; - // An additional `BIND` is ignored and the view can still be used for query - // rewriting. Also uses a different sorting. - qlever::Qlever qlv2{config}; - const std::string simpleChainRenamedPlusBind = - "SELECT ?b ?c ?a ?x { ?b ?c . ?a ?b . 
BIND(5 AS ?x) }"; - const std::string viewNameBind = "testViewChainPlusBind"; - manager.writeViewToDisk(viewNameBind, - qlv2.parseAndPlanQuery(simpleChainRenamedPlusBind)); - qlv2.loadMaterializedView(viewNameBind); - { - auto [qet, qec, parsed] = qlv2.parseAndPlanQuery(simpleChain); - EXPECT_THAT(*qet, h::IndexScanFromStrings("?m", "?o", "?s", - {Permutation::Enum::SPO})); - } + qpExpect(qlv, std::string{simpleChain}, + h::IndexScanFromStrings("?s", "?m", "?o", {Permutation::Enum::SPO})); + qpExpect(qlv, simpleChainRenamed, + h::IndexScanFromStrings("?a", "?b", "?c", {Permutation::Enum::SPO})); + qpExpect(qlv, simpleChainFixed, + h::IndexScanFromStrings("", "?_QLever_internal_variable_qp_0", + "?c", {Permutation::Enum::SPO})); + qpExpect( + qlv, simpleChainPlusJoin, + h::Join(h::IndexScanFromStrings("?s", "?_QLever_internal_variable_qp_0", + "?o", {Permutation::Enum::SPO}), + h::IndexScanFromStrings("?s", "", "?o2"))); } + +// _____________________________________________________________________________ +INSTANTIATE_TEST_SUITE_P( + MaterializedViewsTest, MaterializedViewsQueryRewriteTest, + ::testing::Values( + // Default case. + RewriteTestParams{std::string{simpleChain}, 1500}, + + // Default query for writing the materialized view, but forced greedy + // planning. + RewriteTestParams{std::string{simpleChain}, 1}, + + // An additional `BIND` is ignored and the view can still be used for + // query rewriting. Also uses a different sorting. + RewriteTestParams{std::string{simpleChainRenamedPlusBind}, 1500})); diff --git a/test/MaterializedViewsTestHelpers.h b/test/MaterializedViewsTestHelpers.h index 2829fe5490..5b1d98438a 100644 --- a/test/MaterializedViewsTestHelpers.h +++ b/test/MaterializedViewsTestHelpers.h @@ -143,6 +143,29 @@ class MaterializedViewsTestLarge : public MaterializedViewsTest { } }; +// _____________________________________________________________________________ +struct RewriteTestParams { + // Query to write the test view. 
+ std::string writeQuery_; + + // Enforce a query planning budget to allow testing the greedy query planner + // with toy examples. + size_t queryPlanningBudget_; +}; + +// _____________________________________________________________________________ +class MaterializedViewsQueryRewriteTest + : public ::testing::TestWithParam { + protected: + std::stringstream log_; + + // ___________________________________________________________________________ + void SetUp() override { ad_utility::setGlobalLoggingStream(&log_); } + + // ___________________________________________________________________________ + void TearDown() override { ad_utility::setGlobalLoggingStream(&std::cout); } +}; + } // namespace materializedViewsTestHelpers #endif // QLEVER_TEST_MATERIALIZEDVIEWSTESTHELPERS_H_ From fae24b5053ce3186f2b6d620f409e9731dd657df Mon Sep 17 00:00:00 2001 From: ullingerc Date: Fri, 30 Jan 2026 16:34:02 +0100 Subject: [PATCH 48/64] todo --- test/MaterializedViewsTest.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/MaterializedViewsTest.cpp b/test/MaterializedViewsTest.cpp index 2d08355d32..52de507e41 100644 --- a/test/MaterializedViewsTest.cpp +++ b/test/MaterializedViewsTest.cpp @@ -848,7 +848,7 @@ TEST_P(MaterializedViewsQueryRewriteTest, simpleChain) { EXPECT_THAT(*qet, matcher); }; - qpExpect(qlv, std::string{simpleChain}, + qpExpect(qlv, simpleChain, h::IndexScanFromStrings("?s", "?m", "?o", {Permutation::Enum::SPO})); qpExpect(qlv, simpleChainRenamed, h::IndexScanFromStrings("?a", "?b", "?c", {Permutation::Enum::SPO})); @@ -860,6 +860,8 @@ TEST_P(MaterializedViewsQueryRewriteTest, simpleChain) { h::Join(h::IndexScanFromStrings("?s", "?_QLever_internal_variable_qp_0", "?o", {Permutation::Enum::SPO}), h::IndexScanFromStrings("?s", "", "?o2"))); + + // TODO Test overlapping view plans. 
} // _____________________________________________________________________________ From 4a7416560d7d9978ab9ce7be9df6d5b6dd841b89 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Fri, 30 Jan 2026 16:36:57 +0100 Subject: [PATCH 49/64] improve test --- test/MaterializedViewsTest.cpp | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/test/MaterializedViewsTest.cpp b/test/MaterializedViewsTest.cpp index 52de507e41..46182e1a31 100644 --- a/test/MaterializedViewsTest.cpp +++ b/test/MaterializedViewsTest.cpp @@ -847,19 +847,18 @@ TEST_P(MaterializedViewsQueryRewriteTest, simpleChain) { auto [qet, qec, parsed] = qlv.parseAndPlanQuery(std::string{query}); EXPECT_THAT(*qet, matcher); }; + auto viewScan = [](std::string a, std::string b, std::string c) { + return h::IndexScanFromStrings(std::move(a), std::move(b), std::move(c), + {Permutation::Enum::SPO}); + }; - qpExpect(qlv, simpleChain, - h::IndexScanFromStrings("?s", "?m", "?o", {Permutation::Enum::SPO})); - qpExpect(qlv, simpleChainRenamed, - h::IndexScanFromStrings("?a", "?b", "?c", {Permutation::Enum::SPO})); + qpExpect(qlv, simpleChain, viewScan("?s", "?m", "?o")); + qpExpect(qlv, simpleChainRenamed, viewScan("?a", "?b", "?c")); qpExpect(qlv, simpleChainFixed, - h::IndexScanFromStrings("", "?_QLever_internal_variable_qp_0", - "?c", {Permutation::Enum::SPO})); - qpExpect( - qlv, simpleChainPlusJoin, - h::Join(h::IndexScanFromStrings("?s", "?_QLever_internal_variable_qp_0", - "?o", {Permutation::Enum::SPO}), - h::IndexScanFromStrings("?s", "", "?o2"))); + viewScan("", "?_QLever_internal_variable_qp_0", "?c")); + qpExpect(qlv, simpleChainPlusJoin, + h::Join(viewScan("?s", "?_QLever_internal_variable_qp_0", "?o"), + h::IndexScanFromStrings("?s", "", "?o2"))); // TODO Test overlapping view plans. 
} From 98eab72504e5834baa25e62f47c1097a1509b742 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Mon, 2 Feb 2026 13:50:48 +0100 Subject: [PATCH 50/64] checkout util changes from other branch --- src/parser/CMakeLists.txt | 1 + src/parser/GraphPatternAnalysis.cpp | 29 ++++++++++++ src/parser/GraphPatternAnalysis.h | 48 ++++++++++++++++++++ src/parser/GraphPatternOperation.cpp | 30 ++++++++++++ src/parser/GraphPatternOperation.h | 12 +++++ src/parser/MaterializedViewQuery.cpp | 6 +++ src/parser/MaterializedViewQuery.h | 6 ++- src/parser/PropertyPath.cpp | 12 +++++ src/parser/PropertyPath.h | 7 +++ src/util/StringPairHashMap.h | 68 ++++++++++++++++++++++++++++ test/parser/PropertyPathTest.cpp | 30 ++++++++++++ 11 files changed, 248 insertions(+), 1 deletion(-) create mode 100644 src/parser/GraphPatternAnalysis.cpp create mode 100644 src/parser/GraphPatternAnalysis.h create mode 100644 src/util/StringPairHashMap.h diff --git a/src/parser/CMakeLists.txt b/src/parser/CMakeLists.txt index 8c76f41716..1629ea784d 100644 --- a/src/parser/CMakeLists.txt +++ b/src/parser/CMakeLists.txt @@ -30,5 +30,6 @@ add_library(parser Quads.cpp UpdateTriples.cpp MaterializedViewQuery.cpp + GraphPatternAnalysis.cpp ) qlever_target_link_libraries(parser sparqlParser parserData sparqlExpressions rdfEscaping global re2::re2 util engine index rdfTypes) diff --git a/src/parser/GraphPatternAnalysis.cpp b/src/parser/GraphPatternAnalysis.cpp new file mode 100644 index 0000000000..f2454ceeea --- /dev/null +++ b/src/parser/GraphPatternAnalysis.cpp @@ -0,0 +1,29 @@ +// Copyright 2026 The QLever Authors, in particular: +// +// 2026 Christoph Ullinger , UFR +// +// UFR = University of Freiburg, Chair of Algorithms and Data Structures + +#include "parser/GraphPatternAnalysis.h" + +namespace graphPatternAnalysis { + +// _____________________________________________________________________________ +bool BasicGraphPatternsInvariantTo::operator()( + const parsedQuery::Bind& bind) const { + return 
!variables_.contains(bind._target); +} + +// _____________________________________________________________________________ +bool BasicGraphPatternsInvariantTo::operator()( + const parsedQuery::Values& values) const { + return + // The `VALUES` doesn't bind to any of the `variables_`. + !std::ranges::any_of( + values._inlineValues._variables, + [this](const auto& var) { return variables_.contains(var); }) && + // There is exactly one row inside the `VALUES`. + values._inlineValues._values.size() == 1; +} + +} // namespace graphPatternAnalysis diff --git a/src/parser/GraphPatternAnalysis.h b/src/parser/GraphPatternAnalysis.h new file mode 100644 index 0000000000..13ed8727f8 --- /dev/null +++ b/src/parser/GraphPatternAnalysis.h @@ -0,0 +1,48 @@ +// Copyright 2026 The QLever Authors, in particular: +// +// 2026 Christoph Ullinger , UFR +// +// UFR = University of Freiburg, Chair of Algorithms and Data Structures + +#ifndef QLEVER_SRC_PARSER_GRAPHPATTERNANALYSIS_H_ +#define QLEVER_SRC_PARSER_GRAPHPATTERNANALYSIS_H_ + +#include "parser/GraphPatternOperation.h" + +// This module contains helpers for analyzing the structure of graph patterns. + +// _____________________________________________________________________________ +namespace graphPatternAnalysis { + +// Check whether certain graph patterns can be ignored when we are only +// interested in the bindings for variables from `variables_` as they do not +// affect the result of a query that only selects `variables_`. This is +// currently used for the `MaterializedViewsManager`'s `QueryPatternCache`. +// +// NOTE: This does not guarantee completeness, so it might return `false` even +// though we could be invariant to a `GraphPatternOperation`. 
+struct BasicGraphPatternsInvariantTo { + ad_utility::HashSet variables_; + + bool operator()(const parsedQuery::Bind& bind) const; + bool operator()(const parsedQuery::Values& values) const; + + template + bool operator()(const T&) const { + // The presence of any of these operations might remove or duplicate rows. + static_assert(ad_utility::SimilarToAny< + T, parsedQuery::Optional, parsedQuery::Union, + parsedQuery::Subquery, parsedQuery::TransPath, + parsedQuery::BasicGraphPattern, parsedQuery::Service, + parsedQuery::PathQuery, parsedQuery::SpatialQuery, + parsedQuery::TextSearchQuery, parsedQuery::Minus, + parsedQuery::GroupGraphPattern, parsedQuery::Describe, + parsedQuery::Load, parsedQuery::NamedCachedResult, + parsedQuery::MaterializedViewQuery>); + return false; + } +}; + +} // namespace graphPatternAnalysis + +#endif // QLEVER_SRC_PARSER_GRAPHPATTERNANALYSIS_H_ diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index b92e2eb46d..0abc32cdd7 100644 --- a/src/parser/GraphPatternOperation.cpp +++ b/src/parser/GraphPatternOperation.cpp @@ -81,4 +81,34 @@ void BasicGraphPattern::appendTriples(BasicGraphPattern other) { auto inner = _expression.getDescriptor(); return "BIND (" + inner + " AS " + _target.name() + ")"; } + +// ____________________________________________________________________________ +void BasicGraphPattern::collectAllContainedVariables( + ad_utility::HashSet& vars) const { + for (const SparqlTriple& t : _triples) { + if (t.s_.isVariable()) { + vars.insert(t.s_.getVariable()); + } + if (auto predicate = t.getPredicateVariable()) { + vars.insert(predicate.value()); + } + if (t.o_.isVariable()) { + vars.insert(t.o_.getVariable()); + } + } +} + +// _____________________________________________________________________________ +ad_utility::HashSet getVariablesPresentInBasicGraphPatterns( + const std::vector& graphPatterns) { + ad_utility::HashSet vars; + for (const auto& graphPattern : graphPatterns) { + if 
(!std::holds_alternative(graphPattern)) { + continue; + } + graphPattern.getBasic().collectAllContainedVariables(vars); + } + return vars; +} + } // namespace parsedQuery diff --git a/src/parser/GraphPatternOperation.h b/src/parser/GraphPatternOperation.h index 4f16be3f9a..041e357616 100644 --- a/src/parser/GraphPatternOperation.h +++ b/src/parser/GraphPatternOperation.h @@ -80,8 +80,20 @@ struct BasicGraphPattern { std::vector _triples; /// Append the triples from `other` to this `BasicGraphPattern` void appendTriples(BasicGraphPattern other); + + // Collect all the `Variable`s present in this `BasicGraphPattern` and add + // them to a `HashSet`. + void collectAllContainedVariables(ad_utility::HashSet& vars) const; }; +// Extract all variables present in a set of `BasicGraphPatterns` contained in +// `GraphPatternOperation`s. +// +// IMPORTANT: This function does not consider variables that are contained in +// other types of `GraphPatternOperation`s. +ad_utility::HashSet getVariablesPresentInBasicGraphPatterns( + const std::vector& graphPatterns); + /// A `Values` clause struct Values { SparqlValues _inlineValues; diff --git a/src/parser/MaterializedViewQuery.cpp b/src/parser/MaterializedViewQuery.cpp index 7ad56493e9..2ee31d2a4d 100644 --- a/src/parser/MaterializedViewQuery.cpp +++ b/src/parser/MaterializedViewQuery.cpp @@ -92,6 +92,12 @@ MaterializedViewQuery::MaterializedViewQuery(const SparqlTriple& triple) { addRequestedColumn(requestedColumn, simpleTriple.o_); } +// _____________________________________________________________________________ +MaterializedViewQuery::MaterializedViewQuery(std::string name, + RequestedColumns requestedColumns) + : viewName_{std::move(name)}, + requestedColumns_{std::move(requestedColumns)} {}; + // _____________________________________________________________________________ ad_utility::HashSet MaterializedViewQuery::getVarsToKeep() const { ad_utility::HashSet varsToKeep; diff --git a/src/parser/MaterializedViewQuery.h 
b/src/parser/MaterializedViewQuery.h index 4a44b4f9fc..886e239943 100644 --- a/src/parser/MaterializedViewQuery.h +++ b/src/parser/MaterializedViewQuery.h @@ -47,7 +47,8 @@ struct MaterializedViewQuery : MagicServiceQuery { // column names in the query result or literals/IRIs to restrict the column // on. This can be used for filtering the results and reading any number of // payload columns from the materialized view. - ad_utility::HashMap requestedColumns_; + using RequestedColumns = ad_utility::HashMap; + RequestedColumns requestedColumns_; // This constructor takes an IRI consisting of the magic service IRI for // materialized views with the view name as a suffix. If this is used, add the @@ -58,6 +59,9 @@ struct MaterializedViewQuery : MagicServiceQuery { // are necessary in this case. explicit MaterializedViewQuery(const SparqlTriple& triple); + // For query rewriting: Initialize directly using name and requested columns. + MaterializedViewQuery(std::string name, RequestedColumns requestedColumns); + void addParameter(const SparqlTriple& triple) override; // Return the variables that should be visible from this read on the diff --git a/src/parser/PropertyPath.cpp b/src/parser/PropertyPath.cpp index cccfad61fe..b405a70ccc 100644 --- a/src/parser/PropertyPath.cpp +++ b/src/parser/PropertyPath.cpp @@ -130,6 +130,18 @@ bool PropertyPath::isIri() const { return std::holds_alternative(path_); } +// _____________________________________________________________________________ +const std::vector& PropertyPath::getSequence() const { + AD_CONTRACT_CHECK(isSequence()); + return std::get(path_).children_; +} + +// _____________________________________________________________________________ +bool PropertyPath::isSequence() const { + return std::holds_alternative(path_) && + std::get(path_).modifier_ == Modifier::SEQUENCE; +} + // _____________________________________________________________________________ std::optional> PropertyPath::getChildOfInvertedPath() const { 
diff --git a/src/parser/PropertyPath.h b/src/parser/PropertyPath.h index 08fa199863..0daab58efe 100644 --- a/src/parser/PropertyPath.h +++ b/src/parser/PropertyPath.h @@ -134,6 +134,13 @@ class PropertyPath { // otherwise. bool isIri() const; + // If the path is a sequence, return the children (that is, the parts of the + // sequence). If the path is not a sequence this will throw. + const std::vector& getSequence() const; + + // Check if the path is a sequence. + bool isSequence() const; + // If the path is a modified path with an inverse modifier, return the pointer // to its only child. Otherwise, return nullptr. std::optional> diff --git a/src/util/StringPairHashMap.h b/src/util/StringPairHashMap.h new file mode 100644 index 0000000000..0b8ff98965 --- /dev/null +++ b/src/util/StringPairHashMap.h @@ -0,0 +1,68 @@ +// Copyright 2026 The QLever Authors, in particular: +// +// 2026 Christoph Ullinger , UFR +// +// UFR = University of Freiburg, Chair of Algorithms and Data Structures + +#ifndef QLEVER_SRC_UTIL_STRINGPAIRHASHMAP_H_ +#define QLEVER_SRC_UTIL_STRINGPAIRHASHMAP_H_ + +#include "util/HashMap.h" + +// This module provides a modified version of `ad_utility::HashMap` that uses +// pairs of strings as keys. Unlike the default hash map it allows looking up +// values with pairs of string views as keys. This is implemented using custom +// hash and equality operators. + +// _____________________________________________________________________________ +namespace ad_utility { + +// _____________________________________________________________________________ +namespace detail { + +using StringPair = std::pair; +using StringViewPair = std::pair; + +// _____________________________________________________________________________ +struct StringPairHash { + // Allows looking up values from a hash map with `StringPair` keys also with + // `StringViewPair`. 
+ using is_transparent = void; + + size_t operator()(const StringPair& p) const { + return absl::HashOf(p.first, p.second); + } + + size_t operator()(const StringViewPair& p) const { + return absl::HashOf(p.first, p.second); + } +}; + +// _____________________________________________________________________________ +struct StringPairEq { + using is_transparent = void; + + bool operator()(const StringPair& a, const StringPair& b) const { + return a == b; + } + + bool operator()(const StringPair& a, const StringViewPair& b) const { + return a.first == b.first && a.second == b.second; + } + + bool operator()(const StringViewPair& a, const StringPair& b) const { + return b.first == a.first && b.second == a.second; + } +}; + +} // namespace detail + +template +using StringPairHashMap = + ad_utility::HashMap; + +} // namespace ad_utility + +#endif // QLEVER_SRC_UTIL_STRINGPAIRHASHMAP_H_ diff --git a/test/parser/PropertyPathTest.cpp b/test/parser/PropertyPathTest.cpp index 0d99b1791b..de2cd749e1 100644 --- a/test/parser/PropertyPathTest.cpp +++ b/test/parser/PropertyPathTest.cpp @@ -288,3 +288,33 @@ TEST(PropertyPath, handlePath) { }), 2); } + +// _____________________________________________________________________________ +TEST(PropertyPath, Getters) { + auto path1 = PropertyPath::fromIri(iri1); + EXPECT_TRUE(path1.isIri()); + EXPECT_FALSE(path1.isSequence()); + EXPECT_EQ(path1.getIri(), iri1); + + auto path2 = PropertyPath::makeInverse(PropertyPath::fromIri(iri1)); + EXPECT_FALSE(path2.isIri()); + EXPECT_FALSE(path2.isSequence()); + + auto path3 = PropertyPath::makeAlternative( + {PropertyPath::fromIri(iri1), PropertyPath::fromIri(iri2)}); + EXPECT_FALSE(path3.isIri()); + EXPECT_FALSE(path3.isSequence()); + + auto path4 = PropertyPath::makeSequence( + {PropertyPath::fromIri(iri1), PropertyPath::fromIri(iri2)}); + EXPECT_FALSE(path4.isIri()); + EXPECT_TRUE(path4.isSequence()); + auto matchIri = [](ad_utility::triple_component::Iri iri) + -> ::testing::Matcher { + return 
::testing::AllOf( + ::testing::Property(&PropertyPath::isIri, ::testing::IsTrue()), + ::testing::Property(&PropertyPath::getIri, ::testing::Eq(iri))); + }; + EXPECT_THAT(path4.getSequence(), + ::testing::ElementsAre(matchIri(iri1), matchIri(iri2))); +} From 04d014316c3ca503cfad336b8fedfefed2f4b666 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Mon, 2 Feb 2026 14:22:42 +0100 Subject: [PATCH 51/64] apply Johannes' feedback --- src/engine/QueryPlanner.h | 11 ++--------- src/parser/GraphPatternAnalysis.cpp | 13 +++++++------ src/parser/GraphPatternAnalysis.h | 27 ++++++++++++++++----------- src/parser/GraphPatternOperation.cpp | 22 +++++++--------------- src/parser/GraphPatternOperation.h | 7 ++++--- src/parser/SparqlTriple.h | 13 +++++++++++++ src/util/TransparentFunctors.h | 9 +++++++++ 7 files changed, 58 insertions(+), 44 deletions(-) diff --git a/src/engine/QueryPlanner.h b/src/engine/QueryPlanner.h index 94c6ce9a30..88d2ca39af 100644 --- a/src/engine/QueryPlanner.h +++ b/src/engine/QueryPlanner.h @@ -65,15 +65,8 @@ class QueryPlanner { Node(size_t id, SparqlTriple t, std::optional graphVariable = std::nullopt) : id_(id), triple_(std::move(t)) { - if (triple_.s_.isVariable()) { - _variables.insert(triple_.s_.getVariable()); - } - if (auto predicate = triple_.getPredicateVariable()) { - _variables.insert(predicate.value()); - } - if (triple_.o_.isVariable()) { - _variables.insert(triple_.o_.getVariable()); - } + triple_.forEachVariable( + [this](const auto& var) { _variables.insert(var); }); if (graphVariable.has_value()) { _variables.insert(std::move(graphVariable).value()); } diff --git a/src/parser/GraphPatternAnalysis.cpp b/src/parser/GraphPatternAnalysis.cpp index f2454ceeea..8969420ae4 100644 --- a/src/parser/GraphPatternAnalysis.cpp +++ b/src/parser/GraphPatternAnalysis.cpp @@ -16,14 +16,15 @@ bool BasicGraphPatternsInvariantTo::operator()( // _____________________________________________________________________________ bool 
BasicGraphPatternsInvariantTo::operator()( - const parsedQuery::Values& values) const { + const parsedQuery::Values& valuesClause) const { + const auto& [variables, values] = valuesClause._inlineValues; return - // The `VALUES` doesn't bind to any of the `variables_`. - !std::ranges::any_of( - values._inlineValues._variables, - [this](const auto& var) { return variables_.contains(var); }) && // There is exactly one row inside the `VALUES`. - values._inlineValues._values.size() == 1; + values.size() == 1 && + // The `VALUES` doesn't bind to any of the `variables_`. + ql::ranges::none_of(variables, [this](const auto& var) { + return variables_.contains(var); + }); } } // namespace graphPatternAnalysis diff --git a/src/parser/GraphPatternAnalysis.h b/src/parser/GraphPatternAnalysis.h index 13ed8727f8..91f7c659c0 100644 --- a/src/parser/GraphPatternAnalysis.h +++ b/src/parser/GraphPatternAnalysis.h @@ -16,8 +16,15 @@ namespace graphPatternAnalysis { // Check whether certain graph patterns can be ignored when we are only // interested in the bindings for variables from `variables_` as they do not -// affect the result of a query that only selects `variables_`. This is -// currently used for the `MaterializedViewsManager`'s `QueryPatternCache`. +// affect the result for these `variables_`. +// +// For example: A basic graph pattern (a list of triples) is invariant to a +// `BIND` statement whose target variable is not contained in the basic graph +// pattern, because the `BIND` only adds its own column, but neither adds nor +// deletes result rows. +// +// This is currently used for the `MaterializedViewsManager`'s +// `QueryPatternCache`. // // NOTE: This does not guarantee completeness, so it might return `false` even // though we could be invariant to a `GraphPatternOperation`. @@ -30,15 +37,13 @@ struct BasicGraphPatternsInvariantTo { template bool operator()(const T&) const { // The presence of any of these operations might remove or duplicate rows. 
- static_assert(ad_utility::SimilarToAny< - T, parsedQuery::Optional, parsedQuery::Union, - parsedQuery::Subquery, parsedQuery::TransPath, - parsedQuery::BasicGraphPattern, parsedQuery::Service, - parsedQuery::PathQuery, parsedQuery::SpatialQuery, - parsedQuery::TextSearchQuery, parsedQuery::Minus, - parsedQuery::GroupGraphPattern, parsedQuery::Describe, - parsedQuery::Load, parsedQuery::NamedCachedResult, - parsedQuery::MaterializedViewQuery>); + namespace pq = parsedQuery; + static_assert( + ad_utility::SimilarToAny< + T, pq::Optional, pq::Union, pq::Subquery, pq::TransPath, + pq::BasicGraphPattern, pq::Service, pq::PathQuery, pq::SpatialQuery, + pq::TextSearchQuery, pq::Minus, pq::GroupGraphPattern, pq::Describe, + pq::Load, pq::NamedCachedResult, pq::MaterializedViewQuery>); return false; } }; diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index 0abc32cdd7..01c0754362 100644 --- a/src/parser/GraphPatternOperation.cpp +++ b/src/parser/GraphPatternOperation.cpp @@ -86,27 +86,19 @@ void BasicGraphPattern::appendTriples(BasicGraphPattern other) { void BasicGraphPattern::collectAllContainedVariables( ad_utility::HashSet& vars) const { for (const SparqlTriple& t : _triples) { - if (t.s_.isVariable()) { - vars.insert(t.s_.getVariable()); - } - if (auto predicate = t.getPredicateVariable()) { - vars.insert(predicate.value()); - } - if (t.o_.isVariable()) { - vars.insert(t.o_.getVariable()); - } + t.forEachVariable([&vars](const auto& var) { vars.insert(var); }); } } // _____________________________________________________________________________ -ad_utility::HashSet getVariablesPresentInBasicGraphPatterns( +ad_utility::HashSet getVariablesPresentInFirstBasicGraphPattern( const std::vector& graphPatterns) { ad_utility::HashSet vars; - for (const auto& graphPattern : graphPatterns) { - if (!std::holds_alternative(graphPattern)) { - continue; - } - graphPattern.getBasic().collectAllContainedVariables(vars); + auto 
basicGraphPatterns = + ad_utility::filterRangeOfVariantsByType( + graphPatterns); + if (!ql::ranges::empty(basicGraphPatterns)) { + (*basicGraphPatterns.begin()).collectAllContainedVariables(vars); } return vars; } diff --git a/src/parser/GraphPatternOperation.h b/src/parser/GraphPatternOperation.h index 041e357616..9e6687d58a 100644 --- a/src/parser/GraphPatternOperation.h +++ b/src/parser/GraphPatternOperation.h @@ -86,12 +86,13 @@ struct BasicGraphPattern { void collectAllContainedVariables(ad_utility::HashSet& vars) const; }; -// Extract all variables present in a set of `BasicGraphPatterns` contained in -// `GraphPatternOperation`s. +// Extract all variables present in the first `BasicGraphPattern` contained in +// a vector of `GraphPatternOperation`s. It is used for skipping some graph +// patterns in `MaterializedViewsQueryAnalysis.cpp`. // // IMPORTANT: This function does not consider variables that are contained in // other types of `GraphPatternOperation`s. -ad_utility::HashSet getVariablesPresentInBasicGraphPatterns( +ad_utility::HashSet getVariablesPresentInFirstBasicGraphPattern( const std::vector& graphPatterns); /// A `Values` clause diff --git a/src/parser/SparqlTriple.h b/src/parser/SparqlTriple.h index 2ba815c743..e0212a4a73 100644 --- a/src/parser/SparqlTriple.h +++ b/src/parser/SparqlTriple.h @@ -133,6 +133,19 @@ class SparqlTriple auto ptr = std::get_if(&p_); return (ptr != nullptr && *ptr == variable); } + + // Call a function for every variable contained in the triple.
+ void forEachVariable(auto function) const { + if (s_.isVariable()) { + function(s_.getVariable()); + } + if (auto predicate = getPredicateVariable()) { + function(predicate.value()); + } + if (o_.isVariable()) { + function(o_.getVariable()); + } + } }; #endif // QLEVER_SRC_PARSER_SPARQLTRIPLE_H diff --git a/src/util/TransparentFunctors.h b/src/util/TransparentFunctors.h index adc6e88673..94bac92192 100644 --- a/src/util/TransparentFunctors.h +++ b/src/util/TransparentFunctors.h @@ -119,6 +119,15 @@ static constexpr detail::HoldsAlternativeImpl holdsAlternative; template static constexpr detail::GetImpl get; +// Helper that filters a range, like `std::vector` which contains `std::variant` +// elements by a certain type `T` and returns a view of the contained values. +CPP_template(typename T, typename R)( + requires ql::ranges::range) auto filterRangeOfVariantsByType(const R& + range) { + return range | ql::views::filter(holdsAlternative) | + ql::views::transform(get); +} + // Transparent functor for `std::get_if`. As an extension to `std::get_if`, // `ad_utility::getIf` may also be called with a `variant` object or reference, // not only with a pointer. 
From 5b2593d023c7a8aa9e114ce4504d4f5a377f8570 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Mon, 2 Feb 2026 14:23:58 +0100 Subject: [PATCH 52/64] merge after care --- src/engine/MaterializedViewsQueryAnalysis.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/engine/MaterializedViewsQueryAnalysis.cpp b/src/engine/MaterializedViewsQueryAnalysis.cpp index 68788c13fe..956366f870 100644 --- a/src/engine/MaterializedViewsQueryAnalysis.cpp +++ b/src/engine/MaterializedViewsQueryAnalysis.cpp @@ -250,7 +250,7 @@ bool QueryPatternCache::analyzeView(ViewPtr view) { std::vector QueryPatternCache::graphPatternInvariantFilter(const ParsedQuery& parsed) { BasicGraphPatternsInvariantTo invariantCheck{ - getVariablesPresentInBasicGraphPatterns( + getVariablesPresentInFirstBasicGraphPattern( parsed._rootGraphPattern._graphPatterns)}; // Filter out graph patterns that do not change the result of the basic graph From 85eb90a70ed83efe2f4bb74a8bf2a468ee20cd21 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Mon, 2 Feb 2026 15:42:58 +0100 Subject: [PATCH 53/64] add todo --- src/util/StringPairHashMap.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/util/StringPairHashMap.h b/src/util/StringPairHashMap.h index 0b8ff98965..9f984b364d 100644 --- a/src/util/StringPairHashMap.h +++ b/src/util/StringPairHashMap.h @@ -14,6 +14,9 @@ // values with pairs of string views as keys. This is implemented using custom // hash and equality operators. 
+// TODO This could be extended to support `std::tuple` or +// `std::array`, not only `std::pair`, and other transparently + // _____________________________________________________________________________ namespace ad_utility { From 76509c777ee8dd966f21f6c3e6f39ca4dd287a93 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Mon, 2 Feb 2026 15:42:58 +0100 Subject: [PATCH 54/64] add todo --- src/util/StringPairHashMap.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/util/StringPairHashMap.h b/src/util/StringPairHashMap.h index 0b8ff98965..9f984b364d 100644 --- a/src/util/StringPairHashMap.h +++ b/src/util/StringPairHashMap.h @@ -14,6 +14,9 @@ // values with pairs of string views as keys. This is implemented using custom // hash and equality operators. +// TODO This could be extended to support `std::tuple` or +// `std::array`, not only `std::pair`, and other transparently + // _____________________________________________________________________________ namespace ad_utility { From e8ff99e1da67b8989fa9aa537886c6c51413c917 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Mon, 2 Feb 2026 15:47:19 +0100 Subject: [PATCH 55/64] try to fix compiler error --- src/parser/GraphPatternOperation.cpp | 1 + src/util/TransparentFunctors.h | 9 --------- src/util/VariantRangeFilter.h | 26 ++++++++++++++++++++++++++ 3 files changed, 27 insertions(+), 9 deletions(-) create mode 100644 src/util/VariantRangeFilter.h diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index 01c0754362..027adbbaaa 100644 --- a/src/parser/GraphPatternOperation.cpp +++ b/src/parser/GraphPatternOperation.cpp @@ -16,6 +16,7 @@ #include "parser/TripleComponent.h" #include "util/Exception.h" #include "util/Forward.h" +#include "util/VariantRangeFilter.h" namespace parsedQuery { diff --git a/src/util/TransparentFunctors.h b/src/util/TransparentFunctors.h index 94bac92192..adc6e88673 100644 --- a/src/util/TransparentFunctors.h +++ b/src/util/TransparentFunctors.h @@ -119,15 
+119,6 @@ static constexpr detail::HoldsAlternativeImpl holdsAlternative; template static constexpr detail::GetImpl get; -// Helper that filters a range, like `std::vector` which contains `std::variant` -// elements by a certain type `T` and returns a view of the contained values. -CPP_template(typename T, typename R)( - requires ql::ranges::range) auto filterRangeOfVariantsByType(const R& - range) { - return range | ql::views::filter(holdsAlternative) | - ql::views::transform(get); -} - // Transparent functor for `std::get_if`. As an extension to `std::get_if`, // `ad_utility::getIf` may also be called with a `variant` object or reference, // not only with a pointer. diff --git a/src/util/VariantRangeFilter.h b/src/util/VariantRangeFilter.h new file mode 100644 index 0000000000..b6a5661488 --- /dev/null +++ b/src/util/VariantRangeFilter.h @@ -0,0 +1,26 @@ +// Copyright 2026 The QLever Authors, in particular: +// +// 2026 Christoph Ullinger , UFR +// +// UFR = University of Freiburg, Chair of Algorithms and Data Structures + +#ifndef QLEVER_SRC_UTIL_VARIANTRANGEFILTER_H +#define QLEVER_SRC_UTIL_VARIANTRANGEFILTER_H + +#include "backports/algorithm.h" +#include "util/TransparentFunctors.h" + +namespace ad_utility { + +// Helper that filters a range, like `std::vector` which contains `std::variant` +// elements by a certain type `T` and returns a view of the contained values. 
+CPP_template(typename T, typename R)( + requires ql::ranges::range) auto filterRangeOfVariantsByType(const R& + range) { + return range | ql::views::filter(holdsAlternative) | + ql::views::transform(get); +} + +} // namespace ad_utility + +#endif // QLEVER_SRC_UTIL_VARIANTRANGEFILTER_H From 7774adf8f73b2842e95c968da8617f89d20161c4 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Mon, 2 Feb 2026 15:49:55 +0100 Subject: [PATCH 56/64] move code to cpp --- src/parser/GraphPatternAnalysis.cpp | 14 ++++++++++++++ src/parser/GraphPatternAnalysis.h | 12 +----------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/parser/GraphPatternAnalysis.cpp b/src/parser/GraphPatternAnalysis.cpp index 8969420ae4..55db3a28d7 100644 --- a/src/parser/GraphPatternAnalysis.cpp +++ b/src/parser/GraphPatternAnalysis.cpp @@ -27,4 +27,18 @@ bool BasicGraphPatternsInvariantTo::operator()( }); } +// _____________________________________________________________________________ +template +bool BasicGraphPatternsInvariantTo::operator()(const T&) const { + // The presence of any of these operations might remove or duplicate rows. + namespace pq = parsedQuery; + static_assert( + ad_utility::SimilarToAny< + T, pq::Optional, pq::Union, pq::Subquery, pq::TransPath, + pq::BasicGraphPattern, pq::Service, pq::PathQuery, pq::SpatialQuery, + pq::TextSearchQuery, pq::Minus, pq::GroupGraphPattern, pq::Describe, + pq::Load, pq::NamedCachedResult, pq::MaterializedViewQuery>); + return false; +} + } // namespace graphPatternAnalysis diff --git a/src/parser/GraphPatternAnalysis.h b/src/parser/GraphPatternAnalysis.h index 91f7c659c0..061dcc2913 100644 --- a/src/parser/GraphPatternAnalysis.h +++ b/src/parser/GraphPatternAnalysis.h @@ -35,17 +35,7 @@ struct BasicGraphPatternsInvariantTo { bool operator()(const parsedQuery::Values& values) const; template - bool operator()(const T&) const { - // The presence of any of these operations might remove or duplicate rows. 
- namespace pq = parsedQuery; - static_assert( - ad_utility::SimilarToAny< - T, pq::Optional, pq::Union, pq::Subquery, pq::TransPath, - pq::BasicGraphPattern, pq::Service, pq::PathQuery, pq::SpatialQuery, - pq::TextSearchQuery, pq::Minus, pq::GroupGraphPattern, pq::Describe, - pq::Load, pq::NamedCachedResult, pq::MaterializedViewQuery>); - return false; - } + bool operator()(const T&) const; }; } // namespace graphPatternAnalysis From 44e4f890ca631d9489d98f3c4b9a30becc7a5383 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Mon, 2 Feb 2026 15:49:55 +0100 Subject: [PATCH 57/64] move code to cpp --- src/parser/GraphPatternAnalysis.cpp | 14 ++++++++++++++ src/parser/GraphPatternAnalysis.h | 12 +----------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/parser/GraphPatternAnalysis.cpp b/src/parser/GraphPatternAnalysis.cpp index 8969420ae4..55db3a28d7 100644 --- a/src/parser/GraphPatternAnalysis.cpp +++ b/src/parser/GraphPatternAnalysis.cpp @@ -27,4 +27,18 @@ bool BasicGraphPatternsInvariantTo::operator()( }); } +// _____________________________________________________________________________ +template +bool BasicGraphPatternsInvariantTo::operator()(const T&) const { + // The presence of any of these operations might remove or duplicate rows. 
+ namespace pq = parsedQuery; + static_assert( + ad_utility::SimilarToAny< + T, pq::Optional, pq::Union, pq::Subquery, pq::TransPath, + pq::BasicGraphPattern, pq::Service, pq::PathQuery, pq::SpatialQuery, + pq::TextSearchQuery, pq::Minus, pq::GroupGraphPattern, pq::Describe, + pq::Load, pq::NamedCachedResult, pq::MaterializedViewQuery>); + return false; +} + } // namespace graphPatternAnalysis diff --git a/src/parser/GraphPatternAnalysis.h b/src/parser/GraphPatternAnalysis.h index 91f7c659c0..061dcc2913 100644 --- a/src/parser/GraphPatternAnalysis.h +++ b/src/parser/GraphPatternAnalysis.h @@ -35,17 +35,7 @@ struct BasicGraphPatternsInvariantTo { bool operator()(const parsedQuery::Values& values) const; template - bool operator()(const T&) const { - // The presence of any of these operations might remove or duplicate rows. - namespace pq = parsedQuery; - static_assert( - ad_utility::SimilarToAny< - T, pq::Optional, pq::Union, pq::Subquery, pq::TransPath, - pq::BasicGraphPattern, pq::Service, pq::PathQuery, pq::SpatialQuery, - pq::TextSearchQuery, pq::Minus, pq::GroupGraphPattern, pq::Describe, - pq::Load, pq::NamedCachedResult, pq::MaterializedViewQuery>); - return false; - } + bool operator()(const T&) const; }; } // namespace graphPatternAnalysis From 7d0fc418fa7a343bed385186d46640139d0a8bf4 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Mon, 2 Feb 2026 16:20:54 +0100 Subject: [PATCH 58/64] add a test for string pair hash map --- test/CMakeLists.txt | 2 ++ test/StringPairHashMapTest.cpp | 31 +++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 test/StringPairHashMapTest.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 32aa042017..f487f01d73 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -193,6 +193,8 @@ addLinkAndDiscoverTestSerial(QueryPlannerSpatialJoinTest engine) addLinkAndDiscoverTestNoLibs(HashMapTest) +addLinkAndDiscoverTestNoLibs(StringPairHashMapTest) + addLinkAndDiscoverTest(HashSetTest) 
addLinkAndDiscoverTestSerial(GroupByTest engine) diff --git a/test/StringPairHashMapTest.cpp b/test/StringPairHashMapTest.cpp new file mode 100644 index 0000000000..6ded6fa40b --- /dev/null +++ b/test/StringPairHashMapTest.cpp @@ -0,0 +1,31 @@ +// Copyright 2026 The QLever Authors, in particular: +// +// 2026 Christoph Ullinger , UFR +// +// UFR = University of Freiburg, Chair of Algorithms and Data Structures + +#include + +#include "util/StringPairHashMap.h" + +// _____________________________________________________________________________ +TEST(StringPairHashMapTest, InsertAndLookup) { + ad_utility::StringPairHashMap map; + + using ad_utility::detail::StringPair; + using ad_utility::detail::StringViewPair; + + // Insert using `std::string` pairs. + map[StringPair{"hello", "world"}] = 7; + map[StringPair{"foo", "bar"}] = 42; + + ASSERT_EQ(map.size(), 2u); + + // Lookup using `std::string_view` pairs. + auto it = map.find(StringViewPair{"hello", "world"}); + ASSERT_NE(it, map.end()); + ASSERT_EQ(it->second, 7); + + EXPECT_EQ(map.count(StringViewPair{"foo", "bar"}), 1u); + EXPECT_EQ(map.count(StringViewPair{"doesnot", "exist"}), 0u); +} From 2d34cfc6684e4138dcf6ed8d18b7f96a880a8bfb Mon Sep 17 00:00:00 2001 From: ullingerc Date: Mon, 2 Feb 2026 18:14:39 +0100 Subject: [PATCH 59/64] make codespell happy --- test/StringPairHashMapTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/StringPairHashMapTest.cpp b/test/StringPairHashMapTest.cpp index 6ded6fa40b..aa09b5efc9 100644 --- a/test/StringPairHashMapTest.cpp +++ b/test/StringPairHashMapTest.cpp @@ -27,5 +27,5 @@ TEST(StringPairHashMapTest, InsertAndLookup) { ASSERT_EQ(it->second, 7); EXPECT_EQ(map.count(StringViewPair{"foo", "bar"}), 1u); - EXPECT_EQ(map.count(StringViewPair{"doesnot", "exist"}), 0u); + EXPECT_EQ(map.count(StringViewPair{"does not", "exist"}), 0u); } From a2ea70ba3f357ab853edd7273d32cbbe07f982d8 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Tue, 3 Feb 2026 09:16:57 
+0100 Subject: [PATCH 60/64] Improve test coverage --- test/CMakeLists.txt | 2 ++ test/StringPairHashMapTest.cpp | 31 ++++++++++++++++++ test/VariantRangeFilterTest.cpp | 38 +++++++++++++++++++++ test/parser/CMakeLists.txt | 1 + test/parser/GraphPatternOperationTest.cpp | 40 +++++++++++++++++++++++ 5 files changed, 112 insertions(+) create mode 100644 test/VariantRangeFilterTest.cpp create mode 100644 test/parser/GraphPatternOperationTest.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f487f01d73..6653abc2ea 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -495,3 +495,5 @@ addLinkAndDiscoverTest(MaterializedViewsTest qlever engine server) addLinkAndDiscoverTestNoLibs(ConstexprMapTest) addLinkAndDiscoverTestNoLibs(ParallelExecutorTest) + +addLinkAndDiscoverTestNoLibs(VariantRangeFilterTest) diff --git a/test/StringPairHashMapTest.cpp b/test/StringPairHashMapTest.cpp index aa09b5efc9..d6ee1b3dfd 100644 --- a/test/StringPairHashMapTest.cpp +++ b/test/StringPairHashMapTest.cpp @@ -29,3 +29,34 @@ TEST(StringPairHashMapTest, InsertAndLookup) { EXPECT_EQ(map.count(StringViewPair{"foo", "bar"}), 1u); EXPECT_EQ(map.count(StringViewPair{"does not", "exist"}), 0u); } + +// _____________________________________________________________________________ +TEST(StringPairHashMapTest, StringPairEq) { + using ad_utility::detail::StringPair; + using ad_utility::detail::StringViewPair; + ad_utility::detail::StringPairEq eq; + + StringPair a{"a", "b"}; + StringPair b{"x", "y"}; + StringPair c{"x", "g"}; + + EXPECT_TRUE(eq(a, a)); + EXPECT_FALSE(eq(a, b)); + EXPECT_FALSE(eq(a, c)); + + StringViewPair aEq{"a", "b"}; + StringViewPair aNe{"a", "c"}; + StringViewPair bNe{"f", "g"}; + + EXPECT_TRUE(eq(a, aEq)); + EXPECT_FALSE(eq(a, aNe)); + EXPECT_FALSE(eq(b, bNe)); + + EXPECT_TRUE(eq(aEq, a)); + EXPECT_FALSE(eq(aNe, a)); + EXPECT_FALSE(eq(bNe, b)); + + StringViewPair aSv{"a", "b"}; + + EXPECT_TRUE(eq(a, aSv)); +} diff --git a/test/VariantRangeFilterTest.cpp 
b/test/VariantRangeFilterTest.cpp new file mode 100644 index 0000000000..ca9036fd56 --- /dev/null +++ b/test/VariantRangeFilterTest.cpp @@ -0,0 +1,38 @@ +// Copyright 2026 The QLever Authors, in particular: +// +// 2026 Christoph Ullinger , UFR +// +// UFR = University of Freiburg, Chair of Algorithms and Data Structures + +#include + +#include "./util/GTestHelpers.h" +#include "util/VariantRangeFilter.h" + +namespace { + +// Helper for testing `filterRangeOfVariantsByType`. +template +void expectFilteredRange( + std::vector input, std::vector expected, + ad_utility::source_location location = AD_CURRENT_SOURCE_LOC()) { + auto l = generateLocationTrace(location); + auto matcher = liftMatcherToElementsAreArray>( + [](auto value) { return ::testing::Eq(value); }); + auto actual = ad_utility::filterRangeOfVariantsByType(input) | + ::ranges::to>; + EXPECT_THAT(actual, matcher(expected)); +} + +// _____________________________________________________________________________ +TEST(VariantRangeFilterTest, Test) { + using V = std::variant; + std::vector vec{1, 'c', true, false, true, 3, 'f'}; + + expectFilteredRange(vec, {1, 3}); + expectFilteredRange(vec, {'c', 'f'}); + expectFilteredRange(vec, {true, false, true}); + expectFilteredRange(vec, {}); +} + +} // namespace diff --git a/test/parser/CMakeLists.txt b/test/parser/CMakeLists.txt index 8810d5f332..45cfc320e7 100644 --- a/test/parser/CMakeLists.txt +++ b/test/parser/CMakeLists.txt @@ -8,6 +8,7 @@ addLinkAndDiscoverTest(BlankNodeExpressionTest engine) addLinkAndDiscoverTest(PropertyPathTest parser) addLinkAndDiscoverTest(UpdateTriplesTest parser) addLinkAndDiscoverTest(NamedCachedResultTest parser) +addLinkAndDiscoverTest(GraphPatternOperationTest parser) addLinkAndDiscoverTestSerial(SparqlAntlrParserTest parser engine) addLinkAndDiscoverTestSerial(SparqlAntlrParserUpdateTest parser engine) addLinkAndDiscoverTestSerial(SparqlAntlrParserExpressionTest parser sparqlExpressions engine) diff --git 
a/test/parser/GraphPatternOperationTest.cpp b/test/parser/GraphPatternOperationTest.cpp new file mode 100644 index 0000000000..4d456be5e9 --- /dev/null +++ b/test/parser/GraphPatternOperationTest.cpp @@ -0,0 +1,40 @@ +// Copyright 2026 The QLever Authors, in particular: +// +// 2026 Christoph Ullinger , UFR +// +// UFR = University of Freiburg, Chair of Algorithms and Data Structures + +#include + +#include "gmock/gmock.h" +#include "parser/GraphPatternOperation.h" +#include "parser/SparqlTriple.h" +#include "rdfTypes/Iri.h" + +// _____________________________________________________________________________ +TEST(GraphPatternOperationTest, BasicPatternContainedVars) { + SparqlTripleSimple example1{Variable{"?s"}, Variable{"?p"}, Variable{"?o"}}; + SparqlTripleSimple example2{ + ad_utility::triple_component::Iri::fromIriref(""), + ad_utility::triple_component::Iri::fromIriref("

"), Variable{"?o2"}}; + + auto triple1 = SparqlTriple::fromSimple(example1); + auto triple2 = SparqlTriple::fromSimple(example2); + + parsedQuery::BasicGraphPattern bgp{{triple1, triple2}}; + + ad_utility::HashSet vars; + bgp.collectAllContainedVariables(vars); + auto expectedVarsMatcher = ::testing::UnorderedElementsAre( + ::testing::Eq(Variable{"?s"}), ::testing::Eq(Variable{"?p"}), + ::testing::Eq(Variable{"?o"}), ::testing::Eq(Variable{"?o2"})); + EXPECT_THAT(vars, expectedVarsMatcher); + + parsedQuery::Bind bind{ + sparqlExpression::SparqlExpressionPimpl::makeVariableExpression( + Variable{"?x"}), + Variable{"?y"}}; + std::vector graphPatterns{bind, bgp}; + EXPECT_THAT(getVariablesPresentInFirstBasicGraphPattern(graphPatterns), + expectedVarsMatcher); +} From cd263f4173ebff82920b42fcab9ff5b76779ae12 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Tue, 3 Feb 2026 09:17:22 +0100 Subject: [PATCH 61/64] fix includes --- test/parser/GraphPatternOperationTest.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/parser/GraphPatternOperationTest.cpp b/test/parser/GraphPatternOperationTest.cpp index 4d456be5e9..71b11e9aae 100644 --- a/test/parser/GraphPatternOperationTest.cpp +++ b/test/parser/GraphPatternOperationTest.cpp @@ -4,12 +4,10 @@ // // UFR = University of Freiburg, Chair of Algorithms and Data Structures -#include +#include -#include "gmock/gmock.h" #include "parser/GraphPatternOperation.h" #include "parser/SparqlTriple.h" -#include "rdfTypes/Iri.h" // _____________________________________________________________________________ TEST(GraphPatternOperationTest, BasicPatternContainedVars) { From 41b733c5309081b52e6ed66d58e43a8e4e25d78f Mon Sep 17 00:00:00 2001 From: ullingerc Date: Tue, 3 Feb 2026 09:22:18 +0100 Subject: [PATCH 62/64] another test --- test/MaterializedViewsTest.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/MaterializedViewsTest.cpp b/test/MaterializedViewsTest.cpp index 887263df63..6331e0c6b2 
100644 --- a/test/MaterializedViewsTest.cpp +++ b/test/MaterializedViewsTest.cpp @@ -556,6 +556,16 @@ TEST_F(MaterializedViewsTest, ManualConfigurations) { ::testing::Eq(V{"?o"}))); } + // Test internal constructor. + { + ViewQuery query{"testView", ViewQuery::RequestedColumns{ + {V{"?s"}, V{"?s2"}}, {V{"?o"}, V{"?o2"}}}}; + EXPECT_EQ(query.viewName_, "testView"); + EXPECT_THAT(query.getVarsToKeep(), + ::testing::UnorderedElementsAre(::testing::Eq(V{"?s2"}), + ::testing::Eq(V{"?o2"}))); + } + // Unsupported format version. { auto plan = qlv().parseAndPlanQuery(simpleWriteQuery_); From af7416340149a06a1b41b242ebf0aec65501131f Mon Sep 17 00:00:00 2001 From: ullingerc Date: Tue, 3 Feb 2026 09:26:22 +0100 Subject: [PATCH 63/64] part of comment got lost --- src/util/StringPairHashMap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/StringPairHashMap.h b/src/util/StringPairHashMap.h index 9f984b364d..26a0083cd0 100644 --- a/src/util/StringPairHashMap.h +++ b/src/util/StringPairHashMap.h @@ -15,7 +15,7 @@ // hash and equality operators. // TODO This could be extended to support `std::tuple` or -// `std::array`, not only `std::pair`, and other transparently +// `std::array`, not only `std::pair`, and other transparently comparable types. 
// _____________________________________________________________________________ namespace ad_utility { From 95d02561c18351fa69b8c497ed135491abb94301 Mon Sep 17 00:00:00 2001 From: ullingerc Date: Tue, 3 Feb 2026 10:06:33 +0100 Subject: [PATCH 64/64] fix linker --- src/parser/GraphPatternAnalysis.cpp | 14 -------------- src/parser/GraphPatternAnalysis.h | 12 +++++++++++- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/src/parser/GraphPatternAnalysis.cpp b/src/parser/GraphPatternAnalysis.cpp index 55db3a28d7..8969420ae4 100644 --- a/src/parser/GraphPatternAnalysis.cpp +++ b/src/parser/GraphPatternAnalysis.cpp @@ -27,18 +27,4 @@ bool BasicGraphPatternsInvariantTo::operator()( }); } -// _____________________________________________________________________________ -template -bool BasicGraphPatternsInvariantTo::operator()(const T&) const { - // The presence of any of these operations might remove or duplicate rows. - namespace pq = parsedQuery; - static_assert( - ad_utility::SimilarToAny< - T, pq::Optional, pq::Union, pq::Subquery, pq::TransPath, - pq::BasicGraphPattern, pq::Service, pq::PathQuery, pq::SpatialQuery, - pq::TextSearchQuery, pq::Minus, pq::GroupGraphPattern, pq::Describe, - pq::Load, pq::NamedCachedResult, pq::MaterializedViewQuery>); - return false; -} - } // namespace graphPatternAnalysis diff --git a/src/parser/GraphPatternAnalysis.h b/src/parser/GraphPatternAnalysis.h index 061dcc2913..91f7c659c0 100644 --- a/src/parser/GraphPatternAnalysis.h +++ b/src/parser/GraphPatternAnalysis.h @@ -35,7 +35,17 @@ struct BasicGraphPatternsInvariantTo { bool operator()(const parsedQuery::Values& values) const; template - bool operator()(const T&) const; + bool operator()(const T&) const { + // The presence of any of these operations might remove or duplicate rows. 
+ namespace pq = parsedQuery; + static_assert( + ad_utility::SimilarToAny< + T, pq::Optional, pq::Union, pq::Subquery, pq::TransPath, + pq::BasicGraphPattern, pq::Service, pq::PathQuery, pq::SpatialQuery, + pq::TextSearchQuery, pq::Minus, pq::GroupGraphPattern, pq::Describe, + pq::Load, pq::NamedCachedResult, pq::MaterializedViewQuery>); + return false; + } }; } // namespace graphPatternAnalysis