diff --git a/src/ServerMain.cpp b/src/ServerMain.cpp index bbc7ae48d3..1565a727a5 100644 --- a/src/ServerMain.cpp +++ b/src/ServerMain.cpp @@ -182,6 +182,11 @@ int main(int argc, char** argv) { ->multitoken(), "The names of materialized views to be loaded automatically on server " "start (this option takes an arbitrary number of arguments)."); + add("enable-materialized-view-query-rewrite", + optionFactory.getProgramOption< + &RuntimeParameters::enableMaterializedViewQueryRewrite_>(), + "If set to true, loaded materialized views will be considered as " + "alternative query plans for certain supported query patterns."); po::variables_map optionsMap; try { diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt index 311da2ec3a..26374fbeb1 100644 --- a/src/engine/CMakeLists.txt +++ b/src/engine/CMakeLists.txt @@ -18,7 +18,8 @@ add_library(engine QueryExecutionContext.cpp ExistsJoin.cpp SparqlProtocol.cpp ParsedRequestBuilder.cpp NeutralOptional.cpp Load.cpp StripColumns.cpp NamedResultCache.cpp ExplicitIdTableOperation.cpp StringMapping.cpp MaterializedViews.cpp - PermutationSelector.cpp ConstructQueryEvaluator.cpp ConstructTripleGenerator.cpp) + PermutationSelector.cpp ConstructQueryEvaluator.cpp ConstructTripleGenerator.cpp + MaterializedViewsQueryAnalysis.cpp) qlever_target_link_libraries(engine util index parser global sparqlExpressions SortPerformanceEstimator Boost::iostreams s2 spatialjoin-dev pb_util pb_util_geo) diff --git a/src/engine/MaterializedViews.cpp b/src/engine/MaterializedViews.cpp index bbe3619c0a..3ef3ff416e 100644 --- a/src/engine/MaterializedViews.cpp +++ b/src/engine/MaterializedViews.cpp @@ -14,6 +14,8 @@ #include #include "engine/IndexScan.h" +#include "engine/Join.h" +#include "engine/MaterializedViewsQueryAnalysis.h" #include "engine/QueryExecutionContext.h" #include "engine/QueryExecutionTree.h" #include "engine/VariableToColumnMap.h" @@ -92,7 +94,9 @@ MaterializedViewWriter::getIdTableColumnNamesAndPermutation() const { // 
Column information for the columns selected by the user's query. auto existingCols = ::ranges::to>( targetVarsAndCols | ql::views::transform([](const auto& opt) { - AD_CONTRACT_CHECK(opt.has_value()); + AD_CONTRACT_CHECK( + opt.has_value(), + "Please ensure that all variables in your SELECT are bound."); return ColumnNameAndIndex{opt.value().variable_, opt.value().columnIndex_}; })); @@ -375,17 +379,27 @@ std::shared_ptr MaterializedView::permutation() const { // _____________________________________________________________________________ void MaterializedViewsManager::loadView(const std::string& name) const { auto lock = loadedViews_.wlock(); + auto patternLock = queryPatternCache_.wlock(); if (lock->contains(name)) { return; } - lock->insert({name, std::make_shared(onDiskBase_, name)}); -}; + auto view = std::make_shared(onDiskBase_, name); + lock->insert({name, view}); + // Analyzing the view when loading instead of (de)serializing an analysis + // result has the benefit that query analysis can be extended without needing + // to rewrite views. + if (patternLock->analyzeView(view)) { + AD_LOG_INFO << "The materialized view '" << name + << "' was added to the query pattern cache." << std::endl; + } +} // _____________________________________________________________________________ void MaterializedViewsManager::unloadViewIfLoaded( const std::string& name) const { // `HashMap::erase` is a no-op for nonexisting keys. loadedViews_.wlock()->erase(name); + // TODO Also remove the view from `queryPatternCache_` when unloading. 
} // _____________________________________________________________________________ @@ -597,6 +611,15 @@ std::shared_ptr MaterializedView::makeIndexScan( viewQuery.getVarsToKeep()); } +// _____________________________________________________________________________ +std::vector +MaterializedViewsManager::makeJoinReplacementIndexScans( + QueryExecutionContext* qec, + const parsedQuery::BasicGraphPattern& triples) const { + return queryPatternCache_.rlock()->makeJoinReplacementIndexScans(qec, + triples); +} + // _____________________________________________________________________________ std::shared_ptr MaterializedViewsManager::makeIndexScan( QueryExecutionContext* qec, diff --git a/src/engine/MaterializedViews.h b/src/engine/MaterializedViews.h index f3a65e5505..9452a079bb 100644 --- a/src/engine/MaterializedViews.h +++ b/src/engine/MaterializedViews.h @@ -7,16 +7,19 @@ #ifndef QLEVER_SRC_ENGINE_MATERIALIZEDVIEWS_H_ #define QLEVER_SRC_ENGINE_MATERIALIZEDVIEWS_H_ +#include "engine/MaterializedViewsQueryAnalysis.h" #include "engine/VariableToColumnMap.h" #include "engine/idTable/CompressedExternalIdTable.h" #include "index/DeltaTriples.h" #include "index/ExternalSortFunctors.h" #include "index/Permutation.h" #include "libqlever/QleverTypes.h" +#include "parser/GraphPatternOperation.h" #include "parser/MaterializedViewQuery.h" #include "parser/ParsedQuery.h" #include "parser/SparqlTriple.h" #include "util/HashMap.h" +#include "util/Synchronized.h" // Forward declarations class QueryExecutionContext; @@ -223,6 +226,9 @@ class MaterializedView { const parsedQuery::MaterializedViewQuery& viewQuery) const; }; +// Shorthand for query rewriting helper class. +using materializedViewsQueryAnalysis::MaterializedViewJoinReplacement; + // The `MaterializedViewsManager` is part of the `QueryExecutionContext` and is // used to manage the currently loaded `MaterializedViews` in a `Server` or // `Qlever` instance. 
@@ -232,6 +238,9 @@ class MaterializedViewsManager { mutable ad_utility::Synchronized< ad_utility::HashMap>> loadedViews_; + mutable ad_utility::Synchronized< + materializedViewsQueryAnalysis::QueryPatternCache> + queryPatternCache_; public: MaterializedViewsManager() = default; @@ -266,6 +275,14 @@ class MaterializedViewsManager { QueryExecutionContext* qec, const parsedQuery::MaterializedViewQuery& viewQuery) const; + // Given a set of triples, check if some join operations that would be + // required when evaluating them can be replaced by scans on materialized + // views that are currently loaded. This is implemented using the + // `queryPatternCache_`. + std::vector makeJoinReplacementIndexScans( + QueryExecutionContext* qec, + const parsedQuery::BasicGraphPattern& triples) const; + // Write a `MaterializedView` given a valid `name` (consisting only of // alphanumerics and hyphens) and a `queryPlan` to be executed. The query's // result is written to the view. diff --git a/src/engine/MaterializedViewsQueryAnalysis.cpp b/src/engine/MaterializedViewsQueryAnalysis.cpp new file mode 100644 index 0000000000..956366f870 --- /dev/null +++ b/src/engine/MaterializedViewsQueryAnalysis.cpp @@ -0,0 +1,265 @@ +// Copyright 2026 The QLever Authors, in particular: +// +// 2026 Christoph Ullinger , UFR +// +// UFR = University of Freiburg, Chair of Algorithms and Data Structures + +#include "engine/MaterializedViewsQueryAnalysis.h" + +#include +#include + +#include "engine/IndexScan.h" +#include "engine/MaterializedViews.h" +#include "parser/GraphPatternOperation.h" +#include "parser/PropertyPath.h" +#include "parser/SparqlParser.h" + +namespace materializedViewsQueryAnalysis { + +// _____________________________________________________________________________ +std::vector +QueryPatternCache::makeJoinReplacementIndexScans( + QueryExecutionContext* qec, + const parsedQuery::BasicGraphPattern& triples) const { + std::vector result; + + // All triples of the form `anything 
<iri> ?variable` where `<iri>` is covered + // by a materialized view, stored by `?variable` for finding chains. + ad_utility::HashMap> chainLeft; + + // All triples of the form `?variable <iri> ?otherVariable` where `<iri>` is + // covered by a materialized view, where `?variable` is different from + // `?otherVariable`, stored by `?variable` for finding chains. + ad_utility::HashMap> chainRight; + + // TODO Optimizations for stars. + + for (const auto& [tripleIdx, triple] : + ::ranges::views::enumerate(triples._triples)) { + if (std::holds_alternative(triple.p_)) { + const auto& path = std::get(triple.p_); + if (path.isIri()) { + const auto& iri = path.getIri().toStringRepresentation(); + // If no view that we know of contains this predicate, we can ignore + // this triple altogether. + if (!predicateInView_.contains(iri)) { + continue; + } + // Check for potential join chain triple. + if (triple.o_.isVariable()) { + if (triple.s_.isVariable() && triple.s_ != triple.o_) { + // This triple could be the right side of a chain join. + chainRight[triple.s_.getVariable()].push_back(tripleIdx); + } + if (triple.s_ != triple.o_) { + // This triple could be the left side of a chain join. + chainLeft[triple.o_.getVariable()].push_back(tripleIdx); + } + } + } else if (path.isSequence()) { + AD_THROW( + "Sequence property paths are expected to be replaced by joins " + "during earlier stages of query planning."); + } + } + } + + // Using the information collected by the pass over all triples, assemble all + // chains that can potentially be rewritten. + for (const auto& [varLeft, triplesLeft] : chainLeft) { + // No triples for the right side on the same variable have been collected. + if (!chainRight.contains(varLeft)) { + continue; + } + + // Iterate over all chains present and check if they can be rewritten to a + // view scan. 
+ for (auto tripleIdxRight : chainRight.at(varLeft)) { + for (auto tripleIdxLeft : triplesLeft) { + const auto& left = triples._triples.at(tripleIdxLeft); + const auto& right = triples._triples.at(tripleIdxRight); + + // We have already checked that this holds the correct alternative + // above. + const auto& leftP = std::get(left.p_); + const auto& rightP = std::get(right.p_); + + if (!leftP.isIri() || !rightP.isIri()) { + continue; + } + + // Lookup key based on `std::string_view` avoids copying the IRIs. + ChainedPredicatesForLookup key{ + leftP.getIri().toStringRepresentation(), + rightP.getIri().toStringRepresentation()}; + + // Lookup if there are matching views. There could potentially be + // multiple (e.g. with different sorting). + if (auto it = simpleChainCache_.find(key); + it != simpleChainCache_.end()) { + for (const auto& chainInfo : *(it->second)) { + // We have found a materialized view for this chain. Construct an + // `IndexScan`. + result.push_back( + {makeScanForSingleChain(qec, chainInfo, left.s_, varLeft, + right.o_.getVariable()), + {tripleIdxLeft, tripleIdxRight}}); + } + } + } + } + } + + return result; +} + +// _____________________________________________________________________________ +std::shared_ptr QueryPatternCache::makeScanForSingleChain( + QueryExecutionContext* qec, ChainInfo cached, TripleComponent subject, + std::optional chain, Variable object) const { + auto& [cSubject, cChainVar, cObject, view] = cached; + parsedQuery::MaterializedViewQuery::RequestedColumns cols{ + {std::move(cSubject), std::move(subject)}, + {std::move(cObject), std::move(object)}, + }; + if (chain.has_value()) { + cols.insert({std::move(cChainVar), std::move(chain.value())}); + } + return view->makeIndexScan( + qec, parsedQuery::MaterializedViewQuery{view->name(), std::move(cols)}); +} + +// _____________________________________________________________________________ +bool QueryPatternCache::analyzeSimpleChain(ViewPtr view, const SparqlTriple& a, + 
const SparqlTriple& b) { + // Check predicates. + auto aPred = a.getSimplePredicate(); + if (!aPred.has_value()) { + return false; + } + auto bPred = b.getSimplePredicate(); + if (!bPred.has_value()) { + return false; + } + + // Check variables. + if (!a.s_.isVariable()) { + return false; + } + auto aSubj = a.s_.getVariable(); + + if (!a.o_.isVariable() || a.o_.getVariable() == aSubj) { + return false; + } + auto chainVar = a.o_.getVariable(); + + if (!b.s_.isVariable() || b.s_.getVariable() != chainVar) { + return false; + } + + if (!b.o_.isVariable() || b.o_.getVariable() == chainVar || + b.o_.getVariable() == aSubj) { + return false; + } + auto bObj = b.o_.getVariable(); + + // Insert chain to cache. + ChainedPredicates preds{aPred.value(), bPred.value()}; + auto [it, wasNew] = simpleChainCache_.try_emplace(preds, nullptr); + if (it->second == nullptr) { + it->second = std::make_shared>(); + } + it->second->push_back( + ChainInfo{std::move(aSubj), std::move(chainVar), std::move(bObj), view}); + return true; +} + +// _____________________________________________________________________________ +bool QueryPatternCache::analyzeView(ViewPtr view) { + const auto& query = view->originalQuery(); + if (!query.has_value()) { + return false; + } + + auto explainIgnore = [&](const std::string& reason) { + AD_LOG_INFO << "Materialized view '" << view->name() + << "' will not be added to the query pattern cache for query " + "rewriting. Reason: " + << reason << "." << std::endl; + }; + + // We do not need the `EncodedIriManager` because we are only interested in + // analyzing the query structure, not in converting its components to + // `ValueId`s. 
+ EncodedIriManager e; + auto parsed = SparqlParser::parseQuery(&e, query.value(), {}); + + auto graphPatternsFiltered = graphPatternInvariantFilter(parsed); + if (graphPatternsFiltered.size() != 1) { + explainIgnore( + "The view has more than one graph pattern (even after skipping ignored " + "patterns)"); + return false; + } + const auto& graphPattern = graphPatternsFiltered.at(0); + if (!std::holds_alternative(graphPattern)) { + explainIgnore("The graph pattern is not a basic set of triples"); + return false; + } + // TODO Property path is stored as a single predicate here. + const auto& triples = graphPattern.getBasic()._triples; + if (triples.size() == 0) { + explainIgnore("The query body is empty"); + return false; + } + bool patternFound = false; + + // TODO Possibly handle chain by property path. + if (triples.size() == 2) { + const auto& a = triples.at(0); + const auto& b = triples.at(1); + if (!analyzeSimpleChain(view, a, b)) { + patternFound = patternFound || analyzeSimpleChain(view, b, a); + } else { + patternFound = true; + } + } + + // TODO Add support for other patterns, in particular, stars. + + // Remember predicates that appear in certain views, only if any pattern is + // detected. + if (patternFound) { + for (const auto& triple : triples) { + auto predicate = triple.getSimplePredicate(); + if (predicate.has_value()) { + predicateInView_[predicate.value()].push_back(view); + } + } + } + + if (!patternFound) { + explainIgnore("No supported query pattern for rewriting joins was found"); + } + + return patternFound; +} + +// _____________________________________________________________________________ +std::vector +QueryPatternCache::graphPatternInvariantFilter(const ParsedQuery& parsed) { + BasicGraphPatternsInvariantTo invariantCheck{ + getVariablesPresentInFirstBasicGraphPattern( + parsed._rootGraphPattern._graphPatterns)}; + + // Filter out graph patterns that do not change the result of the basic graph + // pattern analyzed. 
+ return ::ranges::to(parsed._rootGraphPattern._graphPatterns | + ql::views::filter([&](const auto& pattern) { + return !std::visit(invariantCheck, + pattern); + })); +} + +} // namespace materializedViewsQueryAnalysis diff --git a/src/engine/MaterializedViewsQueryAnalysis.h b/src/engine/MaterializedViewsQueryAnalysis.h new file mode 100644 index 0000000000..b8657eb202 --- /dev/null +++ b/src/engine/MaterializedViewsQueryAnalysis.h @@ -0,0 +1,96 @@ +// Copyright 2026 The QLever Authors, in particular: +// +// 2026 Christoph Ullinger , UFR +// +// UFR = University of Freiburg, Chair of Algorithms and Data Structures + +#ifndef QLEVER_SRC_ENGINE_MATERIALIZEDVIEWSQUERYANALYSIS_H_ +#define QLEVER_SRC_ENGINE_MATERIALIZEDVIEWSQUERYANALYSIS_H_ + +#include "parser/GraphPatternAnalysis.h" +#include "parser/GraphPatternOperation.h" +#include "parser/SparqlTriple.h" +#include "parser/TripleComponent.h" +#include "rdfTypes/Variable.h" +#include "util/StringPairHashMap.h" + +// Forward declarations to prevent cyclic dependencies. +class MaterializedView; +class IndexScan; + +// _____________________________________________________________________________ +namespace materializedViewsQueryAnalysis { + +using ViewPtr = std::shared_ptr; +using graphPatternAnalysis::BasicGraphPatternsInvariantTo; + +// Key and value types of the cache for simple chains, that is queries of the +// form `?s <p1> ?m . ?m <p2> ?o`. +using ChainedPredicates = ad_utility::detail::StringPair; +using ChainedPredicatesForLookup = ad_utility::detail::StringViewPair; +struct ChainInfo { + Variable subject_; + Variable chain_; + Variable object_; + ViewPtr view_; +}; +using SimpleChainCache = + ad_utility::StringPairHashMap>>; + +// Helper class that represents a possible join replacement and indicates the +// subset of triples it handles. 
+struct MaterializedViewJoinReplacement { + std::shared_ptr indexScan_; + std::vector coveredTriples_; + + // ___________________________________________________________________________ + size_t numJoins() const { return coveredTriples_.size() - 1; } +}; + +// Cache data structure for the `MaterializedViewsManager`. This object can be +// used for quickly looking up if a given query can be optimized by making use +// of an existing materialized view. +class QueryPatternCache { + private: + // Simple chains can be found by direct access into a hash map. + SimpleChainCache simpleChainCache_; + + // Cache for predicates appearing in a materialized view. + ad_utility::HashMap> predicateInView_; + + // TODO Data structure for join stars. + public: + // Given a materialized view, analyze its write query and populate the cache. + // This is called from `MaterializedViewsManager::loadView`. + bool analyzeView(ViewPtr view); + + // Given a set of triples, check if a subset of necessary join operations can + // be replaced by scans on materialized views. + std::vector makeJoinReplacementIndexScans( + QueryExecutionContext* qec, + const parsedQuery::BasicGraphPattern& triples) const; + + // Construct an `IndexScan` for a single chain join given the necessary + // information from both the materialized view and the user's query. + std::shared_ptr makeScanForSingleChain( + QueryExecutionContext* qec, ChainInfo cached, TripleComponent subject, + std::optional chain, Variable object) const; + + private: + // Helper for `analyzeView`, that checks for a simple chain. It returns `true` + // iff a simple chain `a->b` is present. + // NOTE: This function only checks one direction, so it should also be called + // with `a` and `b` switched if it returns `false`. + bool analyzeSimpleChain(ViewPtr view, const SparqlTriple& a, + const SparqlTriple& b); + + // Helper that filters the graph patterns of a parsed query using + // `BasicGraphPatternsInvariantTo`. 
For details, see the documentation for this + // helper. + static std::vector + graphPatternInvariantFilter(const ParsedQuery& parsed); +}; + +} // namespace materializedViewsQueryAnalysis + +#endif // QLEVER_SRC_ENGINE_MATERIALIZEDVIEWSQUERYANALYSIS_H_ diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 08f79eef16..6b5851b5af 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -1505,14 +1505,18 @@ void QueryPlanner::applyTextLimitsIfPossible(vector& row, // _____________________________________________________________________________ size_t QueryPlanner::findUniqueNodeIds( - const std::vector& connectedComponent) { + const std::vector& connectedComponent, + bool allowReplacementPlans) { ad_utility::HashSet uniqueNodeIds; auto nodeIds = connectedComponent | ql::views::transform(&SubtreePlan::_idsOfIncludedNodes); // Check that all the `_idsOfIncludedNodes` are one-hot encodings of a single - // value, i.e. they have exactly one bit set. - AD_CORRECTNESS_CHECK(ql::ranges::all_of( - nodeIds, [](auto nodeId) { return absl::popcount(nodeId) == 1; })); + // value, i.e. they have exactly one bit set. If replacement plans are allowed + // this constraint is not applicable. 
+ AD_CORRECTNESS_CHECK(allowReplacementPlans || + ql::ranges::all_of(nodeIds, [](auto nodeId) { + return absl::popcount(nodeId) == 1; + })); ql::ranges::copy(nodeIds, std::inserter(uniqueNodeIds, uniqueNodeIds.end())); return uniqueNodeIds.size(); } @@ -1522,13 +1526,14 @@ std::vector QueryPlanner::runDynamicProgrammingOnConnectedComponent( std::vector connectedComponent, const FiltersAndOptionalSubstitutes& filters, - const TextLimitVec& textLimits, const TripleGraph& tg) const { + const TextLimitVec& textLimits, const TripleGraph& tg, + const ReplacementPlans& replacementPlans) const { vector> dpTab; // find the unique number of nodes in the current connected component // (there might be duplicates because we already have multiple candidates // for each index scan with different permutations. dpTab.push_back(std::move(connectedComponent)); - size_t numSeeds = findUniqueNodeIds(dpTab.back()); + size_t numSeeds = findUniqueNodeIds(dpTab.back(), false); for (size_t k = 2; k <= numSeeds; ++k) { AD_LOG_TRACE << "Producing plans that unite " << k << " triples." @@ -1544,6 +1549,13 @@ QueryPlanner::runDynamicProgrammingOnConnectedComponent( // As we only passed in connected components, we expect the result to always // be nonempty. AD_CORRECTNESS_CHECK(!dpTab[k - 1].empty()); + // If we have replacement plans for this level, we add them now, s.t. the + // next level can make use of them. 
+ if (replacementPlans.size() > k - 1) { + const auto& newPlans = replacementPlans[k - 1]; + dpTab[k - 1].reserve(dpTab[k - 1].size() + newPlans.size()); + dpTab[k - 1].insert(dpTab[k - 1].end(), newPlans.begin(), newPlans.end()); + } } auto& result = dpTab.back(); applyFiltersIfPossible(result, @@ -1628,11 +1640,13 @@ size_t QueryPlanner::countSubgraphs(std::vector graph, std::vector QueryPlanner::runGreedyPlanningOnConnectedComponent( std::vector connectedComponent, const FiltersAndOptionalSubstitutes& filters, - const TextLimitVec& textLimits, const TripleGraph& tg) const { + const TextLimitVec& textLimits, const TripleGraph& tg, + const ReplacementPlans& replacementPlans) const { applyFiltersIfPossible(connectedComponent, filters); applyTextLimitsIfPossible(connectedComponent, textLimits, true); - const size_t numSeeds = findUniqueNodeIds(connectedComponent); + const size_t numSeeds = + findUniqueNodeIds(connectedComponent, !replacementPlans.empty()); if (numSeeds <= 1) { // Only 0 or 1 nodes in the input, nothing to plan. 
return connectedComponent; @@ -1731,7 +1745,8 @@ QueryPlanner::FiltersAndOptionalSubstitutes QueryPlanner::seedFilterSubstitutes( // _____________________________________________________________________________ std::vector> QueryPlanner::fillDpTab( const QueryPlanner::TripleGraph& tg, vector filters, - TextLimitMap& textLimits, const vector>& children) { + TextLimitMap& textLimits, const vector>& children, + ReplacementPlans replacementPlans) { auto [initialPlans, additionalFilters] = seedWithScansAndText(tg, children, textLimits); ql::ranges::move(additionalFilters, std::back_inserter(filters)); @@ -1755,9 +1770,12 @@ std::vector> QueryPlanner::fillDpTab( TextLimitVec textLimitVec(textLimits.begin(), textLimits.end()); for (auto& component : components | ql::views::values) { std::vector g; + uint64_t coveredNodes = 0; for (const auto& plan : component) { g.push_back(&plan); + coveredNodes |= plan._idsOfIncludedNodes; } + const size_t budget = getRuntimeParameter<&RuntimeParameters::queryPlanningBudget_>(); bool useGreedyPlanning = countSubgraphs(g, filters, budget) > budget; @@ -1766,12 +1784,39 @@ std::vector> QueryPlanner::fillDpTab( << "Using the greedy query planner for a large connected component" << std::endl; } + + auto [applicableReplacementPlans, hasApplicableReplacementPlans] = + findApplicableReplacementPlans(replacementPlans, coveredNodes, + useGreedyPlanning); + auto impl = useGreedyPlanning ? 
&QueryPlanner::runGreedyPlanningOnConnectedComponent : &QueryPlanner::runDynamicProgrammingOnConnectedComponent; - lastDpRowFromComponents.push_back( - std::invoke(impl, this, std::move(component), filtersAndOptSubstitutes, - textLimitVec, tg)); + + std::vector lastDpRow; + + auto addCandidates = [&lastDpRow](std::vector candidates) { + std::move(candidates.begin(), candidates.end(), + std::back_inserter(lastDpRow)); + }; + + if (useGreedyPlanning && hasApplicableReplacementPlans) { + // Plan once with a copy of `component` and without replacements to have + // a baseline plan. This plan may be better than the replacement if a + // certain sorting is required, that the replacement doesn't provide. + addCandidates(std::invoke(impl, this, component, filtersAndOptSubstitutes, + textLimitVec, tg, ReplacementPlans{})); + + // Then remove the plans for the nodes covered by replacement plans and + // insert the replacement plans. + useReplacementPlansForGreedyPlanner(applicableReplacementPlans, + component); + } + + addCandidates(std::invoke(impl, this, std::move(component), + filtersAndOptSubstitutes, textLimitVec, tg, + applicableReplacementPlans)); + lastDpRowFromComponents.push_back(std::move(lastDpRow)); checkCancellation(); } size_t numConnectedComponents = lastDpRowFromComponents.size(); @@ -2526,6 +2571,43 @@ auto QueryPlanner::createJoinWithTransitivePath(const SubtreePlan& a, #endif } +// _____________________________________________________________________________ +auto QueryPlanner::createMaterializedViewJoinReplacements( + const parsedQuery::BasicGraphPattern& triples) const -> ReplacementPlans { + ReplacementPlans plans; + + // Check if the user allows query rewriting. + if (!getRuntimeParameter< + &RuntimeParameters::enableMaterializedViewQueryRewrite_>()) { + return plans; + } + + // The `MaterializedViewsManager` provides `IndexScan` instances for all the + // subsets of `triples` it can rewrite. 
The individual results do not cover + // all items of `triples`, instead each has a vector of triple indices it + // covers. + auto scans = _qec->materializedViewsManager().makeJoinReplacementIndexScans( + _qec, triples); + plans.reserve(triples._triples.size()); + + // Convert all the `IndexScan`s to `SubtreePlan`s with the appropriate ids + // set. + for (const auto& [scan, coveredTriples] : scans) { + auto plan = makeSubtreePlan(scan); + // This is equivalent to a join between the covered triples, so we must mark + // all included nodes. + for (auto tripleIdx : coveredTriples) { + plan._idsOfIncludedNodes |= (1ULL << tripleIdx); + } + // Empty vectors of replacement plans for smaller numbers of triples. + for (size_t i = plans.size(); i < coveredTriples.size(); ++i) { + plans.push_back({}); + } + plans.at(coveredTriples.size() - 1).push_back(std::move(plan)); + } + return plans; +} + // ______________________________________________________________________________________ auto QueryPlanner::createJoinWithHasPredicateScan(const SubtreePlan& a, const SubtreePlan& b, @@ -3036,17 +3118,7 @@ void QueryPlanner::GraphPatternPlanner::visitBasicGraphPattern( const parsedQuery::BasicGraphPattern& v) { // A basic graph patterns consists only of triples. First collect all // the bound variables. - for (const SparqlTriple& t : v._triples) { - if (t.s_.isVariable()) { - boundVariables_.insert(t.s_.getVariable()); - } - if (auto predicate = t.getPredicateVariable()) { - boundVariables_.insert(predicate.value()); - } - if (t.o_.isVariable()) { - boundVariables_.insert(t.o_.getVariable()); - } - } + v.collectAllContainedVariables(boundVariables_); // Then collect the triples. Transform each triple with a property path to // an equivalent form without property path (using `seedFromPropertyPath`). 
@@ -3157,7 +3229,7 @@ void QueryPlanner::GraphPatternPlanner::visitPathSearch( // _______________________________________________________________ SubtreePlan QueryPlanner::getMaterializedViewIndexScanPlan( - const parsedQuery::MaterializedViewQuery& viewQuery) { + const parsedQuery::MaterializedViewQuery& viewQuery) const { return makeSubtreePlan( _qec->materializedViewsManager().makeIndexScan(_qec, viewQuery)); } @@ -3304,11 +3376,14 @@ void QueryPlanner::GraphPatternPlanner::visitSubquery( // _______________________________________________________________ void QueryPlanner::GraphPatternPlanner::optimizeCommutatively() { + auto replacementPlans = + planner_.createMaterializedViewJoinReplacements(candidateTriples_); auto tg = planner_.createTripleGraph(&candidateTriples_); - auto lastRow = planner_ - .fillDpTab(tg, rootPattern_->_filters, - rootPattern_->textLimits_, candidatePlans_) - .back(); + auto lastRow = + planner_ + .fillDpTab(tg, rootPattern_->_filters, rootPattern_->textLimits_, + candidatePlans_, std::move(replacementPlans)) + .back(); candidateTriples_._triples.clear(); candidatePlans_.clear(); candidatePlans_.push_back(std::move(lastRow)); @@ -3325,3 +3400,65 @@ void QueryPlanner::GraphPatternPlanner::visitDescribe( candidatePlans_.push_back(std::vector{std::move(describeOp)}); planner_.checkCancellation(); } + +// _______________________________________________________________ +std::pair +QueryPlanner::findApplicableReplacementPlans( + ReplacementPlans& allReplacementPlans, uint64_t coveredNodeIds, + bool useGreedyPlanning) { + // TODO This could be hash-map based if we would return the + // indices in the create helper and pass them as part of `replacementPlans`. + bool hasApplicableReplacementPlans = false; + ReplacementPlans applicableReplacementPlans; + for (auto& rPlans : allReplacementPlans) { + std::vector applicable; + for (auto& plan : rPlans) { + // Nodes covered by plan must be a subset of the covered nodes. 
+ if ((plan._idsOfIncludedNodes & coveredNodeIds) == + plan._idsOfIncludedNodes) { + applicable.push_back(std::move(plan)); + hasApplicableReplacementPlans = true; + } + } + applicableReplacementPlans.push_back(std::move(applicable)); + } + + // Filter the plans to be disjoint for greedy planning. This is done in + // reversed order of the number of triples they cover, s.t. plans covering + // more triples are preferred over smaller ones. + if (useGreedyPlanning) { + uint64_t nodesCoveredByReplacementPlans = 0; + for (auto& plans : applicableReplacementPlans | ql::views::reverse) { + ql::erase_if(plans, [&](SubtreePlan& plan) { + // A plan is dropped if it overlaps with a plan that was already kept. + // Only kept plans extend the covered set: a dropped plan must not + // block smaller plans that are disjoint from all kept plans. + bool res = + (plan._idsOfIncludedNodes & nodesCoveredByReplacementPlans) != 0; + if (!res) { + nodesCoveredByReplacementPlans |= plan._idsOfIncludedNodes; + } + return res; + }); + } + } + + return {std::move(applicableReplacementPlans), hasApplicableReplacementPlans}; +} + +// _______________________________________________________________ +void QueryPlanner::useReplacementPlansForGreedyPlanner( + ReplacementPlans& applicableReplacementPlans, + std::vector& connectedComponent) { + // Remove nodes from the `connectedComponent` that are covered by replacement + // plans. + for (const auto& plans : applicableReplacementPlans) { + for (const auto& plan : plans) { + ql::erase_if(connectedComponent, [&plan](const auto& c) { + return (plan._idsOfIncludedNodes & c._idsOfIncludedNodes) != 0; + }); + } + } + + // Insert replacement plans as leaf plans.
+ for (auto& plans : applicableReplacementPlans) { + for (auto& plan : plans) { + connectedComponent.push_back(std::move(plan)); + } + } +} diff --git a/src/engine/QueryPlanner.h b/src/engine/QueryPlanner.h index 94c6ce9a30..9dd3a6f971 100644 --- a/src/engine/QueryPlanner.h +++ b/src/engine/QueryPlanner.h @@ -65,15 +65,8 @@ class QueryPlanner { Node(size_t id, SparqlTriple t, std::optional graphVariable = std::nullopt) : id_(id), triple_(std::move(t)) { - if (triple_.s_.isVariable()) { - _variables.insert(triple_.s_.getVariable()); - } - if (auto predicate = triple_.getPredicateVariable()) { - _variables.insert(predicate.value()); - } - if (triple_.o_.isVariable()) { - _variables.insert(triple_.o_.getVariable()); - } + triple_.forEachVariable( + [this](const auto& var) { _variables.insert(var); }); if (graphVariable.has_value()) { _variables.insert(std::move(graphVariable).value()); } @@ -450,6 +443,19 @@ class QueryPlanner { const SubtreePlan& b, const JoinColumns& jcs); + // Helper that generates `IndexScan` query plans on materialized views if they + // can be used to avoid joins between some of the `triples`. The resulting + // plans for part of the `triples` are given in a vector of query planning + // rounds in which they should be added to the planner. + // + // For example, at index 1 there is a vector of query plans that should be + // added in round 1 of the dynamic programming algorithm. For the greedy + // algorithm, the `useReplacementPlansForGreedyPlanner` helper handles the + // necessary steps. 
+ using ReplacementPlans = std::vector>; + ReplacementPlans createMaterializedViewJoinReplacements( + const parsedQuery::BasicGraphPattern& triples) const; + vector getOrderByRow( const ParsedQuery& pq, const std::vector>& dpTab) const; @@ -575,7 +581,8 @@ class QueryPlanner { */ vector> fillDpTab( const TripleGraph& graph, std::vector fs, - TextLimitMap& textLimits, const vector>& children); + TextLimitMap& textLimits, const vector>& children, + ReplacementPlans replacementPlans); // Internal subroutine of `fillDpTab` that only works on a single connected // component of the input. Throws if the subtrees in the `connectedComponent` @@ -584,7 +591,8 @@ class QueryPlanner { runDynamicProgrammingOnConnectedComponent( std::vector connectedComponent, const FiltersAndOptionalSubstitutes& filters, - const TextLimitVec& textLimits, const TripleGraph& tg) const; + const TextLimitVec& textLimits, const TripleGraph& tg, + const ReplacementPlans& replacementPlans) const; // Same as `runDynamicProgrammingOnConnectedComponent`, but uses a greedy // algorithm that always greedily chooses the smallest result of the possible @@ -592,7 +600,8 @@ class QueryPlanner { std::vector runGreedyPlanningOnConnectedComponent( std::vector connectedComponent, const FiltersAndOptionalSubstitutes& filters, - const TextLimitVec& textLimits, const TripleGraph& tg) const; + const TextLimitVec& textLimits, const TripleGraph& tg, + const ReplacementPlans& replacementPlans) const; // Return the number of connected subgraphs is the `graph`, or `budget + 1`, // if the number of subgraphs is `> budget`. This is used to analyze the @@ -613,7 +622,7 @@ class QueryPlanner { // Given a `MaterializedViewQuery` construct a `SubtreePlan` for an // `IndexScan` operation on the requested materialized view. 
SubtreePlan getMaterializedViewIndexScanPlan( - const parsedQuery::MaterializedViewQuery& viewQuery); + const parsedQuery::MaterializedViewQuery& viewQuery) const; // An internal helper class that encapsulates the functionality to optimize // a single graph pattern. It tightly interacts with the outer `QueryPlanner` @@ -721,7 +730,36 @@ class QueryPlanner { static size_t findSmallestExecutionTree( const std::vector& lastRow); static size_t findUniqueNodeIds( - const std::vector& connectedComponent); + const std::vector& connectedComponent, + bool allowReplacementPlans = false); + + // Helper for `fillDpTab` that extracts a subset of possible + // `ReplacementPlans` that is applicable to a connected component given by the + // covered node ids of the component. + // + // If the greedy query planning mode is active, this function guarantees that + // the returned replacement plans are disjoint with regard to their covered + // node ids. + // + // The function returns the applicable replacement plans and a boolean for + // quickly checking whether any were found. + // + // NOTE: This function is destructive w.r.t. `allReplacementPlans`: the used + // replacement plans are moved out. + static std::pair findApplicableReplacementPlans( + ReplacementPlans& allReplacementPlans, uint64_t coveredNodeIds, + bool useGreedyPlanning); + + // Helper for `fillDpTab` that inserts replacement plans into a connected + // component for greedy query planning. The `IndexScan` plans for triples + // covered by the replacement plans are filtered out, s.t. the greedy planner + // is forced to use the replacement plans. + // + // NOTE: For this to work correctly the nodes covered by the replacement plans + // must be disjoint.
+ static void useReplacementPlansForGreedyPlanner( + ReplacementPlans& applicableReplacementPlans, + std::vector& connectedComponent); /// if this Planner is not associated with a queryExecutionContext we are only /// in the unit test mode diff --git a/src/global/RuntimeParameters.cpp b/src/global/RuntimeParameters.cpp index 5a409a2c32..5915513615 100644 --- a/src/global/RuntimeParameters.cpp +++ b/src/global/RuntimeParameters.cpp @@ -51,6 +51,7 @@ RuntimeParameters::RuntimeParameters() { add(materializedViewWriterMemory_); add(defaultQueryTimeout_); add(sortInMemoryThreshold_); + add(enableMaterializedViewQueryRewrite_); defaultQueryTimeout_.setParameterConstraint( [](std::chrono::seconds value, std::string_view parameterName) { diff --git a/src/global/RuntimeParameters.h b/src/global/RuntimeParameters.h index 67cdfbf3f8..b8679890ab 100644 --- a/src/global/RuntimeParameters.h +++ b/src/global/RuntimeParameters.h @@ -135,6 +135,11 @@ struct RuntimeParameters { MemorySizeParameter sortInMemoryThreshold_{ ad_utility::MemorySize::gigabytes(5), "sort-in-memory-threshold"}; + // If set, the query planner checks if suitable materialized views are loaded + // to substitute more expensive query plans. + Bool enableMaterializedViewQueryRewrite_{ + true, "enable-materialized-view-query-rewrite"}; + // ___________________________________________________________________________ // IMPORTANT NOTE: IF YOU ADD PARAMETERS ABOVE, ALSO REGISTER THEM IN THE // CONSTRUCTOR, S.T. THEY CAN ALSO BE ACCESSED VIA THE RUNTIME INTERFACE. 
diff --git a/src/parser/CMakeLists.txt b/src/parser/CMakeLists.txt index 8c76f41716..1629ea784d 100644 --- a/src/parser/CMakeLists.txt +++ b/src/parser/CMakeLists.txt @@ -30,5 +30,6 @@ add_library(parser Quads.cpp UpdateTriples.cpp MaterializedViewQuery.cpp + GraphPatternAnalysis.cpp ) qlever_target_link_libraries(parser sparqlParser parserData sparqlExpressions rdfEscaping global re2::re2 util engine index rdfTypes) diff --git a/src/parser/GraphPatternAnalysis.cpp b/src/parser/GraphPatternAnalysis.cpp new file mode 100644 index 0000000000..8969420ae4 --- /dev/null +++ b/src/parser/GraphPatternAnalysis.cpp @@ -0,0 +1,30 @@ +// Copyright 2026 The QLever Authors, in particular: +// +// 2026 Christoph Ullinger , UFR +// +// UFR = University of Freiburg, Chair of Algorithms and Data Structures + +#include "parser/GraphPatternAnalysis.h" + +namespace graphPatternAnalysis { + +// _____________________________________________________________________________ +bool BasicGraphPatternsInvariantTo::operator()( + const parsedQuery::Bind& bind) const { + return !variables_.contains(bind._target); +} + +// _____________________________________________________________________________ +bool BasicGraphPatternsInvariantTo::operator()( + const parsedQuery::Values& valuesClause) const { + const auto& [variables, values] = valuesClause._inlineValues; + return + // There is exactly one row inside the `VALUES`. + values.size() == 1 && + // The `VALUES` doesn't bind to any of the `variables_`. 
+ ql::ranges::none_of(variables, [this](const auto& var) { + return variables_.contains(var); + }); +} + +} // namespace graphPatternAnalysis diff --git a/src/parser/GraphPatternAnalysis.h b/src/parser/GraphPatternAnalysis.h new file mode 100644 index 0000000000..91f7c659c0 --- /dev/null +++ b/src/parser/GraphPatternAnalysis.h @@ -0,0 +1,53 @@ +// Copyright 2026 The QLever Authors, in particular: +// +// 2026 Christoph Ullinger , UFR +// +// UFR = University of Freiburg, Chair of Algorithms and Data Structures + +#ifndef QLEVER_SRC_PARSER_GRAPHPATTERNANALYSIS_H_ +#define QLEVER_SRC_PARSER_GRAPHPATTERNANALYSIS_H_ + +#include "parser/GraphPatternOperation.h" + +// This module contains helpers for analyzing the structure of graph patterns. + +// _____________________________________________________________________________ +namespace graphPatternAnalysis { + +// Check whether certain graph patterns can be ignored when we are only +// interested in the bindings for variables from `variables_` as they do not +// affect the result for these `variables_`. +// +// For example: A basic graph pattern (a list of triples) is invariant to a +// `BIND` statement whose target variable is not contained in the basic graph +// pattern, because the `BIND` only adds its own column, but neither adds nor +// deletes result rows. +// +// This is currently used for the `MaterializedViewsManager`'s +// `QueryPatternCache`. +// +// NOTE: This does not guarantee completeness, so it might return `false` even +// though we could be invariant to a `GraphPatternOperation`. +struct BasicGraphPatternsInvariantTo { + ad_utility::HashSet variables_; + + bool operator()(const parsedQuery::Bind& bind) const; + bool operator()(const parsedQuery::Values& values) const; + + template + bool operator()(const T&) const { + // The presence of any of these operations might remove or duplicate rows. 
+ namespace pq = parsedQuery; + static_assert( + ad_utility::SimilarToAny< + T, pq::Optional, pq::Union, pq::Subquery, pq::TransPath, + pq::BasicGraphPattern, pq::Service, pq::PathQuery, pq::SpatialQuery, + pq::TextSearchQuery, pq::Minus, pq::GroupGraphPattern, pq::Describe, + pq::Load, pq::NamedCachedResult, pq::MaterializedViewQuery>); + return false; + } +}; + +} // namespace graphPatternAnalysis + +#endif // QLEVER_SRC_PARSER_GRAPHPATTERNANALYSIS_H_ diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index b92e2eb46d..027adbbaaa 100644 --- a/src/parser/GraphPatternOperation.cpp +++ b/src/parser/GraphPatternOperation.cpp @@ -16,6 +16,7 @@ #include "parser/TripleComponent.h" #include "util/Exception.h" #include "util/Forward.h" +#include "util/VariantRangeFilter.h" namespace parsedQuery { @@ -81,4 +82,26 @@ void BasicGraphPattern::appendTriples(BasicGraphPattern other) { auto inner = _expression.getDescriptor(); return "BIND (" + inner + " AS " + _target.name() + ")"; } + +// ____________________________________________________________________________ +void BasicGraphPattern::collectAllContainedVariables( + ad_utility::HashSet& vars) const { + for (const SparqlTriple& t : _triples) { + t.forEachVariable([&vars](const auto& var) { vars.insert(var); }); + } +} + +// _____________________________________________________________________________ +ad_utility::HashSet getVariablesPresentInFirstBasicGraphPattern( + const std::vector& graphPatterns) { + ad_utility::HashSet vars; + auto basicGraphPatterns = + ad_utility::filterRangeOfVariantsByType( + graphPatterns); + if (!ql::ranges::empty(basicGraphPatterns)) { + (*basicGraphPatterns.begin()).collectAllContainedVariables(vars); + } + return vars; +} + } // namespace parsedQuery diff --git a/src/parser/GraphPatternOperation.h b/src/parser/GraphPatternOperation.h index 4f16be3f9a..9e6687d58a 100644 --- a/src/parser/GraphPatternOperation.h +++ b/src/parser/GraphPatternOperation.h @@ 
-80,8 +80,21 @@ struct BasicGraphPattern { std::vector _triples; /// Append the triples from `other` to this `BasicGraphPattern` void appendTriples(BasicGraphPattern other); + + // Collect all the `Variable`s present in this `BasicGraphPattern` and add + // them to a `HashSet`. + void collectAllContainedVariables(ad_utility::HashSet& vars) const; }; +// Extract all variables present in the first `BasicGraphPattern` contained in +// a vector of `GraphPatternOperation`s. It is used for skipping some graph +// patterns in `MaterializedViewQueryAnalysis.cpp`. +// +// IMPORTANT: This function does not consider variables that are contained in +// other types of `GraphPatternOperation`s. +ad_utility::HashSet getVariablesPresentInFirstBasicGraphPattern( + const std::vector& graphPatterns); + /// A `Values` clause struct Values { SparqlValues _inlineValues; diff --git a/src/parser/MaterializedViewQuery.cpp b/src/parser/MaterializedViewQuery.cpp index 7ad56493e9..2ee31d2a4d 100644 --- a/src/parser/MaterializedViewQuery.cpp +++ b/src/parser/MaterializedViewQuery.cpp @@ -92,6 +92,12 @@ MaterializedViewQuery::MaterializedViewQuery(const SparqlTriple& triple) { addRequestedColumn(requestedColumn, simpleTriple.o_); } +// _____________________________________________________________________________ +MaterializedViewQuery::MaterializedViewQuery(std::string name, + RequestedColumns requestedColumns) + : viewName_{std::move(name)}, + requestedColumns_{std::move(requestedColumns)} {} + // _____________________________________________________________________________ ad_utility::HashSet MaterializedViewQuery::getVarsToKeep() const { ad_utility::HashSet varsToKeep; diff --git a/src/parser/MaterializedViewQuery.h b/src/parser/MaterializedViewQuery.h index 4a44b4f9fc..886e239943 100644 --- a/src/parser/MaterializedViewQuery.h +++ b/src/parser/MaterializedViewQuery.h @@ -47,7 +47,8 @@ struct MaterializedViewQuery : MagicServiceQuery { // column names in the query result or literals/IRIs
to restrict the column // on. This can be used for filtering the results and reading any number of // payload columns from the materialized view. - ad_utility::HashMap requestedColumns_; + using RequestedColumns = ad_utility::HashMap; + RequestedColumns requestedColumns_; // This constructor takes an IRI consisting of the magic service IRI for // materialized views with the view name as a suffix. If this is used, add the @@ -58,6 +59,9 @@ struct MaterializedViewQuery : MagicServiceQuery { // are necessary in this case. explicit MaterializedViewQuery(const SparqlTriple& triple); + // For query rewriting: Initialize directly using name and requested columns. + MaterializedViewQuery(std::string name, RequestedColumns requestedColumns); + void addParameter(const SparqlTriple& triple) override; // Return the variables that should be visible from this read on the diff --git a/src/parser/PropertyPath.cpp b/src/parser/PropertyPath.cpp index cccfad61fe..b405a70ccc 100644 --- a/src/parser/PropertyPath.cpp +++ b/src/parser/PropertyPath.cpp @@ -130,6 +130,18 @@ bool PropertyPath::isIri() const { return std::holds_alternative(path_); } +// _____________________________________________________________________________ +const std::vector& PropertyPath::getSequence() const { + AD_CONTRACT_CHECK(isSequence()); + return std::get(path_).children_; +} + +// _____________________________________________________________________________ +bool PropertyPath::isSequence() const { + return std::holds_alternative(path_) && + std::get(path_).modifier_ == Modifier::SEQUENCE; +} + // _____________________________________________________________________________ std::optional> PropertyPath::getChildOfInvertedPath() const { diff --git a/src/parser/PropertyPath.h b/src/parser/PropertyPath.h index 08fa199863..0daab58efe 100644 --- a/src/parser/PropertyPath.h +++ b/src/parser/PropertyPath.h @@ -134,6 +134,13 @@ class PropertyPath { // otherwise. 
bool isIri() const; + // If the path is a sequence, return the children (that is, the parts of the + // sequence). If the path is not a sequence this will throw. + const std::vector& getSequence() const; + + // Check if the path is a sequence. + bool isSequence() const; + // If the path is a modified path with an inverse modifier, return the pointer // to its only child. Otherwise, return nullptr. std::optional> diff --git a/src/parser/SparqlTriple.h b/src/parser/SparqlTriple.h index 2ba815c743..e0212a4a73 100644 --- a/src/parser/SparqlTriple.h +++ b/src/parser/SparqlTriple.h @@ -133,6 +133,19 @@ class SparqlTriple auto ptr = std::get_if(&p_); return (ptr != nullptr && *ptr == variable); } + + // Call a function for every variable contained in the triple. + void forEachVariable(auto function) const { + if (s_.isVariable()) { + function(s_.getVariable()); + } + if (auto predicate = getPredicateVariable()) { + function(predicate.value()); + } + if (o_.isVariable()) { + function(o_.getVariable()); + } + } }; #endif // QLEVER_SRC_PARSER_SPARQLTRIPLE_H diff --git a/src/util/StringPairHashMap.h b/src/util/StringPairHashMap.h new file mode 100644 index 0000000000..26a0083cd0 --- /dev/null +++ b/src/util/StringPairHashMap.h @@ -0,0 +1,71 @@ +// Copyright 2026 The QLever Authors, in particular: +// +// 2026 Christoph Ullinger , UFR +// +// UFR = University of Freiburg, Chair of Algorithms and Data Structures + +#ifndef QLEVER_SRC_UTIL_STRINGPAIRHASHMAP_H_ +#define QLEVER_SRC_UTIL_STRINGPAIRHASHMAP_H_ + +#include "util/HashMap.h" + +// This module provides a modified version of `ad_utility::HashMap` that uses +// pairs of strings as keys. Unlike the default hash map it allows looking up +// values with pairs of string views as keys. This is implemented using custom +// hash and equality operators. + +// TODO This could be extended to support `std::tuple` or +// `std::array`, not only `std::pair`, and other transparently comparable types. 
+ +// _____________________________________________________________________________ +namespace ad_utility { + +// _____________________________________________________________________________ +namespace detail { + +using StringPair = std::pair; +using StringViewPair = std::pair; + +// _____________________________________________________________________________ +struct StringPairHash { + // Allows looking up values from a hash map with `StringPair` keys also with + // `StringViewPair`. + using is_transparent = void; + + size_t operator()(const StringPair& p) const { + return absl::HashOf(p.first, p.second); + } + + size_t operator()(const StringViewPair& p) const { + return absl::HashOf(p.first, p.second); + } +}; + +// _____________________________________________________________________________ +struct StringPairEq { + using is_transparent = void; + + bool operator()(const StringPair& a, const StringPair& b) const { + return a == b; + } + + bool operator()(const StringPair& a, const StringViewPair& b) const { + return a.first == b.first && a.second == b.second; + } + + bool operator()(const StringViewPair& a, const StringPair& b) const { + return b.first == a.first && b.second == a.second; + } +}; + +} // namespace detail + +template +using StringPairHashMap = + ad_utility::HashMap; + +} // namespace ad_utility + +#endif // QLEVER_SRC_UTIL_STRINGPAIRHASHMAP_H_ diff --git a/src/util/VariantRangeFilter.h b/src/util/VariantRangeFilter.h new file mode 100644 index 0000000000..b6a5661488 --- /dev/null +++ b/src/util/VariantRangeFilter.h @@ -0,0 +1,26 @@ +// Copyright 2026 The QLever Authors, in particular: +// +// 2026 Christoph Ullinger , UFR +// +// UFR = University of Freiburg, Chair of Algorithms and Data Structures + +#ifndef QLEVER_SRC_UTIL_VARIANTRANGEFILTER_H +#define QLEVER_SRC_UTIL_VARIANTRANGEFILTER_H + +#include "backports/algorithm.h" +#include "util/TransparentFunctors.h" + +namespace ad_utility { + +// Helper that filters a range, like `std::vector` 
which contains `std::variant` +// elements by a certain type `T` and returns a view of the contained values. +CPP_template(typename T, typename R)( + requires ql::ranges::range) auto filterRangeOfVariantsByType(const R& + range) { + return range | ql::views::filter(holdsAlternative) | + ql::views::transform(get); +} + +} // namespace ad_utility + +#endif // QLEVER_SRC_UTIL_VARIANTRANGEFILTER_H diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 32aa042017..6653abc2ea 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -193,6 +193,8 @@ addLinkAndDiscoverTestSerial(QueryPlannerSpatialJoinTest engine) addLinkAndDiscoverTestNoLibs(HashMapTest) +addLinkAndDiscoverTestNoLibs(StringPairHashMapTest) + addLinkAndDiscoverTest(HashSetTest) addLinkAndDiscoverTestSerial(GroupByTest engine) @@ -493,3 +495,5 @@ addLinkAndDiscoverTest(MaterializedViewsTest qlever engine server) addLinkAndDiscoverTestNoLibs(ConstexprMapTest) addLinkAndDiscoverTestNoLibs(ParallelExecutorTest) + +addLinkAndDiscoverTestNoLibs(VariantRangeFilterTest) diff --git a/test/MaterializedViewsTest.cpp b/test/MaterializedViewsTest.cpp index 887263df63..2809374d34 100644 --- a/test/MaterializedViewsTest.cpp +++ b/test/MaterializedViewsTest.cpp @@ -8,8 +8,10 @@ #include #include "./MaterializedViewsTestHelpers.h" +#include "./QueryPlannerTestHelpers.h" #include "./ServerTestHelpers.h" #include "./util/HttpRequestHelpers.h" +#include "./util/RuntimeParametersTestHelpers.h" #include "engine/IndexScan.h" #include "engine/MaterializedViews.h" #include "engine/QueryExecutionContext.h" @@ -556,6 +558,16 @@ TEST_F(MaterializedViewsTest, ManualConfigurations) { ::testing::Eq(V{"?o"}))); } + // Test internal constructor. 
+ { + ViewQuery query{"testView", ViewQuery::RequestedColumns{ + {V{"?s"}, V{"?s2"}}, {V{"?o"}, V{"?o2"}}}}; + EXPECT_EQ(query.viewName_, "testView"); + EXPECT_THAT(query.getVarsToKeep(), + ::testing::UnorderedElementsAre(::testing::Eq(V{"?s2"}), + ::testing::Eq(V{"?o2"}))); + } + // Unsupported format version. { auto plan = qlv().parseAndPlanQuery(simpleWriteQuery_); @@ -786,3 +798,92 @@ TEST_F(MaterializedViewsTestLarge, LazyScan) { EXPECT_EQ(count.getInt(), 2 * numFakeSubjects_); } } + +// Example queries for testing query rewriting. +constexpr std::string_view simpleChain = "SELECT * { ?s ?m . ?m ?o }"; +constexpr std::string_view simpleChainRenamed = + "SELECT * { ?b ?c . ?a ?b }"; +constexpr std::string_view simpleChainFixed = + "SELECT * { / ?c . }"; +constexpr std::string_view simpleChainPlusJoin = + "SELECT * { ?s / ?o . ?s ?o2 }"; +constexpr std::string_view simpleChainRenamedPlusBind = + "SELECT ?a ?b ?c ?x { ?b ?c . ?a ?b . BIND(5 AS ?x) }"; + +// _____________________________________________________________________________ +TEST_P(MaterializedViewsQueryRewriteTest, simpleChain) { + namespace h = queryPlannerTestHelpers; + + RewriteTestParams p = GetParam(); + auto cleanup = + setRuntimeParameterForTest<&RuntimeParameters::queryPlanningBudget_>( + p.queryPlanningBudget_); + + // Test dataset and query. + const std::string chainTtl = + " . \n" + " . \n" + " . \n" + " . \n" + " \"abc\" . \n" + " . \n"; + const std::string onDiskBase = "_materializedViewRewriteChain"; + const std::string viewName = "testViewChain"; + + // Initialized libqlever. + materializedViewsTestHelpers::makeTestIndex(onDiskBase, chainTtl); + auto cleanUp = absl::MakeCleanup( + [&]() { materializedViewsTestHelpers::removeTestIndex(onDiskBase); }); + qlever::EngineConfig config; + config.baseName_ = onDiskBase; + qlever::Qlever qlv{config}; + + // Without the materialized view, a regular join is executed. 
+ h::expect(std::string{simpleChain}, + h::Join(h::IndexScanFromStrings("?s", "", "?m"), + h::IndexScanFromStrings("?m", "", "?o"))); + + // Write a chain structure to the materialized view. + MaterializedViewsManager manager{onDiskBase}; + manager.writeViewToDisk(viewName, qlv.parseAndPlanQuery(p.writeQuery_)); + qlv.loadMaterializedView(viewName); + + // With the materialized view loaded, an index scan on the view is performed + // instead of a regular join. + auto qpExpect = [](qlever::Qlever& qlv, const auto& query, + ::testing::Matcher matcher, + source_location sourceLocation = AD_CURRENT_SOURCE_LOC()) { + auto l = generateLocationTrace(sourceLocation); + auto [qet, qec, parsed] = qlv.parseAndPlanQuery(std::string{query}); + EXPECT_THAT(*qet, matcher); + }; + auto viewScan = [](std::string a, std::string b, std::string c) { + return h::IndexScanFromStrings(std::move(a), std::move(b), std::move(c), + {Permutation::Enum::SPO}); + }; + + qpExpect(qlv, simpleChain, viewScan("?s", "?m", "?o")); + qpExpect(qlv, simpleChainRenamed, viewScan("?a", "?b", "?c")); + qpExpect(qlv, simpleChainFixed, + viewScan("", "?_QLever_internal_variable_qp_0", "?c")); + qpExpect(qlv, simpleChainPlusJoin, + h::Join(viewScan("?s", "?_QLever_internal_variable_qp_0", "?o"), + h::IndexScanFromStrings("?s", "", "?o2"))); + + // TODO Test overlapping view plans. +} + +// _____________________________________________________________________________ +INSTANTIATE_TEST_SUITE_P( + MaterializedViewsTest, MaterializedViewsQueryRewriteTest, + ::testing::Values( + // Default case. + RewriteTestParams{std::string{simpleChain}, 1500}, + + // Default query for writing the materialized view, but forced greedy + // planning. + RewriteTestParams{std::string{simpleChain}, 1}, + + // An additional `BIND` is ignored and the view can still be used for + // query rewriting. Also uses a different sorting. 
+ RewriteTestParams{std::string{simpleChainRenamedPlusBind}, 1500})); diff --git a/test/MaterializedViewsTestHelpers.h b/test/MaterializedViewsTestHelpers.h index 2829fe5490..5b1d98438a 100644 --- a/test/MaterializedViewsTestHelpers.h +++ b/test/MaterializedViewsTestHelpers.h @@ -143,6 +143,29 @@ class MaterializedViewsTestLarge : public MaterializedViewsTest { } }; +// _____________________________________________________________________________ +struct RewriteTestParams { + // Query to write the test view. + std::string writeQuery_; + + // Enforce a query planning budget to allow testing the greedy query planner + // with toy examples. + size_t queryPlanningBudget_; +}; + +// _____________________________________________________________________________ +class MaterializedViewsQueryRewriteTest + : public ::testing::TestWithParam { + protected: + std::stringstream log_; + + // ___________________________________________________________________________ + void SetUp() override { ad_utility::setGlobalLoggingStream(&log_); } + + // ___________________________________________________________________________ + void TearDown() override { ad_utility::setGlobalLoggingStream(&std::cout); } +}; + } // namespace materializedViewsTestHelpers #endif // QLEVER_TEST_MATERIALIZEDVIEWSTESTHELPERS_H_ diff --git a/test/StringPairHashMapTest.cpp b/test/StringPairHashMapTest.cpp new file mode 100644 index 0000000000..d6ee1b3dfd --- /dev/null +++ b/test/StringPairHashMapTest.cpp @@ -0,0 +1,62 @@ +// Copyright 2026 The QLever Authors, in particular: +// +// 2026 Christoph Ullinger , UFR +// +// UFR = University of Freiburg, Chair of Algorithms and Data Structures + +#include + +#include "util/StringPairHashMap.h" + +// _____________________________________________________________________________ +TEST(StringPairHashMapTest, InsertAndLookup) { + ad_utility::StringPairHashMap map; + + using ad_utility::detail::StringPair; + using ad_utility::detail::StringViewPair; + + // Insert using 
`std::string` pairs. + map[StringPair{"hello", "world"}] = 7; + map[StringPair{"foo", "bar"}] = 42; + + ASSERT_EQ(map.size(), 2u); + + // Lookup using `std::string_view` pairs. + auto it = map.find(StringViewPair{"hello", "world"}); + ASSERT_NE(it, map.end()); + ASSERT_EQ(it->second, 7); + + EXPECT_EQ(map.count(StringViewPair{"foo", "bar"}), 1u); + EXPECT_EQ(map.count(StringViewPair{"does not", "exist"}), 0u); +} + +// _____________________________________________________________________________ +TEST(StringPairHashMapTest, StringPairEq) { + using ad_utility::detail::StringPair; + using ad_utility::detail::StringViewPair; + ad_utility::detail::StringPairEq eq; + + StringPair a{"a", "b"}; + StringPair b{"x", "y"}; + StringPair c{"x", "g"}; + + EXPECT_TRUE(eq(a, a)); + EXPECT_FALSE(eq(a, b)); + EXPECT_FALSE(eq(a, c)); + + StringViewPair aEq{"a", "b"}; + StringViewPair aNe{"a", "c"}; + StringViewPair bNe{"f", "g"}; + + EXPECT_TRUE(eq(a, aEq)); + EXPECT_FALSE(eq(a, aNe)); + EXPECT_FALSE(eq(b, bNe)); + + EXPECT_TRUE(eq(aEq, a)); + EXPECT_FALSE(eq(aNe, a)); + EXPECT_FALSE(eq(bNe, b)); + + StringViewPair aSv{"a", "b"}; + + EXPECT_TRUE(eq(a, aSv)); +} diff --git a/test/VariantRangeFilterTest.cpp b/test/VariantRangeFilterTest.cpp new file mode 100644 index 0000000000..ca9036fd56 --- /dev/null +++ b/test/VariantRangeFilterTest.cpp @@ -0,0 +1,38 @@ +// Copyright 2026 The QLever Authors, in particular: +// +// 2026 Christoph Ullinger , UFR +// +// UFR = University of Freiburg, Chair of Algorithms and Data Structures + +#include + +#include "./util/GTestHelpers.h" +#include "util/VariantRangeFilter.h" + +namespace { + +// Helper for testing `filterRangeOfVariantsByType`. 
+template +void expectFilteredRange( + std::vector input, std::vector expected, + ad_utility::source_location location = AD_CURRENT_SOURCE_LOC()) { + auto l = generateLocationTrace(location); + auto matcher = liftMatcherToElementsAreArray>( + [](auto value) { return ::testing::Eq(value); }); + auto actual = ad_utility::filterRangeOfVariantsByType(input) | + ::ranges::to>; + EXPECT_THAT(actual, matcher(expected)); +} + +// _____________________________________________________________________________ +TEST(VariantRangeFilterTest, Test) { + using V = std::variant; + std::vector vec{1, 'c', true, false, true, 3, 'f'}; + + expectFilteredRange(vec, {1, 3}); + expectFilteredRange(vec, {'c', 'f'}); + expectFilteredRange(vec, {true, false, true}); + expectFilteredRange(vec, {}); +} + +} // namespace diff --git a/test/parser/CMakeLists.txt b/test/parser/CMakeLists.txt index 8810d5f332..45cfc320e7 100644 --- a/test/parser/CMakeLists.txt +++ b/test/parser/CMakeLists.txt @@ -8,6 +8,7 @@ addLinkAndDiscoverTest(BlankNodeExpressionTest engine) addLinkAndDiscoverTest(PropertyPathTest parser) addLinkAndDiscoverTest(UpdateTriplesTest parser) addLinkAndDiscoverTest(NamedCachedResultTest parser) +addLinkAndDiscoverTest(GraphPatternOperationTest parser) addLinkAndDiscoverTestSerial(SparqlAntlrParserTest parser engine) addLinkAndDiscoverTestSerial(SparqlAntlrParserUpdateTest parser engine) addLinkAndDiscoverTestSerial(SparqlAntlrParserExpressionTest parser sparqlExpressions engine) diff --git a/test/parser/GraphPatternOperationTest.cpp b/test/parser/GraphPatternOperationTest.cpp new file mode 100644 index 0000000000..71b11e9aae --- /dev/null +++ b/test/parser/GraphPatternOperationTest.cpp @@ -0,0 +1,38 @@ +// Copyright 2026 The QLever Authors, in particular: +// +// 2026 Christoph Ullinger , UFR +// +// UFR = University of Freiburg, Chair of Algorithms and Data Structures + +#include + +#include "parser/GraphPatternOperation.h" +#include "parser/SparqlTriple.h" + +// 
_____________________________________________________________________________ +TEST(GraphPatternOperationTest, BasicPatternContainedVars) { + SparqlTripleSimple example1{Variable{"?s"}, Variable{"?p"}, Variable{"?o"}}; + SparqlTripleSimple example2{ + ad_utility::triple_component::Iri::fromIriref(""), + ad_utility::triple_component::Iri::fromIriref("

"), Variable{"?o2"}}; + + auto triple1 = SparqlTriple::fromSimple(example1); + auto triple2 = SparqlTriple::fromSimple(example2); + + parsedQuery::BasicGraphPattern bgp{{triple1, triple2}}; + + ad_utility::HashSet vars; + bgp.collectAllContainedVariables(vars); + auto expectedVarsMatcher = ::testing::UnorderedElementsAre( + ::testing::Eq(Variable{"?s"}), ::testing::Eq(Variable{"?p"}), + ::testing::Eq(Variable{"?o"}), ::testing::Eq(Variable{"?o2"})); + EXPECT_THAT(vars, expectedVarsMatcher); + + parsedQuery::Bind bind{ + sparqlExpression::SparqlExpressionPimpl::makeVariableExpression( + Variable{"?x"}), + Variable{"?y"}}; + std::vector graphPatterns{bind, bgp}; + EXPECT_THAT(getVariablesPresentInFirstBasicGraphPattern(graphPatterns), + expectedVarsMatcher); +} diff --git a/test/parser/PropertyPathTest.cpp b/test/parser/PropertyPathTest.cpp index 0d99b1791b..de2cd749e1 100644 --- a/test/parser/PropertyPathTest.cpp +++ b/test/parser/PropertyPathTest.cpp @@ -288,3 +288,33 @@ TEST(PropertyPath, handlePath) { }), 2); } + +// _____________________________________________________________________________ +TEST(PropertyPath, Getters) { + auto path1 = PropertyPath::fromIri(iri1); + EXPECT_TRUE(path1.isIri()); + EXPECT_FALSE(path1.isSequence()); + EXPECT_EQ(path1.getIri(), iri1); + + auto path2 = PropertyPath::makeInverse(PropertyPath::fromIri(iri1)); + EXPECT_FALSE(path2.isIri()); + EXPECT_FALSE(path2.isSequence()); + + auto path3 = PropertyPath::makeAlternative( + {PropertyPath::fromIri(iri1), PropertyPath::fromIri(iri2)}); + EXPECT_FALSE(path3.isIri()); + EXPECT_FALSE(path3.isSequence()); + + auto path4 = PropertyPath::makeSequence( + {PropertyPath::fromIri(iri1), PropertyPath::fromIri(iri2)}); + EXPECT_FALSE(path4.isIri()); + EXPECT_TRUE(path4.isSequence()); + auto matchIri = [](ad_utility::triple_component::Iri iri) + -> ::testing::Matcher { + return ::testing::AllOf( + ::testing::Property(&PropertyPath::isIri, ::testing::IsTrue()), + 
::testing::Property(&PropertyPath::getIri, ::testing::Eq(iri))); + }; + EXPECT_THAT(path4.getSequence(), + ::testing::ElementsAre(matchIri(iri1), matchIri(iri2))); +}