-
Notifications
You must be signed in to change notification settings - Fork 107
Add utilities in preparation for materialized view query rewriting #2692
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
98eab72
04d0143
76509c7
e8ff99e
44e4f89
7d0fc41
2d34cfc
a2ea70b
cd263f4
41b733c
af74163
95d0256
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| // Copyright 2026 The QLever Authors, in particular: | ||
| // | ||
| // 2026 Christoph Ullinger <[email protected]>, UFR | ||
| // | ||
| // UFR = University of Freiburg, Chair of Algorithms and Data Structures | ||
|
|
||
| #include "parser/GraphPatternAnalysis.h" | ||
|
|
||
| namespace graphPatternAnalysis { | ||
|
|
||
| // _____________________________________________________________________________ | ||
| bool BasicGraphPatternsInvariantTo::operator()( | ||
| const parsedQuery::Bind& bind) const { | ||
| // The `BIND` only matters to us if its target is one of the `variables_`. | ||
| const bool bindsRelevantVariable = variables_.contains(bind._target); | ||
| return !bindsRelevantVariable; | ||
| } | ||
|
|
||
| // _____________________________________________________________________________ | ||
| bool BasicGraphPatternsInvariantTo::operator()( | ||
| const parsedQuery::Values& valuesClause) const { | ||
| const auto& [valuesVariables, rows] = valuesClause._inlineValues; | ||
| // Only a `VALUES` with exactly one row is accepted. | ||
| if (rows.size() != 1) { | ||
| return false; | ||
| } | ||
| // Additionally, the `VALUES` must not bind any of the `variables_`. | ||
| auto isRelevantVariable = [this](const auto& var) { | ||
| return variables_.contains(var); | ||
| }; | ||
| return ql::ranges::none_of(valuesVariables, isRelevantVariable); | ||
| } | ||
|
|
||
| } // namespace graphPatternAnalysis | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,53 @@ | ||
| // Copyright 2026 The QLever Authors, in particular: | ||
| // | ||
| // 2026 Christoph Ullinger <[email protected]>, UFR | ||
| // | ||
| // UFR = University of Freiburg, Chair of Algorithms and Data Structures | ||
|
|
||
| #ifndef QLEVER_SRC_PARSER_GRAPHPATTERNANALYSIS_H_ | ||
| #define QLEVER_SRC_PARSER_GRAPHPATTERNANALYSIS_H_ | ||
|
|
||
| #include "parser/GraphPatternOperation.h" | ||
|
|
||
| // This module contains helpers for analyzing the structure of graph patterns. | ||
|
|
||
| // _____________________________________________________________________________ | ||
| namespace graphPatternAnalysis { | ||
|
|
||
| // Check whether certain graph patterns can be ignored when we are only | ||
| // interested in the bindings for variables from `variables_` as they do not | ||
| // affect the result for these `variables_`. | ||
| // | ||
| // For example: A basic graph pattern (a list of triples) is invariant to a | ||
| // `BIND` statement whose target variable is not contained in the basic graph | ||
| // pattern, because the `BIND` only adds its own column, but neither adds nor | ||
| // deletes result rows. | ||
| // | ||
|
Comment on lines
+18
to
+25
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this statement actually true: Or will you in this case consider filter the whole thing out, because you see the filter and EVERYTHING has to be invariant? in that case please comment this, that this is only true for one step. But otherwise: The comment is now great to understand |
||
| // This is currently used for the `MaterializedViewsManager`'s | ||
| // `QueryPatternCache`. | ||
| // | ||
| // NOTE: This does not guarantee completeness, so it might return `false` even | ||
| // though we could be invariant to a `GraphPatternOperation`. | ||
| struct BasicGraphPatternsInvariantTo { | ||
| // The variables whose bindings we are interested in. | ||
| ad_utility::HashSet<Variable> variables_; | ||
|
|
||
| // Overloads for the operations that are inspected in detail. | ||
| bool operator()(const parsedQuery::Bind& bind) const; | ||
| bool operator()(const parsedQuery::Values& valuesClause) const; | ||
|
|
||
| // Fallback for every other operation: always answer `false`, because the | ||
| // presence of any of these operations might remove or duplicate rows. The | ||
| // `static_assert` spells out which operation types end up in this overload. | ||
| template <typename Operation> | ||
| bool operator()(const Operation&) const { | ||
| static_assert( | ||
| ad_utility::SimilarToAny< | ||
| Operation, parsedQuery::Optional, parsedQuery::Union, | ||
| parsedQuery::Subquery, parsedQuery::TransPath, | ||
| parsedQuery::BasicGraphPattern, parsedQuery::Service, | ||
| parsedQuery::PathQuery, parsedQuery::SpatialQuery, | ||
| parsedQuery::TextSearchQuery, parsedQuery::Minus, | ||
| parsedQuery::GroupGraphPattern, parsedQuery::Describe, | ||
| parsedQuery::Load, parsedQuery::NamedCachedResult, | ||
| parsedQuery::MaterializedViewQuery>); | ||
| return false; | ||
| } | ||
| }; | ||
|
|
||
| } // namespace graphPatternAnalysis | ||
|
|
||
| #endif // QLEVER_SRC_PARSER_GRAPHPATTERNANALYSIS_H_ | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,6 +16,7 @@ | |
| #include "parser/TripleComponent.h" | ||
| #include "util/Exception.h" | ||
| #include "util/Forward.h" | ||
| #include "util/VariantRangeFilter.h" | ||
|
|
||
| namespace parsedQuery { | ||
|
|
||
|
|
@@ -81,4 +82,26 @@ void BasicGraphPattern::appendTriples(BasicGraphPattern other) { | |
| auto inner = _expression.getDescriptor(); | ||
| return "BIND (" + inner + " AS " + _target.name() + ")"; | ||
| } | ||
|
|
||
| // ____________________________________________________________________________ | ||
| void BasicGraphPattern::collectAllContainedVariables( | ||
| ad_utility::HashSet<Variable>& vars) const { | ||
| // Existing entries of `vars` are kept, the variables of all triples are | ||
| // inserted in addition. | ||
| auto addToVars = [&vars](const auto& variable) { vars.insert(variable); }; | ||
| for (const SparqlTriple& triple : _triples) { | ||
| triple.forEachVariable(addToVars); | ||
| } | ||
| } | ||
|
|
||
| // _____________________________________________________________________________ | ||
| ad_utility::HashSet<Variable> getVariablesPresentInFirstBasicGraphPattern( | ||
| const std::vector<parsedQuery::GraphPatternOperation>& graphPatterns) { | ||
| ad_utility::HashSet<Variable> vars; | ||
| // View of only those operations that hold a `BasicGraphPattern`. | ||
| auto bgps = | ||
| ad_utility::filterRangeOfVariantsByType<parsedQuery::BasicGraphPattern>( | ||
| graphPatterns); | ||
| // If there is no `BasicGraphPattern` at all, `vars` stays empty. | ||
| if (auto firstBgp = bgps.begin(); firstBgp != bgps.end()) { | ||
| (*firstBgp).collectAllContainedVariables(vars); | ||

Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why not |
||
| } | ||
| return vars; | ||
| } | ||
|
|
||
| } // namespace parsedQuery | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -80,8 +80,21 @@ struct BasicGraphPattern { | |
| std::vector<SparqlTriple> _triples; | ||
| /// Append the triples from `other` to this `BasicGraphPattern` | ||
| void appendTriples(BasicGraphPattern other); | ||
|
|
||
| // Collect all the `Variable`s present in this `BasicGraphPattern` and add | ||
| // them to a `HashSet`. | ||
| void collectAllContainedVariables(ad_utility::HashSet<Variable>& vars) const; | ||
| }; | ||
|
|
||
| // Extract all variables present in the first `BasicGraphPattern` contained in | ||
| // a vector of `GraphPatternOperation`s. It is used for skipping some graph | ||
| // patterns in `MaterializedViewQueryAnalysis.cpp`. | ||
| // | ||
|
Comment on lines
+89
to
+92
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The comment is a little ambiguous:
|
||
| // IMPORTANT: This function does not consider variables that are contained in | ||
| // other types of `GraphPatternOperation`s. | ||
| ad_utility::HashSet<Variable> getVariablesPresentInFirstBasicGraphPattern( | ||
| const std::vector<parsedQuery::GraphPatternOperation>& graphPatterns); | ||
|
|
||
| /// A `Values` clause | ||
| struct Values { | ||
| SparqlValues _inlineValues; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -133,6 +133,19 @@ class SparqlTriple | |
| auto ptr = std::get_if<Variable>(&p_); | ||
| return (ptr != nullptr && *ptr == variable); | ||
| } | ||
|
|
||
| // Call a function for every variable contained in the triple. | ||
| void forEachVariable(auto function) const { | ||

Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add a requirement (invocable with a |
||
| // Shared logic for subject and object: only report the component if it | ||
| // actually is a variable. | ||
| auto reportIfVariable = [&function](const auto& component) { | ||
| if (component.isVariable()) { | ||
| function(component.getVariable()); | ||

Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. consider using |
||
| } | ||
| }; | ||
| // The positions are visited in the order subject, predicate, object. A | ||
| // variable that occurs in several positions is reported once per position. | ||
| reportIfVariable(s_); | ||
| if (auto predicateVariable = getPredicateVariable()) { | ||
| function(predicateVariable.value()); | ||
| } | ||
| reportIfVariable(o_); | ||
| } | ||
| }; | ||
|
|
||
| #endif // QLEVER_SRC_PARSER_SPARQLTRIPLE_H | ||
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,71 @@ | ||||||||||
| // Copyright 2026 The QLever Authors, in particular: | ||||||||||
| // | ||||||||||
| // 2026 Christoph Ullinger <[email protected]>, UFR | ||||||||||
| // | ||||||||||
| // UFR = University of Freiburg, Chair of Algorithms and Data Structures | ||||||||||
|
|
||||||||||
| #ifndef QLEVER_SRC_UTIL_STRINGPAIRHASHMAP_H_ | ||||||||||
| #define QLEVER_SRC_UTIL_STRINGPAIRHASHMAP_H_ | ||||||||||
|
|
||||||||||
| #include "util/HashMap.h" | ||||||||||
|
|
||||||||||
| // This module provides a modified version of `ad_utility::HashMap` that uses | ||||||||||
| // pairs of strings as keys. Unlike the default hash map it allows looking up | ||||||||||
| // values with pairs of string views as keys. This is implemented using custom | ||||||||||
| // hash and equality operators. | ||||||||||
|
Comment on lines
+14
to
+15
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
(give people something to google:)) |
||||||||||
|
|
||||||||||
| // TODO<ullingerc> This could be extended to support `std::tuple` or | ||||||||||
| // `std::array`, not only `std::pair`, and other transparently comparable types. | ||||||||||
|
|
||||||||||
| // _____________________________________________________________________________ | ||||||||||
| namespace ad_utility { | ||||||||||
|
|
||||||||||
| // _____________________________________________________________________________ | ||||||||||
| namespace detail { | ||||||||||
|
|
||||||||||
| using StringPair = std::pair<std::string, std::string>; | ||||||||||
| using StringViewPair = std::pair<std::string_view, std::string_view>; | ||||||||||
|
|
||||||||||
| // _____________________________________________________________________________ | ||||||||||
| struct StringPairHash { | ||||||||||
| // Marking the hash as transparent allows lookups in a hash map with | ||||||||||
| // `StringPair` keys to also be performed with a `StringViewPair`. | ||||||||||
|
Comment on lines
+31
to
+32
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Have you checked that absl doesn't already provide this (e.g. transparent hashing of tuple-like types?), same for the equality, isn't there an implicit and transparent equality for tuples in the STL (haven't checked yet myself, just want to make sure that this is needed). |
||||||||||
| using is_transparent = void; | ||||||||||
|
|
||||||||||
| // Hash of a key that owns its two strings. | ||||||||||
| size_t operator()(const StringPair& p) const { | ||||||||||
| const auto& [first, second] = p; | ||||||||||
| return absl::HashOf(first, second); | ||||||||||
| } | ||||||||||
|
|
||||||||||
| // Hash of a key that only views its two strings, computed by the same | ||||||||||
| // `absl::HashOf` call on the two components. | ||||||||||
| size_t operator()(const StringViewPair& p) const { | ||||||||||
| const auto& [first, second] = p; | ||||||||||
| return absl::HashOf(first, second); | ||||||||||
| } | ||||||||||
| }; | ||||||||||
|
|
||||||||||
| // _____________________________________________________________________________ | ||||||||||
| struct StringPairEq { | ||||||||||
| using is_transparent = void; | ||||||||||
|
|
||||||||||
| // Two owning keys: compare both components. | ||||||||||
| bool operator()(const StringPair& a, const StringPair& b) const { | ||||||||||
| return a.first == b.first && a.second == b.second; | ||||||||||
| } | ||||||||||
|
|
||||||||||
| // Owning key on the left, pair of views on the right. | ||||||||||
| bool operator()(const StringPair& a, const StringViewPair& b) const { | ||||||||||
| if (!(a.first == b.first)) { | ||||||||||
| return false; | ||||||||||
| } | ||||||||||
| return a.second == b.second; | ||||||||||
| } | ||||||||||
|
|
||||||||||
| // Pair of views on the left, owning key on the right: delegate to the | ||||||||||
| // mirrored overload above. | ||||||||||
| bool operator()(const StringViewPair& a, const StringPair& b) const { | ||||||||||
| return (*this)(b, a); | ||||||||||
| } | ||||||||||
| }; | ||||||||||
|
|
||||||||||
| } // namespace detail | ||||||||||
|
|
||||||||||
| // Hash map with `StringPair` keys that uses the transparent hash and equality | ||||||||||
| // functors from the `detail` namespace. | ||||||||||
| template <typename ValueType> | ||||||||||
| using StringPairHashMap = | ||||||||||
| HashMap<detail::StringPair, ValueType, | ||||||||||
|
Comment on lines
+63
to
+65
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this support types other than |
||||||||||
| detail::StringPairHash, | ||||||||||
| detail::StringPairEq>; | ||||||||||
|
|
||||||||||
| } // namespace ad_utility | ||||||||||
|
|
||||||||||
| #endif // QLEVER_SRC_UTIL_STRINGPAIRHASHMAP_H_ | ||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| // Copyright 2026 The QLever Authors, in particular: | ||
| // | ||
| // 2026 Christoph Ullinger <[email protected]>, UFR | ||
| // | ||
| // UFR = University of Freiburg, Chair of Algorithms and Data Structures | ||
|
|
||
| #ifndef QLEVER_SRC_UTIL_VARIANTRANGEFILTER_H | ||
| #define QLEVER_SRC_UTIL_VARIANTRANGEFILTER_H | ||
|
|
||
| #include "backports/algorithm.h" | ||
| #include "util/TransparentFunctors.h" | ||
|
|
||
| namespace ad_utility { | ||
|
|
||
| // Given a range of `std::variant` elements (for example a `std::vector`), | ||
| // return a view of the values of exactly those elements that hold the | ||
| // alternative `T`. The view is built on top of `range`, which is taken by | ||
| // reference, so `range` has to stay alive while the view is used. | ||
| CPP_template(typename T, typename R)( | ||
| requires ql::ranges::range<R>) auto filterRangeOfVariantsByType(const R& | ||
| range) { | ||
|
Comment on lines
+17
to
+19
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. doesn't need to be constrained to |
||
| // First drop all elements that hold a different alternative, then unwrap the | ||
| // remaining ones. | ||
| auto keepOnlyT = ql::views::filter(holdsAlternative<T>); | ||
| auto unwrapT = ql::views::transform(get<T>); | ||
| return range | keepOnlyT | unwrapT; | ||
| } | ||
|
|
||
| } // namespace ad_utility | ||
|
|
||
| #endif // QLEVER_SRC_UTIL_VARIANTRANGEFILTER_H | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Technically this module is currently untested (at least I cannot find the tests, otherwise please point me to them). I am not sure if this is a hard problem for me, but they should be easy to vibecode etc.