Skip to content

Commit 5c9f444

Browse files
author
Hannah Bast
committed
Merge remote-tracking branch 'origin/master' into update-dockerfile-to-ubuntu-2204
2 parents c0d9e18 + 39ca684 commit 5c9f444

File tree

102 files changed

+7875
-3673
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

102 files changed

+7875
-3673
lines changed

Dockerfile

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,21 @@ RUN apt-get update && apt-get install -y software-properties-common wget && add-
88
RUN wget https://apt.kitware.com/kitware-archive.sh && chmod +x kitware-archive.sh &&./kitware-archive.sh
99

1010
FROM base AS builder
11+
ARG TARGETPLATFORM
1112
RUN apt-get update && apt-get install -y build-essential cmake libicu-dev tzdata pkg-config uuid-runtime uuid-dev git libjemalloc-dev ninja-build libzstd-dev libssl-dev libboost1.83-dev libboost-program-options1.83-dev libboost-iostreams1.83-dev libboost-url1.83-dev
1213

1314
COPY . /qlever/
1415

1516
WORKDIR /qlever/
1617
ENV DEBIAN_FRONTEND=noninteractive
1718

19+
# Don't build and run tests on ARM64, as it takes too long on GitHub actions.
20+
# TODO: re-enable these tests as soon as we can use a native ARM64 platform to compile the docker container.
1821
WORKDIR /qlever/build/
19-
RUN cmake -DCMAKE_BUILD_TYPE=Release -DLOGLEVEL=INFO -DUSE_PARALLEL=true -D_NO_TIMING_TESTS=ON -GNinja .. && ninja
20-
RUN ctest --rerun-failed --output-on-failure
22+
RUN cmake -DCMAKE_BUILD_TYPE=Release -DLOGLEVEL=INFO -DUSE_PARALLEL=true -D_NO_TIMING_TESTS=ON -GNinja ..
# `$TARGETPLATFORM` is quoted in the checks below so that they remain well-formed
# when the variable is empty (it is only populated automatically by BuildKit).
# In that case the full build and the tests are run.
RUN if [ "$TARGETPLATFORM" = "linux/arm64" ] ; then echo "target is ARM64, don't build tests to avoid timeout"; fi
RUN if [ "$TARGETPLATFORM" = "linux/arm64" ] ; then cmake --build . --target IndexBuilderMain ServerMain; else cmake --build . ; fi
RUN if [ "$TARGETPLATFORM" = "linux/arm64" ] ; then echo "Skipping tests for ARM64" ; else ctest --rerun-failed --output-on-failure ; fi
2126

2227
FROM base AS runtime
2328
WORKDIR /qlever

src/ServerMain.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,12 @@ int main(int argc, char** argv) {
111111
optionFactory.getProgramOption<"service-max-value-rows">(),
112112
"The maximal number of result rows to be passed to a SERVICE operation "
113113
"as a VALUES clause to optimize its computation.");
114+
add("throw-on-unbound-variables",
115+
optionFactory.getProgramOption<"throw-on-unbound-variables">(),
116+
"If set to true, the queries that use GROUP BY, BIND, or ORDER BY with "
117+
"variables that are unbound in the query throw an exception. These "
118+
"queries technically are allowed by the SPARQL standard, but typically "
119+
"are the result of typos and unintended by the user");
114120
po::variables_map optionsMap;
115121

116122
try {

src/engine/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,5 @@ add_library(engine
1313
VariableToColumnMap.cpp ExportQueryExecutionTrees.cpp
1414
CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp
1515
TextLimit.cpp LazyGroupBy.cpp GroupByHashMapOptimization.cpp SpatialJoin.cpp
16-
CountConnectedSubgraphs.cpp SpatialJoinAlgorithms.cpp PathSearch.cpp)
16+
CountConnectedSubgraphs.cpp SpatialJoinAlgorithms.cpp PathSearch.cpp ExecuteUpdate.cpp)
1717
qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams s2)

src/engine/CartesianProductJoin.cpp

Lines changed: 212 additions & 94 deletions
Large diffs are not rendered by default.

src/engine/CartesianProductJoin.h

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ class CartesianProductJoin : public Operation {
1515

1616
private:
1717
Children children_;
18+
size_t chunkSize_;
1819

1920
// Access to the actual operations of the children.
2021
// TODO<joka921> We can move this whole children management into a base class
@@ -33,9 +34,11 @@ class CartesianProductJoin : public Operation {
3334

3435
public:
3536
// Constructor. `children` must not be empty and the variables of all the
36-
// children must be disjoint, else an `AD_CONTRACT_CHECK` fails.
37+
// children must be disjoint, else an `AD_CONTRACT_CHECK` fails. Accept a
38+
// custom `chunkSize` for chunking lazy results.
3739
explicit CartesianProductJoin(QueryExecutionContext* executionContext,
38-
Children children);
40+
Children children,
41+
size_t chunkSize = 1'000'000);
3942

4043
/// get non-owning pointers to all the held subtrees to actually use the
4144
/// Execution Trees as trees
@@ -77,16 +80,49 @@ class CartesianProductJoin : public Operation {
7780

7881
private:
7982
//! Compute the result of the query-subtree rooted at this element.
80-
ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override;
83+
ProtoResult computeResult(bool requestLaziness) override;
8184

8285
// Copy each element from the `inputColumn` `groupSize` times to the
8386
// `targetColumn`. Repeat until the `targetColumn` is completely filled. Skip
8487
// the first `offset` write operations to the `targetColumn`. Call
85-
// `checkCancellation` after each write. If `StaticGroupSize != 0`, then the
86-
// group size is known at compile time which allows for more efficient loop
87-
// processing for very small group sizes.
88-
template <size_t StaticGroupSize = 0>
88+
// `checkCancellation` after each write.
8989
void writeResultColumn(std::span<Id> targetColumn,
9090
std::span<const Id> inputColumn, size_t groupSize,
91-
size_t offset);
91+
size_t offset) const;
92+
93+
// Write all columns of the subresults into an `IdTable` and return it.
94+
// `offset` indicates how many rows to skip in the result and `limit` how many
95+
// rows to write at most. `lastTableOffset` is the offset of the last table,
96+
// to account for cases where the last table does not cover the whole result
97+
// and so index 0 of a table does not correspond to row 0 of the result.
98+
IdTable writeAllColumns(std::ranges::random_access_range auto idTables,
99+
size_t offset, size_t limit,
100+
size_t lastTableOffset = 0) const;
101+
102+
// Calculate the subresults of the children and store them into a vector. If
103+
// the rightmost child can produce a lazy result, it will be stored outside of
104+
// the vector and returned as the first element of the pair. Otherwise this
105+
// will be an empty shared_ptr. The vector is guaranteed to only contain fully
106+
// materialized results.
107+
std::pair<std::vector<std::shared_ptr<const Result>>,
108+
std::shared_ptr<const Result>>
109+
calculateSubResults(bool requestLaziness);
110+
111+
// Take a range of `IdTable`s and a corresponding `LocalVocab` and yield
112+
// `IdTable`s with sizes up to `chunkSize_` until the limit is reached.
113+
// `offset` indicates the total offset of the desired result.
114+
// `limit` is the maximum number of rows to yield.
115+
// `lastTableOffset` is the offset of the last table in the range. This is
116+
// used to handle `IdTable`s yielded by generators where the range of indices
117+
// they represent do not cover the whole result.
118+
Result::Generator produceTablesLazily(LocalVocab mergedVocab,
119+
std::ranges::range auto idTables,
120+
size_t offset, size_t limit,
121+
size_t lastTableOffset = 0) const;
122+
123+
// Similar to `produceTablesLazily` but can handle a single lazy result.
124+
Result::Generator createLazyConsumer(
125+
LocalVocab staticMergedVocab,
126+
std::vector<std::shared_ptr<const Result>> subresults,
127+
std::shared_ptr<const Result> lazyResult) const;
92128
};

src/engine/CountAvailablePredicates.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -165,9 +165,10 @@ void CountAvailablePredicates::computePatternTrickAllEntities(
165165
TripleComponent::Iri::fromIriref(HAS_PATTERN_PREDICATE), std::nullopt,
166166
std::nullopt}
167167
.toScanSpecification(index);
168-
auto fullHasPattern = index.getPermutation(Permutation::Enum::PSO)
169-
.lazyScan(scanSpec, std::nullopt, {},
170-
cancellationHandle_, deltaTriples());
168+
auto fullHasPattern =
169+
index.getPermutation(Permutation::Enum::PSO)
170+
.lazyScan(scanSpec, std::nullopt, {}, cancellationHandle_,
171+
locatedTriplesSnapshot());
171172
for (const auto& idTable : fullHasPattern) {
172173
for (const auto& patternId : idTable.getColumn(1)) {
173174
AD_CORRECTNESS_CHECK(patternId.getDatatype() == Datatype::Int);

src/engine/ExecuteUpdate.cpp

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
// Copyright 2024, University of Freiburg,
2+
// Chair of Algorithms and Data Structures.
3+
// Author: Julian Mundhahs <[email protected]>
4+
5+
#include "engine/ExecuteUpdate.h"
6+
7+
#include "engine/ExportQueryExecutionTrees.h"
8+
9+
// _____________________________________________________________________________
10+
void ExecuteUpdate::executeUpdate(
11+
const Index& index, const ParsedQuery& query, const QueryExecutionTree& qet,
12+
DeltaTriples& deltaTriples, const CancellationHandle& cancellationHandle) {
13+
auto [toInsert, toDelete] =
14+
computeGraphUpdateQuads(index, query, qet, cancellationHandle);
15+
16+
// "The deletion of the triples happens before the insertion." (SPARQL 1.1
17+
// Update 3.1.3)
18+
deltaTriples.deleteTriples(cancellationHandle,
19+
std::move(toDelete.idTriples_));
20+
deltaTriples.insertTriples(cancellationHandle,
21+
std::move(toInsert.idTriples_));
22+
}
23+
24+
// _____________________________________________________________________________
25+
std::pair<std::vector<ExecuteUpdate::TransformedTriple>, LocalVocab>
26+
ExecuteUpdate::transformTriplesTemplate(
27+
const Index::Vocab& vocab, const VariableToColumnMap& variableColumns,
28+
std::vector<SparqlTripleSimpleWithGraph>&& triples) {
29+
// This LocalVocab only contains IDs that are related to the
30+
// template. Most of the IDs will be added to the DeltaTriples' LocalVocab. An
31+
// ID will only not be added if it belongs to a Quad with a variable that has
32+
// no solutions.
33+
LocalVocab localVocab{};
34+
35+
auto transformSparqlTripleComponent =
36+
[&vocab, &localVocab,
37+
&variableColumns](TripleComponent component) -> IdOrVariableIndex {
38+
if (component.isVariable()) {
39+
AD_CORRECTNESS_CHECK(variableColumns.contains(component.getVariable()));
40+
return variableColumns.at(component.getVariable()).columnIndex_;
41+
} else {
42+
return std::move(component).toValueId(vocab, localVocab);
43+
}
44+
};
45+
Id defaultGraphIri = [&transformSparqlTripleComponent] {
46+
IdOrVariableIndex defaultGraph = transformSparqlTripleComponent(
47+
ad_utility::triple_component::Iri::fromIriref(DEFAULT_GRAPH_IRI));
48+
AD_CORRECTNESS_CHECK(std::holds_alternative<Id>(defaultGraph));
49+
return std::get<Id>(defaultGraph);
50+
}();
51+
auto transformGraph =
52+
[&vocab, &localVocab, &defaultGraphIri,
53+
&variableColumns](SparqlTripleSimpleWithGraph::Graph graph) {
54+
return std::visit(
55+
ad_utility::OverloadCallOperator{
56+
[&defaultGraphIri](const std::monostate&) -> IdOrVariableIndex {
57+
return defaultGraphIri;
58+
},
59+
[&vocab, &localVocab](const Iri& iri) -> IdOrVariableIndex {
60+
ad_utility::triple_component::Iri i =
61+
ad_utility::triple_component::Iri::fromIriref(iri.iri());
62+
return TripleComponent(i).toValueId(vocab, localVocab);
63+
},
64+
[&variableColumns](const Variable& var) -> IdOrVariableIndex {
65+
AD_CORRECTNESS_CHECK(variableColumns.contains(var));
66+
return variableColumns.at(var).columnIndex_;
67+
}},
68+
graph);
69+
};
70+
auto transformSparqlTripleSimple =
71+
[&transformSparqlTripleComponent,
72+
&transformGraph](SparqlTripleSimpleWithGraph triple) {
73+
return std::array{transformSparqlTripleComponent(std::move(triple.s_)),
74+
transformSparqlTripleComponent(std::move(triple.p_)),
75+
transformSparqlTripleComponent(std::move(triple.o_)),
76+
transformGraph(std::move(triple.g_))};
77+
};
78+
return {
79+
ad_utility::transform(std::move(triples), transformSparqlTripleSimple),
80+
std::move(localVocab)};
81+
}
82+
83+
// _____________________________________________________________________________
84+
std::optional<Id> ExecuteUpdate::resolveVariable(const IdTable& idTable,
85+
const uint64_t& rowIdx,
86+
IdOrVariableIndex idOrVar) {
87+
auto visitId = [](const Id& id) {
88+
return id.isUndefined() ? std::optional<Id>{} : id;
89+
};
90+
return std::visit(
91+
ad_utility::OverloadCallOperator{
92+
[&idTable, &rowIdx, &visitId](const ColumnIndex& columnInfo) {
93+
return visitId(idTable(rowIdx, columnInfo));
94+
},
95+
visitId},
96+
idOrVar);
97+
}
98+
99+
// _____________________________________________________________________________
100+
void ExecuteUpdate::computeAndAddQuadsForResultRow(
101+
const std::vector<TransformedTriple>& templates,
102+
std::vector<IdTriple<>>& result, const IdTable& idTable,
103+
const uint64_t rowIdx) {
104+
for (const auto& [s, p, o, g] : templates) {
105+
auto subject = resolveVariable(idTable, rowIdx, s);
106+
auto predicate = resolveVariable(idTable, rowIdx, p);
107+
auto object = resolveVariable(idTable, rowIdx, o);
108+
auto graph = resolveVariable(idTable, rowIdx, g);
109+
110+
if (!subject.has_value() || !predicate.has_value() || !object.has_value() ||
111+
!graph.has_value()) {
112+
continue;
113+
}
114+
result.emplace_back(std::array{*subject, *predicate, *object, *graph});
115+
}
116+
}
117+
118+
// _____________________________________________________________________________
119+
std::pair<ExecuteUpdate::IdTriplesAndLocalVocab,
120+
ExecuteUpdate::IdTriplesAndLocalVocab>
121+
ExecuteUpdate::computeGraphUpdateQuads(
122+
const Index& index, const ParsedQuery& query, const QueryExecutionTree& qet,
123+
const CancellationHandle& cancellationHandle) {
124+
AD_CONTRACT_CHECK(query.hasUpdateClause());
125+
auto updateClause = query.updateClause();
126+
if (!std::holds_alternative<updateClause::GraphUpdate>(updateClause.op_)) {
127+
throw std::runtime_error(
128+
"Only INSERT/DELETE update operations are currently supported.");
129+
}
130+
auto graphUpdate = std::get<updateClause::GraphUpdate>(updateClause.op_);
131+
// Fully materialize the result for now. This makes it easier to execute the
132+
// update.
133+
auto result = qet.getResult(false);
134+
135+
const auto& vocab = index.getVocab();
136+
137+
auto prepareTemplateAndResultContainer =
138+
[&vocab, &qet,
139+
&result](std::vector<SparqlTripleSimpleWithGraph>&& tripleTemplates) {
140+
auto [transformedTripleTemplates, localVocab] =
141+
transformTriplesTemplate(vocab, qet.getVariableColumns(),
142+
std::move(tripleTemplates));
143+
std::vector<IdTriple<>> updateTriples;
144+
// The maximum result size is size(query result) x num template rows.
145+
// The actual result can be smaller if there are template rows with
146+
// variables for which a result row does not have a value.
147+
updateTriples.reserve(result->idTable().size() *
148+
transformedTripleTemplates.size());
149+
150+
return std::tuple{std::move(transformedTripleTemplates),
151+
std::move(updateTriples), std::move(localVocab)};
152+
};
153+
154+
auto [toInsertTemplates, toInsert, localVocabInsert] =
155+
prepareTemplateAndResultContainer(std::move(graphUpdate.toInsert_));
156+
auto [toDeleteTemplates, toDelete, localVocabDelete] =
157+
prepareTemplateAndResultContainer(std::move(graphUpdate.toDelete_));
158+
159+
uint64_t resultSize = 0;
160+
for (const auto& [pair, range] : ExportQueryExecutionTrees::getRowIndices(
161+
query._limitOffset, *result, resultSize)) {
162+
auto& idTable = pair.idTable_;
163+
for (const uint64_t i : range) {
164+
computeAndAddQuadsForResultRow(toInsertTemplates, toInsert, idTable, i);
165+
cancellationHandle->throwIfCancelled();
166+
167+
computeAndAddQuadsForResultRow(toDeleteTemplates, toDelete, idTable, i);
168+
cancellationHandle->throwIfCancelled();
169+
}
170+
}
171+
172+
return {
173+
IdTriplesAndLocalVocab{std::move(toInsert), std::move(localVocabInsert)},
174+
IdTriplesAndLocalVocab{std::move(toDelete), std::move(localVocabDelete)}};
175+
}

src/engine/ExecuteUpdate.h

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
// Copyright 2024, University of Freiburg,
2+
// Chair of Algorithms and Data Structures.
3+
// Author: Julian Mundhahs <[email protected]>
4+
5+
#pragma once
6+
7+
#include <gtest/gtest_prod.h>
8+
9+
#include "index/Index.h"
10+
#include "parser/ParsedQuery.h"
11+
#include "util/CancellationHandle.h"
12+
13+
class ExecuteUpdate {
14+
public:
15+
using CancellationHandle = ad_utility::SharedCancellationHandle;
16+
using IdOrVariableIndex = std::variant<Id, ColumnIndex>;
17+
using TransformedTriple = std::array<IdOrVariableIndex, 4>;
18+
19+
// Execute an update. This function is comparable to
20+
// `ExportQueryExecutionTrees::computeResult` for queries.
21+
static void executeUpdate(const Index& index, const ParsedQuery& query,
22+
const QueryExecutionTree& qet,
23+
DeltaTriples& deltaTriples,
24+
const CancellationHandle& cancellationHandle);
25+
26+
private:
27+
// Resolve all `TripleComponent`s and `Graph`s in a vector of
28+
// `SparqlTripleSimpleWithGraph` into `Variable`s or `Id`s.
29+
static std::pair<std::vector<TransformedTriple>, LocalVocab>
30+
transformTriplesTemplate(const Index::Vocab& vocab,
31+
const VariableToColumnMap& variableColumns,
32+
std::vector<SparqlTripleSimpleWithGraph>&& triples);
33+
FRIEND_TEST(ExecuteUpdate, transformTriplesTemplate);
34+
35+
// Resolve a single `IdOrVariable` to an `Id` by looking up the value in the
36+
// result row. The `Id`s will never be undefined. If (and only if) the input
37+
// `Id` or the `Id` looked up in the `IdTable` is undefined then
38+
// `std::nullopt` is returned.
39+
static std::optional<Id> resolveVariable(const IdTable& idTable,
40+
const uint64_t& rowIdx,
41+
IdOrVariableIndex idOrVar);
42+
FRIEND_TEST(ExecuteUpdate, resolveVariable);
43+
44+
// Calculate and add the set of quads for the update that results from
45+
// interpolating one result row into the template. The resulting `IdTriple`s
46+
// consist of only `Id`s.
47+
static void computeAndAddQuadsForResultRow(
48+
const std::vector<TransformedTriple>& templates,
49+
std::vector<IdTriple<>>& result, const IdTable& idTable, uint64_t rowIdx);
50+
FRIEND_TEST(ExecuteUpdate, computeAndAddQuadsForResultRow);
51+
52+
struct IdTriplesAndLocalVocab {
53+
std::vector<IdTriple<>> idTriples_;
54+
LocalVocab localVocab_;
55+
};
56+
// Compute the set of quads to insert and delete for the given update. The
57+
// ParsedQuery's clause must be an UpdateClause. The UpdateClause's operation
58+
// must be a GraphUpdate.
59+
static std::pair<IdTriplesAndLocalVocab, IdTriplesAndLocalVocab>
60+
computeGraphUpdateQuads(const Index& index, const ParsedQuery& query,
61+
const QueryExecutionTree& qet,
62+
const CancellationHandle& cancellationHandle);
63+
FRIEND_TEST(ExecuteUpdate, computeGraphUpdateQuads);
64+
};

0 commit comments

Comments
 (0)