Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into update-dockerfile-t…
Browse files Browse the repository at this point in the history
…o-ubuntu-2204
  • Loading branch information
Hannah Bast committed Nov 16, 2024
2 parents c0d9e18 + 39ca684 commit 5c9f444
Show file tree
Hide file tree
Showing 102 changed files with 7,875 additions and 3,673 deletions.
9 changes: 7 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,21 @@ RUN apt-get update && apt-get install -y software-properties-common wget && add-
RUN wget https://apt.kitware.com/kitware-archive.sh && chmod +x kitware-archive.sh &&./kitware-archive.sh

FROM base AS builder
ARG TARGETPLATFORM
RUN apt-get update && apt-get install -y build-essential cmake libicu-dev tzdata pkg-config uuid-runtime uuid-dev git libjemalloc-dev ninja-build libzstd-dev libssl-dev libboost1.83-dev libboost-program-options1.83-dev libboost-iostreams1.83-dev libboost-url1.83-dev

COPY . /qlever/

WORKDIR /qlever/
ENV DEBIAN_FRONTEND=noninteractive

# Don't build and run tests on ARM64, as it takes too long on GitHub actions.
# TODO: re-enable these tests as soon as we can use a native ARM64 platform to compile the docker container.
WORKDIR /qlever/build/
RUN cmake -DCMAKE_BUILD_TYPE=Release -DLOGLEVEL=INFO -DUSE_PARALLEL=true -D_NO_TIMING_TESTS=ON -GNinja .. && ninja
RUN ctest --rerun-failed --output-on-failure
RUN cmake -DCMAKE_BUILD_TYPE=Release -DLOGLEVEL=INFO -DUSE_PARALLEL=true -D_NO_TIMING_TESTS=ON -GNinja ..
RUN if [ $TARGETPLATFORM = "linux/arm64" ] ; then echo "target is ARM64, don't build tests to avoid timeout"; fi
RUN if [ $TARGETPLATFORM = "linux/arm64" ] ; then cmake --build . --target IndexBuilderMain ServerMain; else cmake --build . ; fi
RUN if [ $TARGETPLATFORM = "linux/arm64" ] ; then echo "Skipping tests for ARM64" ; else ctest --rerun-failed --output-on-failure ; fi

FROM base AS runtime
WORKDIR /qlever
Expand Down
6 changes: 6 additions & 0 deletions src/ServerMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,12 @@ int main(int argc, char** argv) {
optionFactory.getProgramOption<"service-max-value-rows">(),
"The maximal number of result rows to be passed to a SERVICE operation "
"as a VALUES clause to optimize its computation.");
add("throw-on-unbound-variables",
optionFactory.getProgramOption<"throw-on-unbound-variables">(),
"If set to true, the queries that use GROUP BY, BIND, or ORDER BY with "
"variables that are unbound in the query throw an exception. These "
"queries technically are allowed by the SPARQL standard, but typically "
"are the result of typos and unintended by the user");
po::variables_map optionsMap;

try {
Expand Down
2 changes: 1 addition & 1 deletion src/engine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ add_library(engine
VariableToColumnMap.cpp ExportQueryExecutionTrees.cpp
CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp
TextLimit.cpp LazyGroupBy.cpp GroupByHashMapOptimization.cpp SpatialJoin.cpp
CountConnectedSubgraphs.cpp SpatialJoinAlgorithms.cpp PathSearch.cpp)
CountConnectedSubgraphs.cpp SpatialJoinAlgorithms.cpp PathSearch.cpp ExecuteUpdate.cpp)
qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams s2)
306 changes: 212 additions & 94 deletions src/engine/CartesianProductJoin.cpp

Large diffs are not rendered by default.

52 changes: 44 additions & 8 deletions src/engine/CartesianProductJoin.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class CartesianProductJoin : public Operation {

private:
Children children_;
size_t chunkSize_;

// Access to the actual operations of the children.
// TODO<joka921> We can move this whole children management into a base class
Expand All @@ -33,9 +34,11 @@ class CartesianProductJoin : public Operation {

public:
// Constructor. `children` must not be empty and the variables of all the
// children must be disjoint, else an `AD_CONTRACT_CHECK` fails.
// children must be disjoint, else an `AD_CONTRACT_CHECK` fails. Accept a
// custom `chunkSize` for chunking lazy results.
explicit CartesianProductJoin(QueryExecutionContext* executionContext,
Children children);
Children children,
size_t chunkSize = 1'000'000);

/// get non-owning pointers to all the held subtrees to actually use the
/// Execution Trees as trees
Expand Down Expand Up @@ -77,16 +80,49 @@ class CartesianProductJoin : public Operation {

private:
//! Compute the result of the query-subtree rooted at this element.
ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override;
ProtoResult computeResult(bool requestLaziness) override;

// Copy each element from the `inputColumn` `groupSize` times to the
// `targetColumn`. Repeat until the `targetColumn` is completely filled. Skip
// the first `offset` write operations to the `targetColumn`. Call
// `checkCancellation` after each write. If `StaticGroupSize != 0`, then the
// group size is known at compile time which allows for more efficient loop
// processing for very small group sizes.
template <size_t StaticGroupSize = 0>
// `checkCancellation` after each write.
void writeResultColumn(std::span<Id> targetColumn,
std::span<const Id> inputColumn, size_t groupSize,
size_t offset);
size_t offset) const;

// Write all columns of the subresults into an `IdTable` and return it.
// `offset` indicates how many rows to skip in the result and `limit` how many
// rows to write at most. `lastTableOffset` is the offset of the last table,
// to account for cases where the last table does not cover the whole result
// and so index 0 of a table does not correspond to row 0 of the result.
IdTable writeAllColumns(std::ranges::random_access_range auto idTables,
size_t offset, size_t limit,
size_t lastTableOffset = 0) const;

// Calculate the subresults of the children and store them into a vector. If
// the rightmost child can produce a lazy result, it will be stored outside of
// the vector and returned as the first element of the pair. Otherwise this
// will be an empty shared_ptr. The vector is guaranteed to only contain fully
// materialized results.
std::pair<std::vector<std::shared_ptr<const Result>>,
std::shared_ptr<const Result>>
calculateSubResults(bool requestLaziness);

// Take a range of `IdTable`s and a corresponding `LocalVocab` and yield
// `IdTable`s with sizes up to `chunkSize_` until the limit is reached.
// `offset` indicates the total offset of the desired result.
// `limit` is the maximum number of rows to yield.
// `lastTableOffset` is the offset of the last table in the range. This is
// used to handle `IdTable`s yielded by generators where the range of indices
// they represent does not cover the whole result.
Result::Generator produceTablesLazily(LocalVocab mergedVocab,
std::ranges::range auto idTables,
size_t offset, size_t limit,
size_t lastTableOffset = 0) const;

// Similar to `produceTablesLazily` but can handle a single lazy result.
Result::Generator createLazyConsumer(
LocalVocab staticMergedVocab,
std::vector<std::shared_ptr<const Result>> subresults,
std::shared_ptr<const Result> lazyResult) const;
};
7 changes: 4 additions & 3 deletions src/engine/CountAvailablePredicates.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,9 +165,10 @@ void CountAvailablePredicates::computePatternTrickAllEntities(
TripleComponent::Iri::fromIriref(HAS_PATTERN_PREDICATE), std::nullopt,
std::nullopt}
.toScanSpecification(index);
auto fullHasPattern = index.getPermutation(Permutation::Enum::PSO)
.lazyScan(scanSpec, std::nullopt, {},
cancellationHandle_, deltaTriples());
auto fullHasPattern =
index.getPermutation(Permutation::Enum::PSO)
.lazyScan(scanSpec, std::nullopt, {}, cancellationHandle_,
locatedTriplesSnapshot());
for (const auto& idTable : fullHasPattern) {
for (const auto& patternId : idTable.getColumn(1)) {
AD_CORRECTNESS_CHECK(patternId.getDatatype() == Datatype::Int);
Expand Down
175 changes: 175 additions & 0 deletions src/engine/ExecuteUpdate.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
// Copyright 2024, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Julian Mundhahs <[email protected]>

#include "engine/ExecuteUpdate.h"

#include "engine/ExportQueryExecutionTrees.h"

// _____________________________________________________________________________
void ExecuteUpdate::executeUpdate(
    const Index& index, const ParsedQuery& query, const QueryExecutionTree& qet,
    DeltaTriples& deltaTriples, const CancellationHandle& cancellationHandle) {
  // Determine the concrete quads that this update adds and removes. The pair
  // is kept alive until the end of the function.
  auto quadsToInsertAndDelete =
      computeGraphUpdateQuads(index, query, qet, cancellationHandle);
  auto& insertions = quadsToInsertAndDelete.first;
  auto& deletions = quadsToInsertAndDelete.second;

  // The order of the following two calls is prescribed by SPARQL 1.1 Update,
  // Section 3.1.3: "The deletion of the triples happens before the insertion."
  deltaTriples.deleteTriples(cancellationHandle,
                             std::move(deletions.idTriples_));
  deltaTriples.insertTriples(cancellationHandle,
                             std::move(insertions.idTriples_));
}

// _____________________________________________________________________________
std::pair<std::vector<ExecuteUpdate::TransformedTriple>, LocalVocab>
ExecuteUpdate::transformTriplesTemplate(
    const Index::Vocab& vocab, const VariableToColumnMap& variableColumns,
    std::vector<SparqlTripleSimpleWithGraph>&& triples) {
  // Receives the entries that are created while the constants of the template
  // are converted to `Id`s. Most of these `Id`s end up in the `LocalVocab` of
  // the `DeltaTriples`; the only exception are `Id`s of quads that contain a
  // variable without any solution.
  LocalVocab localVocab{};

  // Convert a subject, predicate, or object: constants become `Id`s, variables
  // become the index of their column in the query result.
  auto toIdOrColumn = [&vocab, &localVocab, &variableColumns](
                          TripleComponent component) -> IdOrVariableIndex {
    if (!component.isVariable()) {
      return std::move(component).toValueId(vocab, localVocab);
    }
    AD_CORRECTNESS_CHECK(variableColumns.contains(component.getVariable()));
    return variableColumns.at(component.getVariable()).columnIndex_;
  };

  // The `Id` of the default graph is computed once, before any triple is
  // converted. It is a constant, so the conversion must yield an `Id`.
  IdOrVariableIndex defaultGraph = toIdOrColumn(
      ad_utility::triple_component::Iri::fromIriref(DEFAULT_GRAPH_IRI));
  AD_CORRECTNESS_CHECK(std::holds_alternative<Id>(defaultGraph));
  const Id defaultGraphId = std::get<Id>(defaultGraph);

  // Convert the graph of a quad: no graph -> default graph, IRI -> `Id`,
  // variable -> column index.
  auto toGraphIdOrColumn =
      [&vocab, &localVocab, &defaultGraphId,
       &variableColumns](SparqlTripleSimpleWithGraph::Graph graph) {
        auto graphVisitor = ad_utility::OverloadCallOperator{
            [&defaultGraphId](const std::monostate&) -> IdOrVariableIndex {
              return defaultGraphId;
            },
            [&vocab, &localVocab](const Iri& iri) -> IdOrVariableIndex {
              return TripleComponent(
                         ad_utility::triple_component::Iri::fromIriref(
                             iri.iri()))
                  .toValueId(vocab, localVocab);
            },
            [&variableColumns](const Variable& var) -> IdOrVariableIndex {
              AD_CORRECTNESS_CHECK(variableColumns.contains(var));
              return variableColumns.at(var).columnIndex_;
            }};
        return std::visit(graphVisitor, graph);
      };

  // Convert a complete quad; the positions are processed in the order
  // subject, predicate, object, graph.
  auto toQuadTemplate = [&toIdOrColumn, &toGraphIdOrColumn](
                            SparqlTripleSimpleWithGraph triple) {
    IdOrVariableIndex subject = toIdOrColumn(std::move(triple.s_));
    IdOrVariableIndex predicate = toIdOrColumn(std::move(triple.p_));
    IdOrVariableIndex object = toIdOrColumn(std::move(triple.o_));
    IdOrVariableIndex graph = toGraphIdOrColumn(std::move(triple.g_));
    return std::array{subject, predicate, object, graph};
  };

  auto quadTemplates =
      ad_utility::transform(std::move(triples), toQuadTemplate);
  return {std::move(quadTemplates), std::move(localVocab)};
}

// _____________________________________________________________________________
std::optional<Id> ExecuteUpdate::resolveVariable(const IdTable& idTable,
const uint64_t& rowIdx,
IdOrVariableIndex idOrVar) {
auto visitId = [](const Id& id) {
return id.isUndefined() ? std::optional<Id>{} : id;
};
return std::visit(
ad_utility::OverloadCallOperator{
[&idTable, &rowIdx, &visitId](const ColumnIndex& columnInfo) {
return visitId(idTable(rowIdx, columnInfo));
},
visitId},
idOrVar);
}

// _____________________________________________________________________________
void ExecuteUpdate::computeAndAddQuadsForResultRow(
    const std::vector<TransformedTriple>& templates,
    std::vector<IdTriple<>>& result, const IdTable& idTable,
    const uint64_t rowIdx) {
  for (const auto& quadTemplate : templates) {
    // Instantiate the four positions (subject, predicate, object, graph) of
    // the template with the values of the current result row.
    const auto subject = resolveVariable(idTable, rowIdx, quadTemplate[0]);
    const auto predicate = resolveVariable(idTable, rowIdx, quadTemplate[1]);
    const auto object = resolveVariable(idTable, rowIdx, quadTemplate[2]);
    const auto graph = resolveVariable(idTable, rowIdx, quadTemplate[3]);

    // A quad is only emitted if all of its positions have a defined value for
    // this row; otherwise the template is skipped for this row.
    const bool isFullyBound = subject.has_value() && predicate.has_value() &&
                              object.has_value() && graph.has_value();
    if (isFullyBound) {
      result.emplace_back(
          std::array{subject.value(), predicate.value(), object.value(),
                     graph.value()});
    }
  }
}

// _____________________________________________________________________________
std::pair<ExecuteUpdate::IdTriplesAndLocalVocab,
ExecuteUpdate::IdTriplesAndLocalVocab>
ExecuteUpdate::computeGraphUpdateQuads(
const Index& index, const ParsedQuery& query, const QueryExecutionTree& qet,
const CancellationHandle& cancellationHandle) {
AD_CONTRACT_CHECK(query.hasUpdateClause());
auto updateClause = query.updateClause();
if (!std::holds_alternative<updateClause::GraphUpdate>(updateClause.op_)) {
throw std::runtime_error(
"Only INSERT/DELETE update operations are currently supported.");
}
auto graphUpdate = std::get<updateClause::GraphUpdate>(updateClause.op_);
// Fully materialize the result for now. This makes it easier to execute the
// update.
auto result = qet.getResult(false);

const auto& vocab = index.getVocab();

auto prepareTemplateAndResultContainer =
[&vocab, &qet,
&result](std::vector<SparqlTripleSimpleWithGraph>&& tripleTemplates) {
auto [transformedTripleTemplates, localVocab] =
transformTriplesTemplate(vocab, qet.getVariableColumns(),
std::move(tripleTemplates));
std::vector<IdTriple<>> updateTriples;
// The maximum result size is size(query result) x num template rows.
// The actual result can be smaller if there are template rows with
// variables for which a result row does not have a value.
updateTriples.reserve(result->idTable().size() *
transformedTripleTemplates.size());

return std::tuple{std::move(transformedTripleTemplates),
std::move(updateTriples), std::move(localVocab)};
};

auto [toInsertTemplates, toInsert, localVocabInsert] =
prepareTemplateAndResultContainer(std::move(graphUpdate.toInsert_));
auto [toDeleteTemplates, toDelete, localVocabDelete] =
prepareTemplateAndResultContainer(std::move(graphUpdate.toDelete_));

uint64_t resultSize = 0;
for (const auto& [pair, range] : ExportQueryExecutionTrees::getRowIndices(
query._limitOffset, *result, resultSize)) {
auto& idTable = pair.idTable_;
for (const uint64_t i : range) {
computeAndAddQuadsForResultRow(toInsertTemplates, toInsert, idTable, i);
cancellationHandle->throwIfCancelled();

computeAndAddQuadsForResultRow(toDeleteTemplates, toDelete, idTable, i);
cancellationHandle->throwIfCancelled();
}
}

return {
IdTriplesAndLocalVocab{std::move(toInsert), std::move(localVocabInsert)},
IdTriplesAndLocalVocab{std::move(toDelete), std::move(localVocabDelete)}};
}
64 changes: 64 additions & 0 deletions src/engine/ExecuteUpdate.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Copyright 2024, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Julian Mundhahs <[email protected]>

#pragma once

#include <gtest/gtest_prod.h>

#include "index/Index.h"
#include "parser/ParsedQuery.h"
#include "util/CancellationHandle.h"

// A collection of static functions that execute a SPARQL update: they compute
// the quads that an update inserts and deletes from the result of the update's
// query part and apply them to a `DeltaTriples` object.
class ExecuteUpdate {
public:
using CancellationHandle = ad_utility::SharedCancellationHandle;
// One position of a quad template: either a fixed `Id` or the index of the
// column of the query result from which the value is taken for each row.
using IdOrVariableIndex = std::variant<Id, ColumnIndex>;
// A quad template with the positions subject, predicate, object, graph (in
// this order).
using TransformedTriple = std::array<IdOrVariableIndex, 4>;

// Execute an update: compute the quads to delete and to insert and apply them
// to `deltaTriples`. The deletion is performed before the insertion. This
// function is comparable to `ExportQueryExecutionTrees::computeResult` for
// queries.
static void executeUpdate(const Index& index, const ParsedQuery& query,
const QueryExecutionTree& qet,
DeltaTriples& deltaTriples,
const CancellationHandle& cancellationHandle);

private:
// Resolve all `TripleComponent`s and `Graph`s in a vector of
// `SparqlTripleSimpleWithGraph` into `Id`s (for constants) or column indices
// (for variables). Each variable must be contained in `variableColumns`,
// otherwise an `AD_CORRECTNESS_CHECK` fails. A graph that holds
// `std::monostate` is mapped to the `Id` of `DEFAULT_GRAPH_IRI`. The returned
// `LocalVocab` is the one that was passed to the `toValueId` calls for the
// constants of the template.
static std::pair<std::vector<TransformedTriple>, LocalVocab>
transformTriplesTemplate(const Index::Vocab& vocab,
const VariableToColumnMap& variableColumns,
std::vector<SparqlTripleSimpleWithGraph>&& triples);
// Give the unit test of the same name access to the private function above.
FRIEND_TEST(ExecuteUpdate, transformTriplesTemplate);

// Resolve a single `IdOrVariableIndex` to an `Id`. A column index is resolved
// by looking up the value in row `rowIdx` of `idTable`, an `Id` is used as is.
// The returned `Id` is never undefined. If (and only if) the input `Id` or the
// `Id` looked up in the `IdTable` is undefined then `std::nullopt` is
// returned.
static std::optional<Id> resolveVariable(const IdTable& idTable,
const uint64_t& rowIdx,
IdOrVariableIndex idOrVar);
// Give the unit test of the same name access to the private function above.
FRIEND_TEST(ExecuteUpdate, resolveVariable);

// Calculate and add the set of quads for the update that results from
// interpolating one result row (row `rowIdx` of `idTable`) into the
// `templates`. The quads are appended to `result`. A template is skipped for
// this row if any of its four positions resolves to `std::nullopt`. The
// resulting `IdTriple`s consist of only `Id`s.
static void computeAndAddQuadsForResultRow(
const std::vector<TransformedTriple>& templates,
std::vector<IdTriple<>>& result, const IdTable& idTable, uint64_t rowIdx);
// Give the unit test of the same name access to the private function above.
FRIEND_TEST(ExecuteUpdate, computeAndAddQuadsForResultRow);

// The quads computed for one half of an update (the insertions or the
// deletions) together with the `LocalVocab` that was created while
// transforming the corresponding template.
struct IdTriplesAndLocalVocab {
std::vector<IdTriple<>> idTriples_;
LocalVocab localVocab_;
};
// Compute the set of quads to insert and delete for the given update. The
// first element of the returned pair holds the quads to insert, the second
// element the quads to delete. The ParsedQuery's clause must be an
// UpdateClause (checked via `AD_CONTRACT_CHECK`). The UpdateClause's operation
// must be a GraphUpdate, otherwise a `std::runtime_error` is thrown. The
// result of `qet` is fully materialized, and the rows that are processed are
// determined by `ExportQueryExecutionTrees::getRowIndices` with the query's
// `_limitOffset`.
static std::pair<IdTriplesAndLocalVocab, IdTriplesAndLocalVocab>
computeGraphUpdateQuads(const Index& index, const ParsedQuery& query,
const QueryExecutionTree& qet,
const CancellationHandle& cancellationHandle);
// Give the unit test of the same name access to the private function above.
FRIEND_TEST(ExecuteUpdate, computeGraphUpdateQuads);
};
Loading

0 comments on commit 5c9f444

Please sign in to comment.