Skip to content
Open
6 changes: 4 additions & 2 deletions src/engine/CountAvailablePredicates.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,9 +169,11 @@ void CountAvailablePredicates::computePatternTrickAllEntities(
std::nullopt}
.toScanSpecification(index);
const auto& perm = index.getPermutation(Permutation::Enum::PSO);
const auto& locatedTriple = locatedTriplesSnapshot();
const auto& locatedTriple =
perm.getLocatedTriplesForPermutation(locatedTriplesSnapshot());
auto fullHasPattern =
perm.lazyScan(perm.getScanSpecAndBlocks(scanSpec, locatedTriple),
perm.lazyScan(CompressedRelationReader::ScanSpecAndBlocks::withUpdates(
scanSpec, locatedTriple),
std::nullopt, {}, cancellationHandle_, locatedTriple);
for (const auto& idTable : fullHasPattern) {
for (const auto& patternId : idTable.getColumn(1)) {
Expand Down
6 changes: 4 additions & 2 deletions src/engine/GroupByImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -841,7 +841,8 @@ std::optional<IdTable> GroupByImpl::computeGroupByObjectWithCount() const {
// do the index scan, but something smarter).
const auto& permutation = indexScan->permutation();
auto result = permutation.getDistinctCol1IdsAndCounts(
col0Id.value(), cancellationHandle_, locatedTriplesSnapshot(),
col0Id.value(), cancellationHandle_,
permutation.getLocatedTriplesForPermutation(locatedTriplesSnapshot()),
indexScan->getLimitOffset());

indexScan->updateRuntimeInformationWhenOptimizedOut({});
Expand Down Expand Up @@ -900,7 +901,8 @@ std::optional<IdTable> GroupByImpl::computeGroupByForFullIndexScan() const {
getExecutionContext()->getIndex().getPimpl().getPermutation(
permutationEnum.value());
auto table = permutation.getDistinctCol0IdsAndCounts(
cancellationHandle_, locatedTriplesSnapshot(),
cancellationHandle_,
permutation.getLocatedTriplesForPermutation(locatedTriplesSnapshot()),
indexScan->getLimitOffset());
if (numCounts == 0) {
table.setColumnSubset(std::array{ColumnIndex{0}});
Expand Down
12 changes: 7 additions & 5 deletions src/engine/HasPredicateScan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -420,9 +420,11 @@ std::shared_ptr<QueryExecutionTree> HasPredicateScan::makePatternScan(
std::move(subject),
ad_utility::triple_component::Iri::fromIriref(HAS_PATTERN_PREDICATE),
TripleComponent{std::move(object)}};
return ad_utility::makeExecutionTree<IndexScan>(
qec,
qlever::getPermutationForTriple(Permutation::Enum::PSO, qec->getIndex(),
triple),
qec->sharedLocatedTriplesSnapshot(), triple);
auto [permutation, locatedTriples] =
qlever::getPermutationAndLocatedTriplesPerBlockForTriple(
Permutation::Enum::PSO, qec->getIndex(),
qec->sharedLocatedTriplesSnapshot(), triple);
return ad_utility::makeExecutionTree<IndexScan>(qec, std::move(permutation),
std::move(locatedTriples),
std::move(triple));
}
53 changes: 28 additions & 25 deletions src/engine/IndexScan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,13 @@ static size_t getNumberOfVariables(const TripleComponent& subject,

// _____________________________________________________________________________
IndexScan::IndexScan(QueryExecutionContext* qec, PermutationPtr permutation,
LocatedTriplesSnapshotPtr locatedTriplesSnapshot,
LocatedTriplesPerBlockPtr locatedTriplesPerBlock,
const SparqlTripleSimple& triple, Graphs graphsToFilter,
std::optional<ScanSpecAndBlocks> scanSpecAndBlocks,
VarsToKeep varsToKeep)
: Operation(qec),
permutation_(permutation),
locatedTriplesSnapshot_(locatedTriplesSnapshot),
permutation_(std::move(permutation)),
locatedTriplesPerBlock_(std::move(locatedTriplesPerBlock)),
subject_(triple.s_),
predicate_(triple.p_),
object_(triple.o_),
Expand All @@ -49,7 +49,7 @@ IndexScan::IndexScan(QueryExecutionContext* qec, PermutationPtr permutation,
numVariables_(getNumberOfVariables(subject_, predicate_, object_)),
varsToKeep_(std::move(varsToKeep)) {
AD_CONTRACT_CHECK(permutation_ != nullptr);
AD_CONTRACT_CHECK(locatedTriplesSnapshot_ != nullptr);
AD_CONTRACT_CHECK(locatedTriplesPerBlock_ != nullptr);

// We previously had `nullptr`s here in unit tests. This is neither
// necessary nor allowed anymore.
Expand Down Expand Up @@ -78,23 +78,26 @@ IndexScan::IndexScan(QueryExecutionContext* qec,
Permutation::Enum permutationType,
const SparqlTripleSimple& triple, Graphs graphsToFilter,
std::optional<ScanSpecAndBlocks> scanSpecAndBlocks)
: IndexScan(qec,
qec->getIndex().getImpl().getPermutationPtr(permutationType),
qec->sharedLocatedTriplesSnapshot(), triple,
std::move(graphsToFilter), std::move(scanSpecAndBlocks)) {}
: IndexScan(
qec, qec->getIndex().getImpl().getPermutationPtr(permutationType),
std::shared_ptr<const LocatedTriplesPerBlock>{
qec->sharedLocatedTriplesSnapshot(),
&qec->sharedLocatedTriplesSnapshot()
->getLocatedTriplesForPermutation(permutationType)},
triple, std::move(graphsToFilter), std::move(scanSpecAndBlocks)) {}

// _____________________________________________________________________________
IndexScan::IndexScan(QueryExecutionContext* qec, PermutationPtr permutation,
LocatedTriplesSnapshotPtr locatedTriplesSnapshot,
LocatedTriplesPerBlockPtr locatedTriplesPerBlock,
const TripleComponent& s, const TripleComponent& p,
const TripleComponent& o,
std::vector<ColumnIndex> additionalColumns,
std::vector<Variable> additionalVariables,
Graphs graphsToFilter, ScanSpecAndBlocks scanSpecAndBlocks,
bool scanSpecAndBlocksIsPrefiltered, VarsToKeep varsToKeep)
: Operation(qec),
permutation_(permutation),
locatedTriplesSnapshot_(locatedTriplesSnapshot),
permutation_(std::move(permutation)),
locatedTriplesPerBlock_(std::move(locatedTriplesPerBlock)),
subject_(s),
predicate_(p),
object_(o),
Expand All @@ -107,7 +110,7 @@ IndexScan::IndexScan(QueryExecutionContext* qec, PermutationPtr permutation,
varsToKeep_{std::move(varsToKeep)} {
AD_CONTRACT_CHECK(qec != nullptr);
AD_CONTRACT_CHECK(permutation_ != nullptr);
AD_CONTRACT_CHECK(locatedTriplesSnapshot_ != nullptr);
AD_CONTRACT_CHECK(locatedTriplesPerBlock_ != nullptr);
std::tie(sizeEstimateIsExact_, sizeEstimate_) = computeSizeEstimate();
determineMultiplicities();
}
Expand Down Expand Up @@ -260,7 +263,7 @@ std::shared_ptr<QueryExecutionTree>
IndexScan::makeCopyWithPrefilteredScanSpecAndBlocks(
ScanSpecAndBlocks scanSpecAndBlocks) const {
return ad_utility::makeExecutionTree<IndexScan>(
getExecutionContext(), permutation_, locatedTriplesSnapshot_, subject_,
getExecutionContext(), permutation_, locatedTriplesPerBlock_, subject_,
predicate_, object_, additionalColumns_, additionalVariables_,
graphsToFilter_, std::move(scanSpecAndBlocks), true, varsToKeep_);
}
Expand All @@ -277,7 +280,7 @@ Result::LazyResult IndexScan::chunkedIndexScan() const {
IdTable IndexScan::materializedIndexScan() const {
IdTable idTable = permutation().scan(
scanSpecAndBlocks_, additionalColumns(), cancellationHandle_,
locatedTriplesSnapshot(), getLimitOffset());
locatedTriplesPerBlock(), getLimitOffset());
AD_LOG_DEBUG << "IndexScan result computation done.\n";
checkCancellation();
idTable = makeApplyColumnSubset()(std::move(idTable));
Expand All @@ -301,16 +304,16 @@ const Permutation& IndexScan::permutation() const {
}

// _____________________________________________________________________________
const LocatedTriplesSnapshot& IndexScan::locatedTriplesSnapshot() const {
AD_CONTRACT_CHECK(locatedTriplesSnapshot_ != nullptr);
return *locatedTriplesSnapshot_;
const LocatedTriplesPerBlock& IndexScan::locatedTriplesPerBlock() const {
AD_CONTRACT_CHECK(locatedTriplesPerBlock_ != nullptr);
return *locatedTriplesPerBlock_;
}

// _____________________________________________________________________________
std::pair<bool, size_t> IndexScan::computeSizeEstimate() const {
AD_CORRECTNESS_CHECK(_executionContext);
auto [lower, upper] = permutation().getSizeEstimateForScan(
scanSpecAndBlocks_, locatedTriplesSnapshot());
scanSpecAndBlocks_, locatedTriplesPerBlock());
// NOTE: Starting from C++20 we could use `std::midpoint` here
return {lower == upper, lower + (upper - lower) / 2};
}
Expand All @@ -319,7 +322,7 @@ std::pair<bool, size_t> IndexScan::computeSizeEstimate() const {
size_t IndexScan::getExactSize() const {
AD_CORRECTNESS_CHECK(_executionContext);
return permutation().getResultSizeOfScan(scanSpecAndBlocks_,
locatedTriplesSnapshot());
locatedTriplesPerBlock());
}

// _____________________________________________________________________________
Expand Down Expand Up @@ -396,8 +399,8 @@ IndexScan::getSortedVariableAndMetadataColumnIndexForPrefiltering() const {

// ___________________________________________________________________________
Permutation::ScanSpecAndBlocks IndexScan::getScanSpecAndBlocks() const {
return permutation().getScanSpecAndBlocks(getScanSpecification(),
locatedTriplesSnapshot());
return CompressedRelationReader::ScanSpecAndBlocks::withUpdates(
getScanSpecification(), locatedTriplesPerBlock());
}

// _____________________________________________________________________________
Expand All @@ -411,7 +414,7 @@ CompressedRelationReader::IdTableGeneratorInputRange IndexScan::getLazyScan(
getLimitOffset().isUnconstrained() ? std::move(blocks) : std::nullopt;
auto lazyScanAllCols = permutation().lazyScan(
scanSpecAndBlocks_, filteredBlocks, additionalColumns(),
cancellationHandle_, locatedTriplesSnapshot(), getLimitOffset());
cancellationHandle_, locatedTriplesPerBlock(), getLimitOffset());

return CompressedRelationReader::IdTableGeneratorInputRange{
ad_utility::CachingTransformInputRange<
Expand All @@ -425,7 +428,7 @@ CompressedRelationReader::IdTableGeneratorInputRange IndexScan::getLazyScan(
std::optional<Permutation::MetadataAndBlocks> IndexScan::getMetadataForScan()
const {
return permutation().getMetadataAndBlocks(scanSpecAndBlocks_,
locatedTriplesSnapshot());
locatedTriplesPerBlock());
};

// _____________________________________________________________________________
Expand Down Expand Up @@ -746,7 +749,7 @@ std::pair<Result::LazyResult, Result::LazyResult> IndexScan::prefilterTables(
// _____________________________________________________________________________
std::unique_ptr<Operation> IndexScan::cloneImpl() const {
return std::make_unique<IndexScan>(
_executionContext, permutation_, locatedTriplesSnapshot_, subject_,
_executionContext, permutation_, locatedTriplesPerBlock_, subject_,
predicate_, object_, additionalColumns_, additionalVariables_,
graphsToFilter_, scanSpecAndBlocks_, scanSpecAndBlocksIsPrefiltered_,
varsToKeep_);
Expand All @@ -771,7 +774,7 @@ IndexScan::makeTreeWithStrippedColumns(
}

return ad_utility::makeExecutionTree<IndexScan>(
_executionContext, permutation_, locatedTriplesSnapshot_, subject_,
_executionContext, permutation_, locatedTriplesPerBlock_, subject_,
predicate_, object_, additionalColumns_, additionalVariables_,
graphsToFilter_, scanSpecAndBlocks_, scanSpecAndBlocksIsPrefiltered_,
VarsToKeep{std::move(newVariables)});
Expand Down
16 changes: 10 additions & 6 deletions src/engine/IndexScan.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,19 @@
class IndexScan final : public Operation {
public:
using Graphs = ScanSpecificationAsTripleComponent::GraphFilter;
// TODO: get rid of them, especially the duplication with `PermutationSelector`

Check failure on line 19 in src/engine/IndexScan.h

View workflow job for this annotation

GitHub Actions / Check for spelling errors

dubling ==> doubling, dublin
using PermutationPtr = std::shared_ptr<const Permutation>;
using LocatedTriplesSnapshotPtr =
std::shared_ptr<const LocatedTriplesSnapshot>;
using LocatedTriplesPerBlockPtr =
std::shared_ptr<const LocatedTriplesPerBlock>;

private:
using ScanSpecAndBlocks = Permutation::ScanSpecAndBlocks;

private:
PermutationPtr permutation_;
LocatedTriplesSnapshotPtr locatedTriplesSnapshot_;
LocatedTriplesPerBlockPtr locatedTriplesPerBlock_;
TripleComponent subject_;
TripleComponent predicate_;
TripleComponent object_;
Expand All @@ -51,7 +54,7 @@

public:
IndexScan(QueryExecutionContext* qec, PermutationPtr permutation,
LocatedTriplesSnapshotPtr locatedTriplesSnapshot,
LocatedTriplesPerBlockPtr locatedTriplesPerBlock,
const SparqlTripleSimple& triple,
Graphs graphsToFilter = Graphs::All(),
std::optional<ScanSpecAndBlocks> scanSpecAndBlocks = std::nullopt,
Expand All @@ -67,7 +70,7 @@

// Constructor to simplify copy creation of an `IndexScan`.
IndexScan(QueryExecutionContext* qec, PermutationPtr permutation,
LocatedTriplesSnapshotPtr locatedTriplesSnapshot,
LocatedTriplesPerBlockPtr locatedTriplesPerBlock,
const TripleComponent& s, const TripleComponent& p,
const TripleComponent& o,
std::vector<ColumnIndex> additionalColumns,
Expand Down Expand Up @@ -186,9 +189,10 @@

// Instead of using the `LocatedTriplesSnapshot` of the `Operation` base
// class, which accesses the one stored in the `QueryExecutionContext`, use
// the `LocatedTriplesSnapshot` held in this object. This might be a different
// one if a custom permutation is used.
const LocatedTriplesSnapshot& locatedTriplesSnapshot() const override;
// the `LocatedTriplesPerBlock` held in this object. This already is exactly
// the located triples for the permutation of the index scan and should be
// used wherever possible.
const LocatedTriplesPerBlock& locatedTriplesPerBlock() const;

// Return the stored triple in the order that corresponds to the
// `permutation_`. For example if `permutation_ == PSO` then the result is
Expand Down
26 changes: 10 additions & 16 deletions src/engine/MaterializedViews.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ void MaterializedViewWriter::computeResultAndWritePermutation() const {
MaterializedView::MaterializedView(std::string onDiskBase, std::string name)
: onDiskBase_{std::move(onDiskBase)},
name_{std::move(name)},
locatedTriplesSnapshot_{makeEmptyLocatedTriplesSnapshot()} {
locatedTriplesPerBlock_{makeEmptyLocatedTriplesPerBlock()} {
AD_CORRECTNESS_CHECK(onDiskBase_ != "",
"The index base filename was not set.");
throwIfInvalidName(name_);
Expand Down Expand Up @@ -517,23 +517,17 @@ void MaterializedViewsManager::setOnDiskBase(const std::string& onDiskBase) {
}

// _____________________________________________________________________________
std::shared_ptr<const LocatedTriplesSnapshot>
MaterializedView::locatedTriplesSnapshot() const {
return locatedTriplesSnapshot_;
std::shared_ptr<const LocatedTriplesPerBlock>
MaterializedView::locatedTriplesPerBlock() const {
return locatedTriplesPerBlock_;
}

// _____________________________________________________________________________
std::shared_ptr<LocatedTriplesSnapshot>
MaterializedView::makeEmptyLocatedTriplesSnapshot() const {
LocatedTriplesPerBlockAllPermutations<false> emptyLocatedTriples;
emptyLocatedTriples[static_cast<size_t>(permutation_->permutation())]
.setOriginalMetadata(permutation_->metaData().blockDataShared());
LocatedTriplesPerBlockAllPermutations<true> emptyInternalLocatedTriples;
LocalVocab emptyVocab;

return std::make_shared<LocatedTriplesSnapshot>(
emptyLocatedTriples, emptyInternalLocatedTriples,
emptyVocab.getLifetimeExtender(), 0);
std::shared_ptr<LocatedTriplesPerBlock>
MaterializedView::makeEmptyLocatedTriplesPerBlock() const {
auto ltpb = std::make_shared<LocatedTriplesPerBlock>();
ltpb->setOriginalMetadata(permutation_->metaData().blockDataShared());
return ltpb;
}

// _____________________________________________________________________________
Expand All @@ -550,7 +544,7 @@ std::shared_ptr<IndexScan> MaterializedView::makeIndexScan(
// query.
auto scanTriple = makeScanConfig(viewQuery);
return std::make_shared<IndexScan>(
qec, permutation_, locatedTriplesSnapshot_, std::move(scanTriple),
qec, permutation_, locatedTriplesPerBlock_, std::move(scanTriple),
IndexScan::Graphs::All(), std::nullopt, viewQuery.getVarsToKeep());
}

Expand Down
10 changes: 5 additions & 5 deletions src/engine/MaterializedViews.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,13 +148,13 @@ class MaterializedView {
std::shared_ptr<Permutation> permutation_{std::make_shared<Permutation>(
Permutation::Enum::SPO, ad_utility::makeUnlimitedAllocator<Id>())};
VariableToColumnMap varToColMap_;
std::shared_ptr<LocatedTriplesSnapshot> locatedTriplesSnapshot_;
std::shared_ptr<LocatedTriplesPerBlock> locatedTriplesPerBlock_;

using AdditionalScanColumns = SparqlTripleSimple::AdditionalScanColumns;

// Helper to create an empty `LocatedTriplesSnapshot` for `IndexScan`s as
// Helper to create an empty `LocatedTriplesPerBlock` for `IndexScan`s as
// materialized views do not support updates yet.
std::shared_ptr<LocatedTriplesSnapshot> makeEmptyLocatedTriplesSnapshot()
std::shared_ptr<LocatedTriplesPerBlock> makeEmptyLocatedTriplesPerBlock()
const;

public:
Expand Down Expand Up @@ -182,10 +182,10 @@ class MaterializedView {
// `nullptr`.
std::shared_ptr<const Permutation> permutation() const;

// Return a reference to the `LocatedTriplesSnapshot` for the permutation. For
// Return a shared pointer to the `LocatedTriplesPerBlock` for the permutation.
// For now this is always empty (materialized views do not support updates
// yet), but it carries the correct permutation metadata.
std::shared_ptr<const LocatedTriplesSnapshot> locatedTriplesSnapshot() const;
std::shared_ptr<const LocatedTriplesPerBlock> locatedTriplesPerBlock() const;

// Checks if the given name is allowed for a materialized view. Currently only
// alphanumerics and hyphens are allowed. This is relevant for safe filenames
Expand Down
30 changes: 27 additions & 3 deletions src/engine/PermutationSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

#include "engine/PermutationSelector.h"

#include <utility>

#include "index/IndexImpl.h"

namespace {
Expand Down Expand Up @@ -34,9 +36,9 @@ bool containsInternalIri(const SparqlTripleSimple& triple) {

namespace qlever {
// _____________________________________________________________________________
std::shared_ptr<const Permutation> getPermutationForTriple(
Permutation::Enum permutation, const Index& index,
const SparqlTripleSimple& triple) {
PermutationPtr getPermutationForTriple(Permutation::Enum permutation,
const Index& index,
const SparqlTripleSimple& triple) {
auto actualPermutation = index.getImpl().getPermutationPtr(permutation);

if (containsInternalIri(triple)) {
Expand All @@ -47,4 +49,26 @@ std::shared_ptr<const Permutation> getPermutationForTriple(
}
return actualPermutation;
}

// _____________________________________________________________________________
LocatedTriplesPerBlockPtr getLocatedTriplesPerBlockForTriple(
Permutation::Enum permutation, LocatedTriplesSnapshotPtr snapshot,
const SparqlTripleSimple& triple) {
// Create an aliasing shared pointer to the right `LocatedTriplesPerBlock`
// (internal or regular) that shares ownership with the snapshot.
const auto& locatedTriples =
containsInternalIri(triple)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have the exact same check in `getPermutationForTriple`. Is there a place where only one of the two functions is used? I imagine you could simply move the return statement you have here into the other function.
On the other hand, separation of concerns is also a valid argument.

? snapshot->getInternalLocatedTriplesForPermutation(permutation)
: snapshot->getLocatedTriplesForPermutation(permutation);
return LocatedTriplesPerBlockPtr{std::move(snapshot), &locatedTriples};
}

// _____________________________________________________________________________
// Convenience wrapper: resolve both the permutation and the matching
// `LocatedTriplesPerBlock` for `triple` by delegating to
// `getPermutationForTriple` and `getLocatedTriplesPerBlockForTriple`, and
// return them as a pair (permutation first).
std::pair<PermutationPtr, LocatedTriplesPerBlockPtr>
getPermutationAndLocatedTriplesPerBlockForTriple(
    Permutation::Enum permutation, const Index& index,
    LocatedTriplesSnapshotPtr snapshot, const SparqlTripleSimple& triple) {
  // Resolve the permutation first, then the located triples; this is the
  // same order in which the two lookups are evaluated in a braced return.
  auto selectedPermutation =
      getPermutationForTriple(permutation, index, triple);
  // The snapshot is passed by value to this function, so hand ownership on
  // to the helper instead of copying the shared pointer again.
  auto selectedLocatedTriples = getLocatedTriplesPerBlockForTriple(
      permutation, std::move(snapshot), triple);
  return {std::move(selectedPermutation), std::move(selectedLocatedTriples)};
}
} // namespace qlever
Loading
Loading