Skip to content
Open
12 changes: 7 additions & 5 deletions src/engine/CountAvailablePredicates.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,11 +168,13 @@ void CountAvailablePredicates::computePatternTrickAllEntities(
TripleComponent::Iri::fromIriref(HAS_PATTERN_PREDICATE), std::nullopt,
std::nullopt}
.toScanSpecification(index);
const auto& perm = index.getPermutation(Permutation::Enum::PSO);
const auto& locatedTriple = locatedTriplesState();
auto fullHasPattern =
perm.lazyScan(perm.getScanSpecAndBlocks(scanSpec, locatedTriple),
std::nullopt, {}, cancellationHandle_, locatedTriple);
const auto perm = Permutation::Enum::PSO;
const auto& locatedTriples =
locatedTriplesState().getLocatedTriplesForPermutation<false>(perm);
auto fullHasPattern = index.getPermutation(perm).lazyScan(
CompressedRelationReader::ScanSpecAndBlocks::withUpdates(scanSpec,
locatedTriples),
std::nullopt, {}, cancellationHandle_, locatedTriples);
for (const auto& idTable : fullHasPattern) {
for (const auto& patternId : idTable.getColumn(1)) {
AD_CORRECTNESS_CHECK(patternId.getDatatype() == Datatype::Int);
Expand Down
8 changes: 6 additions & 2 deletions src/engine/GroupByImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -840,8 +840,9 @@ std::optional<IdTable> GroupByImpl::computeGroupByObjectWithCount() const {
// Compute the result and update the runtime information (we don't actually
// do the index scan, but something smarter).
const auto& permutation = indexScan->permutation();
const auto& locatedTriples = indexScan->locatedTriplesPerBlock();
auto result = permutation.getDistinctCol1IdsAndCounts(
col0Id.value(), cancellationHandle_, locatedTriplesState(),
col0Id.value(), cancellationHandle_, locatedTriples,
indexScan->getLimitOffset());

indexScan->updateRuntimeInformationWhenOptimizedOut({});
Expand Down Expand Up @@ -900,7 +901,10 @@ std::optional<IdTable> GroupByImpl::computeGroupByForFullIndexScan() const {
getExecutionContext()->getIndex().getPimpl().getPermutation(
permutationEnum.value());
auto table = permutation.getDistinctCol0IdsAndCounts(
cancellationHandle_, locatedTriplesState(), indexScan->getLimitOffset());
cancellationHandle_,
locatedTriplesState().getLocatedTriplesForPermutation<false>(
permutationEnum.value()),
indexScan->getLimitOffset());
if (numCounts == 0) {
table.setColumnSubset(std::array{ColumnIndex{0}});
} else if (!variableIsBoundInSubtree) {
Expand Down
9 changes: 5 additions & 4 deletions src/engine/HasPredicateScan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -420,9 +420,10 @@ std::shared_ptr<QueryExecutionTree> HasPredicateScan::makePatternScan(
std::move(subject),
ad_utility::triple_component::Iri::fromIriref(HAS_PATTERN_PREDICATE),
TripleComponent{std::move(object)}};
auto [permutation, locatedTriples] =
qlever::getPermutationAndLocatedTriplesPerBlockForTriple(
Permutation::Enum::PSO, qec->getIndex(),
qec->locatedTriplesSharedState(), triple);
return ad_utility::makeExecutionTree<IndexScan>(
qec,
qlever::getPermutationForTriple(Permutation::Enum::PSO, qec->getIndex(),
triple),
qec->locatedTriplesSharedState(), triple);
qec, std::move(permutation), std::move(locatedTriples), triple);
}
73 changes: 45 additions & 28 deletions src/engine/IndexScan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <string>
#include <utility>

#include "engine/PermutationSelector.h"
#include "engine/QueryExecutionTree.h"
#include "index/IndexImpl.h"
#include "parser/ParsedQuery.h"
Expand All @@ -33,13 +34,13 @@ static size_t getNumberOfVariables(const TripleComponent& subject,

// _____________________________________________________________________________
IndexScan::IndexScan(QueryExecutionContext* qec, PermutationPtr permutation,
LocatedTriplesSharedState locatedTriplesSharedState,
LocatedTriplesPerBlockPtr locatedTriplesPerBlock,
const SparqlTripleSimple& triple, Graphs graphsToFilter,
std::optional<ScanSpecAndBlocks> scanSpecAndBlocks,
VarsToKeep varsToKeep)
: Operation(qec),
permutation_(std::move(permutation)),
locatedTriplesSharedState_(std::move(locatedTriplesSharedState)),
locatedTriplesPerBlock_(std::move(locatedTriplesPerBlock)),
subject_(triple.s_),
predicate_(triple.p_),
object_(triple.o_),
Expand All @@ -50,7 +51,7 @@ IndexScan::IndexScan(QueryExecutionContext* qec, PermutationPtr permutation,
numVariables_(getNumberOfVariables(subject_, predicate_, object_)),
varsToKeep_(std::move(varsToKeep)) {
AD_CONTRACT_CHECK(permutation_ != nullptr);
AD_CONTRACT_CHECK(locatedTriplesSharedState_ != nullptr);
AD_CONTRACT_CHECK(locatedTriplesPerBlock_ != nullptr);

// We previously had `nullptr`s here in unit tests. This is no longer
// necessary nor allowed.
Expand Down Expand Up @@ -79,14 +80,15 @@ IndexScan::IndexScan(QueryExecutionContext* qec,
Permutation::Enum permutationType,
const SparqlTripleSimple& triple, Graphs graphsToFilter,
std::optional<ScanSpecAndBlocks> scanSpecAndBlocks)
: IndexScan(qec,
qec->getIndex().getImpl().getPermutationPtr(permutationType),
qec->locatedTriplesSharedState(), triple,
std::move(graphsToFilter), std::move(scanSpecAndBlocks)) {}
: IndexScan(
qec, qec->getIndex().getImpl().getPermutationPtr(permutationType),
qlever::getLocatedTriplesPerBlockForTriple(
permutationType, qec->locatedTriplesSharedState(), triple),
triple, std::move(graphsToFilter), std::move(scanSpecAndBlocks)) {}

// _____________________________________________________________________________
IndexScan::IndexScan(QueryExecutionContext* qec, PermutationPtr permutation,
LocatedTriplesSharedState locatedTriplesSharedState,
LocatedTriplesPerBlockPtr locatedTriplesPerBlock,
const TripleComponent& s, const TripleComponent& p,
const TripleComponent& o,
std::vector<ColumnIndex> additionalColumns,
Expand All @@ -95,7 +97,7 @@ IndexScan::IndexScan(QueryExecutionContext* qec, PermutationPtr permutation,
bool scanSpecAndBlocksIsPrefiltered, VarsToKeep varsToKeep)
: Operation(qec),
permutation_(std::move(permutation)),
locatedTriplesSharedState_(std::move(locatedTriplesSharedState)),
locatedTriplesPerBlock_(std::move(locatedTriplesPerBlock)),
subject_(s),
predicate_(p),
object_(o),
Expand All @@ -108,7 +110,7 @@ IndexScan::IndexScan(QueryExecutionContext* qec, PermutationPtr permutation,
varsToKeep_{std::move(varsToKeep)} {
AD_CONTRACT_CHECK(qec != nullptr);
AD_CONTRACT_CHECK(permutation_ != nullptr);
AD_CONTRACT_CHECK(locatedTriplesSharedState_ != nullptr);
AD_CONTRACT_CHECK(locatedTriplesPerBlock_ != nullptr);
std::tie(sizeEstimateIsExact_, sizeEstimate_) = computeSizeEstimate();
determineMultiplicities();
}
Expand Down Expand Up @@ -261,7 +263,7 @@ std::shared_ptr<QueryExecutionTree>
IndexScan::makeCopyWithPrefilteredScanSpecAndBlocks(
ScanSpecAndBlocks scanSpecAndBlocks) const {
return ad_utility::makeExecutionTree<IndexScan>(
getExecutionContext(), permutation_, locatedTriplesSharedState_, subject_,
getExecutionContext(), permutation_, locatedTriplesPerBlock_, subject_,
predicate_, object_, additionalColumns_, additionalVariables_,
graphsToFilter_, std::move(scanSpecAndBlocks), true, varsToKeep_);
}
Expand All @@ -276,9 +278,9 @@ Result::LazyResult IndexScan::chunkedIndexScan() const {

// _____________________________________________________________________________
IdTable IndexScan::materializedIndexScan() const {
IdTable idTable = permutation().scan(scanSpecAndBlocks_, additionalColumns(),
cancellationHandle_,
locatedTriplesState(), getLimitOffset());
IdTable idTable = permutation().scan(
scanSpecAndBlocks_, additionalColumns(), cancellationHandle_,
locatedTriplesPerBlock(), getLimitOffset());
AD_LOG_DEBUG << "IndexScan result computation done.\n";
checkCancellation();
idTable = makeApplyColumnSubset()(std::move(idTable));
Expand All @@ -302,16 +304,16 @@ const Permutation& IndexScan::permutation() const {
}

// _____________________________________________________________________________
const LocatedTriplesState& IndexScan::locatedTriplesState() const {
AD_CONTRACT_CHECK(locatedTriplesSharedState_ != nullptr);
return *locatedTriplesSharedState_;
const LocatedTriplesPerBlock& IndexScan::locatedTriplesPerBlock() const {
AD_CONTRACT_CHECK(locatedTriplesPerBlock_ != nullptr);
return *locatedTriplesPerBlock_;
}

// _____________________________________________________________________________
std::pair<bool, size_t> IndexScan::computeSizeEstimate() const {
AD_CORRECTNESS_CHECK(_executionContext);
auto [lower, upper] = permutation().getSizeEstimateForScan(
scanSpecAndBlocks_, locatedTriplesState());
scanSpecAndBlocks_, locatedTriplesPerBlock());
// NOTE: Starting from C++20 we could use `std::midpoint` here
return {lower == upper, lower + (upper - lower) / 2};
}
Expand All @@ -320,7 +322,7 @@ std::pair<bool, size_t> IndexScan::computeSizeEstimate() const {
size_t IndexScan::getExactSize() const {
AD_CORRECTNESS_CHECK(_executionContext);
return permutation().getResultSizeOfScan(scanSpecAndBlocks_,
locatedTriplesState());
locatedTriplesPerBlock());
}

// _____________________________________________________________________________
Expand All @@ -340,8 +342,7 @@ void IndexScan::determineMultiplicities() {
// There are no duplicate triples in RDF and two elements are fixed.
return {1.0f};
} else if (numVariables_ == 2) {
return idx.getMultiplicities(*getPermutedTriple()[0], permutation(),
locatedTriplesState());
return getMultiplicities(*getPermutedTriple()[0]);
} else {
AD_CORRECTNESS_CHECK(numVariables_ == 3);
return idx.getMultiplicities(permutation());
Expand Down Expand Up @@ -396,9 +397,10 @@ IndexScan::getSortedVariableAndMetadataColumnIndexForPrefiltering() const {
}

// ___________________________________________________________________________
Permutation::ScanSpecAndBlocks IndexScan::getScanSpecAndBlocks() const {
return permutation().getScanSpecAndBlocks(getScanSpecification(),
locatedTriplesState());
CompressedRelationReader::ScanSpecAndBlocks IndexScan::getScanSpecAndBlocks()
const {
return CompressedRelationReader::ScanSpecAndBlocks::withUpdates(
getScanSpecification(), locatedTriplesPerBlock());
}

// _____________________________________________________________________________
Expand All @@ -412,7 +414,7 @@ CompressedRelationReader::IdTableGeneratorInputRange IndexScan::getLazyScan(
getLimitOffset().isUnconstrained() ? std::move(blocks) : std::nullopt;
auto lazyScanAllCols = permutation().lazyScan(
scanSpecAndBlocks_, filteredBlocks, additionalColumns(),
cancellationHandle_, locatedTriplesState(), getLimitOffset());
cancellationHandle_, locatedTriplesPerBlock(), getLimitOffset());

return CompressedRelationReader::IdTableGeneratorInputRange{
ad_utility::CachingTransformInputRange<
Expand All @@ -426,7 +428,7 @@ CompressedRelationReader::IdTableGeneratorInputRange IndexScan::getLazyScan(
std::optional<Permutation::MetadataAndBlocks> IndexScan::getMetadataForScan()
const {
return permutation().getMetadataAndBlocks(scanSpecAndBlocks_,
locatedTriplesState());
locatedTriplesPerBlock());
};

// _____________________________________________________________________________
Expand Down Expand Up @@ -747,7 +749,7 @@ std::pair<Result::LazyResult, Result::LazyResult> IndexScan::prefilterTables(
// _____________________________________________________________________________
std::unique_ptr<Operation> IndexScan::cloneImpl() const {
return std::make_unique<IndexScan>(
_executionContext, permutation_, locatedTriplesSharedState_, subject_,
_executionContext, permutation_, locatedTriplesPerBlock_, subject_,
predicate_, object_, additionalColumns_, additionalVariables_,
graphsToFilter_, scanSpecAndBlocks_, scanSpecAndBlocksIsPrefiltered_,
varsToKeep_);
Expand All @@ -772,7 +774,7 @@ IndexScan::makeTreeWithStrippedColumns(
}

return ad_utility::makeExecutionTree<IndexScan>(
_executionContext, permutation_, locatedTriplesSharedState_, subject_,
_executionContext, permutation_, locatedTriplesPerBlock_, subject_,
predicate_, object_, additionalColumns_, additionalVariables_,
graphsToFilter_, scanSpecAndBlocks_, scanSpecAndBlocksIsPrefiltered_,
VarsToKeep{std::move(newVariables)});
Expand Down Expand Up @@ -800,3 +802,18 @@ std::vector<ColumnIndex> IndexScan::getSubsetForStrippedColumns() const {
}
return result;
}

// _____________________________________________________________________________
std::vector<float> IndexScan::getMultiplicities(
    const TripleComponent& key) const {
  // Return the multiplicities of the second and third column of this scan's
  // permutation for the given `key`. The metadata is looked up via
  // `permutation().getMetadata` using the located triples stored in this
  // `IndexScan`. If `key` cannot be converted to an `Id`, or if no metadata
  // is available for it, `{1.0f, 1.0f}` is returned instead.
  const auto& index = getIndex();
  auto keyId = key.toValueId(index.getVocab(), index.encodedIriManager());
  if (!keyId) {
    // The key has no `Id`, so there is no metadata to look up.
    return {1.0f, 1.0f};
  }

  auto metadata =
      permutation().getMetadata(keyId.value(), locatedTriplesPerBlock());
  if (!metadata.has_value()) {
    // No metadata for this key in the permutation.
    return {1.0f, 1.0f};
  }

  return {metadata.value().getCol1Multiplicity(),
          metadata.value().getCol2Multiplicity()};
}
19 changes: 13 additions & 6 deletions src/engine/IndexScan.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,20 @@ class SparqlTriple;
class SparqlTripleSimple;

class IndexScan final : public Operation {
FRIEND_TEST(IndexScanTest, getMultiplicities);

public:
using Graphs = ScanSpecificationAsTripleComponent::GraphFilter;
using PermutationPtr = std::shared_ptr<const Permutation>;
using LocatedTriplesPerBlockPtr =
std::shared_ptr<const LocatedTriplesPerBlock>;

private:
using ScanSpecAndBlocks = Permutation::ScanSpecAndBlocks;

private:
PermutationPtr permutation_;
LocatedTriplesSharedState locatedTriplesSharedState_;
LocatedTriplesPerBlockPtr locatedTriplesPerBlock_;
TripleComponent subject_;
TripleComponent predicate_;
TripleComponent object_;
Expand All @@ -50,7 +54,7 @@ class IndexScan final : public Operation {

public:
IndexScan(QueryExecutionContext* qec, PermutationPtr permutation,
LocatedTriplesSharedState locatedTriplesSharedState,
LocatedTriplesPerBlockPtr locatedTriplesPerBlock,
const SparqlTripleSimple& triple,
Graphs graphsToFilter = Graphs::All(),
std::optional<ScanSpecAndBlocks> scanSpecAndBlocks = std::nullopt,
Expand All @@ -66,7 +70,7 @@ class IndexScan final : public Operation {

// Constructor to simplify copy creation of an `IndexScan`.
IndexScan(QueryExecutionContext* qec, PermutationPtr permutation,
LocatedTriplesSharedState locatedTriplesSharedState,
LocatedTriplesPerBlockPtr locatedTriplesPerBlock,
const TripleComponent& s, const TripleComponent& p,
const TripleComponent& o,
std::vector<ColumnIndex> additionalColumns,
Expand Down Expand Up @@ -185,9 +189,10 @@ class IndexScan final : public Operation {

// Instead of using the `LocatedTriplesSnapshot` of the `Operation` base
// class, which accesses the one stored in the `QueryExecutionContext`, use
// the `LocatedTriplesSnapshot` held in this object. This might be a different
// one if a custom permutation is used.
const LocatedTriplesState& locatedTriplesState() const override;
// the `LocatedTriplesPerBlock` held in this object. This already is exactly
// the located triples for the permutation of the index scan.
// `locatedTriplesState` should not be used in `IndexScan`.
const LocatedTriplesPerBlock& locatedTriplesPerBlock() const;

// Return the stored triple in the order that corresponds to the
// `permutation_`. For example if `permutation_ == PSO` then the result is
Expand Down Expand Up @@ -287,6 +292,8 @@ class IndexScan final : public Operation {
};
}

std::vector<float> getMultiplicities(const TripleComponent& key) const;

public:
std::optional<std::shared_ptr<QueryExecutionTree>>
makeTreeWithStrippedColumns(
Expand Down
28 changes: 11 additions & 17 deletions src/engine/MaterializedViews.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ void MaterializedViewWriter::computeResultAndWritePermutation() const {
MaterializedView::MaterializedView(std::string onDiskBase, std::string name)
: onDiskBase_{std::move(onDiskBase)},
name_{std::move(name)},
locatedTriplesState_{makeEmptyLocatedTriplesState()} {
locatedTriplesPerBlock_{makeEmptyLocatedTriplesPerBlock()} {
AD_CORRECTNESS_CHECK(onDiskBase_ != "",
"The index base filename was not set.");
throwIfInvalidName(name_);
Expand Down Expand Up @@ -517,22 +517,17 @@ void MaterializedViewsManager::setOnDiskBase(const std::string& onDiskBase) {
}

// _____________________________________________________________________________
LocatedTriplesSharedState MaterializedView::locatedTriplesState() const {
return {locatedTriplesState_};
std::shared_ptr<const LocatedTriplesPerBlock>
MaterializedView::locatedTriplesPerBlock() const {
return locatedTriplesPerBlock_;
}

// _____________________________________________________________________________
std::shared_ptr<LocatedTriplesState>
MaterializedView::makeEmptyLocatedTriplesState() const {
LocatedTriplesPerBlockAllPermutations<false> emptyLocatedTriples;
emptyLocatedTriples.at(static_cast<size_t>(permutation_->permutation()))
.setOriginalMetadata(permutation_->metaData().blockDataShared());
LocatedTriplesPerBlockAllPermutations<true> emptyInternalLocatedTriples;
LocalVocab emptyVocab;

return std::make_shared<LocatedTriplesState>(
emptyLocatedTriples, emptyInternalLocatedTriples,
emptyVocab.getLifetimeExtender(), 0);
std::shared_ptr<LocatedTriplesPerBlock>
MaterializedView::makeEmptyLocatedTriplesPerBlock() const {
auto ltpb = std::make_shared<LocatedTriplesPerBlock>();
ltpb->setOriginalMetadata(permutation_->metaData().blockDataShared());
return ltpb;
}

// _____________________________________________________________________________
Expand All @@ -549,9 +544,8 @@ std::shared_ptr<IndexScan> MaterializedView::makeIndexScan(
// query.
auto scanTriple = makeScanConfig(viewQuery);
return std::make_shared<IndexScan>(
qec, permutation_, LocatedTriplesSharedState{locatedTriplesState_},
std::move(scanTriple), IndexScan::Graphs::All(), std::nullopt,
viewQuery.getVarsToKeep());
qec, permutation_, locatedTriplesPerBlock_, std::move(scanTriple),
IndexScan::Graphs::All(), std::nullopt, viewQuery.getVarsToKeep());
}

// _____________________________________________________________________________
Expand Down
Loading
Loading