diff --git a/src/engine/CountAvailablePredicates.cpp b/src/engine/CountAvailablePredicates.cpp index a251abf407..02799b2b0a 100644 --- a/src/engine/CountAvailablePredicates.cpp +++ b/src/engine/CountAvailablePredicates.cpp @@ -168,11 +168,13 @@ void CountAvailablePredicates::computePatternTrickAllEntities( TripleComponent::Iri::fromIriref(HAS_PATTERN_PREDICATE), std::nullopt, std::nullopt} .toScanSpecification(index); - const auto& perm = index.getPermutation(Permutation::Enum::PSO); - const auto& locatedTriple = locatedTriplesState(); - auto fullHasPattern = - perm.lazyScan(perm.getScanSpecAndBlocks(scanSpec, locatedTriple), - std::nullopt, {}, cancellationHandle_, locatedTriple); + const auto perm = Permutation::Enum::PSO; + const auto& locatedTriples = + locatedTriplesState().getLocatedTriplesForPermutation(perm); + auto fullHasPattern = index.getPermutation(perm).lazyScan( + CompressedRelationReader::ScanSpecAndBlocks::withUpdates(scanSpec, + locatedTriples), + std::nullopt, {}, cancellationHandle_, locatedTriples); for (const auto& idTable : fullHasPattern) { for (const auto& patternId : idTable.getColumn(1)) { AD_CORRECTNESS_CHECK(patternId.getDatatype() == Datatype::Int); diff --git a/src/engine/GroupByImpl.cpp b/src/engine/GroupByImpl.cpp index 9439b547d4..68e198fa55 100644 --- a/src/engine/GroupByImpl.cpp +++ b/src/engine/GroupByImpl.cpp @@ -840,8 +840,9 @@ std::optional GroupByImpl::computeGroupByObjectWithCount() const { // Compute the result and update the runtime information (we don't actually // do the index scan, but something smarter). 
const auto& permutation = indexScan->permutation(); + const auto& locatedTriples = indexScan->locatedTriplesPerBlock(); auto result = permutation.getDistinctCol1IdsAndCounts( - col0Id.value(), cancellationHandle_, locatedTriplesState(), + col0Id.value(), cancellationHandle_, locatedTriples, indexScan->getLimitOffset()); indexScan->updateRuntimeInformationWhenOptimizedOut({}); @@ -900,7 +901,10 @@ std::optional GroupByImpl::computeGroupByForFullIndexScan() const { getExecutionContext()->getIndex().getPimpl().getPermutation( permutationEnum.value()); auto table = permutation.getDistinctCol0IdsAndCounts( - cancellationHandle_, locatedTriplesState(), indexScan->getLimitOffset()); + cancellationHandle_, + locatedTriplesState().getLocatedTriplesForPermutation( + permutationEnum.value()), + indexScan->getLimitOffset()); if (numCounts == 0) { table.setColumnSubset(std::array{ColumnIndex{0}}); } else if (!variableIsBoundInSubtree) { diff --git a/src/engine/HasPredicateScan.cpp b/src/engine/HasPredicateScan.cpp index e48e997408..4097b64b21 100644 --- a/src/engine/HasPredicateScan.cpp +++ b/src/engine/HasPredicateScan.cpp @@ -420,9 +420,10 @@ std::shared_ptr HasPredicateScan::makePatternScan( std::move(subject), ad_utility::triple_component::Iri::fromIriref(HAS_PATTERN_PREDICATE), TripleComponent{std::move(object)}}; + auto [permutation, locatedTriples] = + qlever::getPermutationAndLocatedTriplesPerBlockForTriple( + Permutation::Enum::PSO, qec->getIndex(), + qec->locatedTriplesSharedState(), triple); return ad_utility::makeExecutionTree( - qec, - qlever::getPermutationForTriple(Permutation::Enum::PSO, qec->getIndex(), - triple), - qec->locatedTriplesSharedState(), triple); + qec, std::move(permutation), std::move(locatedTriples), triple); } diff --git a/src/engine/IndexScan.cpp b/src/engine/IndexScan.cpp index 9dad45f671..a53cfdcf2e 100644 --- a/src/engine/IndexScan.cpp +++ b/src/engine/IndexScan.cpp @@ -11,6 +11,7 @@ #include #include +#include "engine/PermutationSelector.h" 
#include "engine/QueryExecutionTree.h" #include "index/IndexImpl.h" #include "parser/ParsedQuery.h" @@ -33,13 +34,13 @@ static size_t getNumberOfVariables(const TripleComponent& subject, // _____________________________________________________________________________ IndexScan::IndexScan(QueryExecutionContext* qec, PermutationPtr permutation, - LocatedTriplesSharedState locatedTriplesSharedState, + LocatedTriplesPerBlockPtr locatedTriplesPerBlock, const SparqlTripleSimple& triple, Graphs graphsToFilter, std::optional scanSpecAndBlocks, VarsToKeep varsToKeep) : Operation(qec), permutation_(std::move(permutation)), - locatedTriplesSharedState_(std::move(locatedTriplesSharedState)), + locatedTriplesPerBlock_(std::move(locatedTriplesPerBlock)), subject_(triple.s_), predicate_(triple.p_), object_(triple.o_), @@ -50,7 +51,7 @@ IndexScan::IndexScan(QueryExecutionContext* qec, PermutationPtr permutation, numVariables_(getNumberOfVariables(subject_, predicate_, object_)), varsToKeep_(std::move(varsToKeep)) { AD_CONTRACT_CHECK(permutation_ != nullptr); - AD_CONTRACT_CHECK(locatedTriplesSharedState_ != nullptr); + AD_CONTRACT_CHECK(locatedTriplesPerBlock_ != nullptr); // We previously had `nullptr`s here in unit tests. This is no longer // necessary nor allowed. 
@@ -79,14 +80,15 @@ IndexScan::IndexScan(QueryExecutionContext* qec, Permutation::Enum permutationType, const SparqlTripleSimple& triple, Graphs graphsToFilter, std::optional scanSpecAndBlocks) - : IndexScan(qec, - qec->getIndex().getImpl().getPermutationPtr(permutationType), - qec->locatedTriplesSharedState(), triple, - std::move(graphsToFilter), std::move(scanSpecAndBlocks)) {} + : IndexScan( + qec, qec->getIndex().getImpl().getPermutationPtr(permutationType), + qlever::getLocatedTriplesPerBlockForTriple( + permutationType, qec->locatedTriplesSharedState(), triple), + triple, std::move(graphsToFilter), std::move(scanSpecAndBlocks)) {} // _____________________________________________________________________________ IndexScan::IndexScan(QueryExecutionContext* qec, PermutationPtr permutation, - LocatedTriplesSharedState locatedTriplesSharedState, + LocatedTriplesPerBlockPtr locatedTriplesPerBlock, const TripleComponent& s, const TripleComponent& p, const TripleComponent& o, std::vector additionalColumns, @@ -95,7 +97,7 @@ IndexScan::IndexScan(QueryExecutionContext* qec, PermutationPtr permutation, bool scanSpecAndBlocksIsPrefiltered, VarsToKeep varsToKeep) : Operation(qec), permutation_(std::move(permutation)), - locatedTriplesSharedState_(std::move(locatedTriplesSharedState)), + locatedTriplesPerBlock_(std::move(locatedTriplesPerBlock)), subject_(s), predicate_(p), object_(o), @@ -108,7 +110,7 @@ IndexScan::IndexScan(QueryExecutionContext* qec, PermutationPtr permutation, varsToKeep_{std::move(varsToKeep)} { AD_CONTRACT_CHECK(qec != nullptr); AD_CONTRACT_CHECK(permutation_ != nullptr); - AD_CONTRACT_CHECK(locatedTriplesSharedState_ != nullptr); + AD_CONTRACT_CHECK(locatedTriplesPerBlock_ != nullptr); std::tie(sizeEstimateIsExact_, sizeEstimate_) = computeSizeEstimate(); determineMultiplicities(); } @@ -261,7 +263,7 @@ std::shared_ptr IndexScan::makeCopyWithPrefilteredScanSpecAndBlocks( ScanSpecAndBlocks scanSpecAndBlocks) const { return 
ad_utility::makeExecutionTree( - getExecutionContext(), permutation_, locatedTriplesSharedState_, subject_, + getExecutionContext(), permutation_, locatedTriplesPerBlock_, subject_, predicate_, object_, additionalColumns_, additionalVariables_, graphsToFilter_, std::move(scanSpecAndBlocks), true, varsToKeep_); } @@ -276,9 +278,9 @@ Result::LazyResult IndexScan::chunkedIndexScan() const { // _____________________________________________________________________________ IdTable IndexScan::materializedIndexScan() const { - IdTable idTable = permutation().scan(scanSpecAndBlocks_, additionalColumns(), - cancellationHandle_, - locatedTriplesState(), getLimitOffset()); + IdTable idTable = permutation().scan( + scanSpecAndBlocks_, additionalColumns(), cancellationHandle_, + locatedTriplesPerBlock(), getLimitOffset()); AD_LOG_DEBUG << "IndexScan result computation done.\n"; checkCancellation(); idTable = makeApplyColumnSubset()(std::move(idTable)); @@ -302,16 +304,16 @@ const Permutation& IndexScan::permutation() const { } // _____________________________________________________________________________ -const LocatedTriplesState& IndexScan::locatedTriplesState() const { - AD_CONTRACT_CHECK(locatedTriplesSharedState_ != nullptr); - return *locatedTriplesSharedState_; +const LocatedTriplesPerBlock& IndexScan::locatedTriplesPerBlock() const { + AD_CONTRACT_CHECK(locatedTriplesPerBlock_ != nullptr); + return *locatedTriplesPerBlock_; } // _____________________________________________________________________________ std::pair IndexScan::computeSizeEstimate() const { AD_CORRECTNESS_CHECK(_executionContext); auto [lower, upper] = permutation().getSizeEstimateForScan( - scanSpecAndBlocks_, locatedTriplesState()); + scanSpecAndBlocks_, locatedTriplesPerBlock()); // NOTE: Starting from C++20 we could use `std::midpoint` here return {lower == upper, lower + (upper - lower) / 2}; } @@ -320,7 +322,7 @@ std::pair IndexScan::computeSizeEstimate() const { size_t IndexScan::getExactSize() 
const { AD_CORRECTNESS_CHECK(_executionContext); return permutation().getResultSizeOfScan(scanSpecAndBlocks_, - locatedTriplesState()); + locatedTriplesPerBlock()); } // _____________________________________________________________________________ @@ -340,8 +342,7 @@ void IndexScan::determineMultiplicities() { // There are no duplicate triples in RDF and two elements are fixed. return {1.0f}; } else if (numVariables_ == 2) { - return idx.getMultiplicities(*getPermutedTriple()[0], permutation(), - locatedTriplesState()); + return getMultiplicities(*getPermutedTriple()[0]); } else { AD_CORRECTNESS_CHECK(numVariables_ == 3); return idx.getMultiplicities(permutation()); @@ -396,9 +397,10 @@ IndexScan::getSortedVariableAndMetadataColumnIndexForPrefiltering() const { } // ___________________________________________________________________________ -Permutation::ScanSpecAndBlocks IndexScan::getScanSpecAndBlocks() const { - return permutation().getScanSpecAndBlocks(getScanSpecification(), - locatedTriplesState()); +CompressedRelationReader::ScanSpecAndBlocks IndexScan::getScanSpecAndBlocks() + const { + return CompressedRelationReader::ScanSpecAndBlocks::withUpdates( + getScanSpecification(), locatedTriplesPerBlock()); } // _____________________________________________________________________________ @@ -412,7 +414,7 @@ CompressedRelationReader::IdTableGeneratorInputRange IndexScan::getLazyScan( getLimitOffset().isUnconstrained() ? 
std::move(blocks) : std::nullopt; auto lazyScanAllCols = permutation().lazyScan( scanSpecAndBlocks_, filteredBlocks, additionalColumns(), - cancellationHandle_, locatedTriplesState(), getLimitOffset()); + cancellationHandle_, locatedTriplesPerBlock(), getLimitOffset()); return CompressedRelationReader::IdTableGeneratorInputRange{ ad_utility::CachingTransformInputRange< @@ -426,7 +428,7 @@ CompressedRelationReader::IdTableGeneratorInputRange IndexScan::getLazyScan( std::optional IndexScan::getMetadataForScan() const { return permutation().getMetadataAndBlocks(scanSpecAndBlocks_, - locatedTriplesState()); + locatedTriplesPerBlock()); }; // _____________________________________________________________________________ @@ -747,7 +749,7 @@ std::pair IndexScan::prefilterTables( // _____________________________________________________________________________ std::unique_ptr IndexScan::cloneImpl() const { return std::make_unique( - _executionContext, permutation_, locatedTriplesSharedState_, subject_, + _executionContext, permutation_, locatedTriplesPerBlock_, subject_, predicate_, object_, additionalColumns_, additionalVariables_, graphsToFilter_, scanSpecAndBlocks_, scanSpecAndBlocksIsPrefiltered_, varsToKeep_); @@ -772,7 +774,7 @@ IndexScan::makeTreeWithStrippedColumns( } return ad_utility::makeExecutionTree( - _executionContext, permutation_, locatedTriplesSharedState_, subject_, + _executionContext, permutation_, locatedTriplesPerBlock_, subject_, predicate_, object_, additionalColumns_, additionalVariables_, graphsToFilter_, scanSpecAndBlocks_, scanSpecAndBlocksIsPrefiltered_, VarsToKeep{std::move(newVariables)}); @@ -800,3 +802,18 @@ std::vector IndexScan::getSubsetForStrippedColumns() const { } return result; } + +// _____________________________________________________________________________ +std::vector IndexScan::getMultiplicities( + const TripleComponent& key) const { + const auto& idx = getIndex(); + if (auto keyId = key.toValueId(idx.getVocab(), 
idx.encodedIriManager())) { + auto meta = + permutation().getMetadata(keyId.value(), locatedTriplesPerBlock()); + if (meta.has_value()) { + return {meta.value().getCol1Multiplicity(), + meta.value().getCol2Multiplicity()}; + } + } + return {1.0f, 1.0f}; +} diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h index ed3f318a2e..48dda9e978 100644 --- a/src/engine/IndexScan.h +++ b/src/engine/IndexScan.h @@ -15,16 +15,20 @@ class SparqlTriple; class SparqlTripleSimple; class IndexScan final : public Operation { + FRIEND_TEST(IndexScanTest, getMultiplicities); + public: using Graphs = ScanSpecificationAsTripleComponent::GraphFilter; using PermutationPtr = std::shared_ptr; + using LocatedTriplesPerBlockPtr = + std::shared_ptr; private: using ScanSpecAndBlocks = Permutation::ScanSpecAndBlocks; private: PermutationPtr permutation_; - LocatedTriplesSharedState locatedTriplesSharedState_; + LocatedTriplesPerBlockPtr locatedTriplesPerBlock_; TripleComponent subject_; TripleComponent predicate_; TripleComponent object_; @@ -50,7 +54,7 @@ class IndexScan final : public Operation { public: IndexScan(QueryExecutionContext* qec, PermutationPtr permutation, - LocatedTriplesSharedState locatedTriplesSharedState, + LocatedTriplesPerBlockPtr locatedTriplesPerBlock, const SparqlTripleSimple& triple, Graphs graphsToFilter = Graphs::All(), std::optional scanSpecAndBlocks = std::nullopt, @@ -66,7 +70,7 @@ class IndexScan final : public Operation { // Constructor to simplify copy creation of an `IndexScan`. 
IndexScan(QueryExecutionContext* qec, PermutationPtr permutation, - LocatedTriplesSharedState locatedTriplesSharedState, + LocatedTriplesPerBlockPtr locatedTriplesPerBlock, const TripleComponent& s, const TripleComponent& p, const TripleComponent& o, std::vector additionalColumns, @@ -185,9 +189,10 @@ class IndexScan final : public Operation { // Instead of using the `LocatedTriplesSnapshot` of the `Operation` base // class, which accesses the one stored in the `QueryExecutionContext`, use - // the `LocatedTriplesSnapshot` held in this object. This might be a different - // one if a custom permutation is used. - const LocatedTriplesState& locatedTriplesState() const override; + // the `LocatedTriplesPerBlock` held in this object. This already is exactly + // the located triples for the permutation of the index scan. + // `locatedTriplesState` should not be used in `IndexScan`. + const LocatedTriplesPerBlock& locatedTriplesPerBlock() const; // Return the stored triple in the order that corresponds to the // `permutation_`. 
For example if `permutation_ == PSO` then the result is @@ -287,6 +292,8 @@ class IndexScan final : public Operation { }; } + std::vector getMultiplicities(const TripleComponent& key) const; + public: std::optional> makeTreeWithStrippedColumns( diff --git a/src/engine/MaterializedViews.cpp b/src/engine/MaterializedViews.cpp index ea6f482e70..a273faa563 100644 --- a/src/engine/MaterializedViews.cpp +++ b/src/engine/MaterializedViews.cpp @@ -296,7 +296,7 @@ void MaterializedViewWriter::computeResultAndWritePermutation() const { MaterializedView::MaterializedView(std::string onDiskBase, std::string name) : onDiskBase_{std::move(onDiskBase)}, name_{std::move(name)}, - locatedTriplesState_{makeEmptyLocatedTriplesState()} { + locatedTriplesPerBlock_{makeEmptyLocatedTriplesPerBlock()} { AD_CORRECTNESS_CHECK(onDiskBase_ != "", "The index base filename was not set."); throwIfInvalidName(name_); @@ -517,22 +517,17 @@ void MaterializedViewsManager::setOnDiskBase(const std::string& onDiskBase) { } // _____________________________________________________________________________ -LocatedTriplesSharedState MaterializedView::locatedTriplesState() const { - return {locatedTriplesState_}; +std::shared_ptr +MaterializedView::locatedTriplesPerBlock() const { + return locatedTriplesPerBlock_; } // _____________________________________________________________________________ -std::shared_ptr -MaterializedView::makeEmptyLocatedTriplesState() const { - LocatedTriplesPerBlockAllPermutations emptyLocatedTriples; - emptyLocatedTriples.at(static_cast(permutation_->permutation())) - .setOriginalMetadata(permutation_->metaData().blockDataShared()); - LocatedTriplesPerBlockAllPermutations emptyInternalLocatedTriples; - LocalVocab emptyVocab; - - return std::make_shared( - emptyLocatedTriples, emptyInternalLocatedTriples, - emptyVocab.getLifetimeExtender(), 0); +std::shared_ptr +MaterializedView::makeEmptyLocatedTriplesPerBlock() const { + auto ltpb = std::make_shared(); + 
ltpb->setOriginalMetadata(permutation_->metaData().blockDataShared()); + return ltpb; } // _____________________________________________________________________________ @@ -549,9 +544,8 @@ std::shared_ptr MaterializedView::makeIndexScan( // query. auto scanTriple = makeScanConfig(viewQuery); return std::make_shared( - qec, permutation_, LocatedTriplesSharedState{locatedTriplesState_}, - std::move(scanTriple), IndexScan::Graphs::All(), std::nullopt, - viewQuery.getVarsToKeep()); + qec, permutation_, locatedTriplesPerBlock_, std::move(scanTriple), + IndexScan::Graphs::All(), std::nullopt, viewQuery.getVarsToKeep()); } // _____________________________________________________________________________ diff --git a/src/engine/MaterializedViews.h b/src/engine/MaterializedViews.h index 5b3ad0082c..4a870812cf 100644 --- a/src/engine/MaterializedViews.h +++ b/src/engine/MaterializedViews.h @@ -148,13 +148,14 @@ class MaterializedView { std::shared_ptr permutation_{std::make_shared( Permutation::Enum::SPO, ad_utility::makeUnlimitedAllocator())}; VariableToColumnMap varToColMap_; - std::shared_ptr locatedTriplesState_; + std::shared_ptr locatedTriplesPerBlock_; using AdditionalScanColumns = SparqlTripleSimple::AdditionalScanColumns; - // Helper to create an empty `LocatedTriplesState` for `IndexScan`s as + // Helper to create an empty `LocatedTriplesPerBlock` for `IndexScan`s as // materialized views do not support updates yet. - std::shared_ptr makeEmptyLocatedTriplesState() const; + std::shared_ptr makeEmptyLocatedTriplesPerBlock() + const; public: // Load a materialized view from disk given the filename components. The @@ -181,10 +182,10 @@ class MaterializedView { // `nullptr`. std::shared_ptr permutation() const; - // Return a reference to the `LocatedTriplesSnapshot` for the permutation. For + // Return a reference to the `LocatedTriplesPerBlock` for the permutation. For // now this is always an empty snapshot but with the correct permutation // metadata. 
- LocatedTriplesSharedState locatedTriplesState() const; + std::shared_ptr locatedTriplesPerBlock() const; // Checks if the given name is allowed for a materialized view. Currently only // alphanumerics and hyphens are allowed. This is relevant for safe filenames diff --git a/src/engine/PermutationSelector.cpp b/src/engine/PermutationSelector.cpp index 0949cbb21a..0b6ee906f7 100644 --- a/src/engine/PermutationSelector.cpp +++ b/src/engine/PermutationSelector.cpp @@ -4,6 +4,8 @@ #include "engine/PermutationSelector.h" +#include + #include "index/IndexImpl.h" namespace { @@ -34,9 +36,9 @@ bool containsInternalIri(const SparqlTripleSimple& triple) { namespace qlever { // _____________________________________________________________________________ -std::shared_ptr getPermutationForTriple( - Permutation::Enum permutation, const Index& index, - const SparqlTripleSimple& triple) { +PermutationPtr getPermutationForTriple(Permutation::Enum permutation, + const Index& index, + const SparqlTripleSimple& triple) { auto actualPermutation = index.getImpl().getPermutationPtr(permutation); if (containsInternalIri(triple)) { @@ -47,4 +49,26 @@ std::shared_ptr getPermutationForTriple( } return actualPermutation; } + +// _____________________________________________________________________________ +LocatedTriplesPerBlockPtr getLocatedTriplesPerBlockForTriple( + Permutation::Enum permutation, LocatedTriplesSharedState snapshot, + const SparqlTripleSimple& triple) { + // Create an aliasing shared pointer to the right `LocatedTriplesPerBlock`. + const auto& locatedTriples = + containsInternalIri(triple) + ? 
snapshot->getLocatedTriplesForPermutation(permutation) + : snapshot->getLocatedTriplesForPermutation(permutation); + return LocatedTriplesPerBlockPtr{std::move(snapshot), &locatedTriples}; +} + +// _____________________________________________________________________________ +std::pair +getPermutationAndLocatedTriplesPerBlockForTriple( + Permutation::Enum permutation, const Index& index, + LocatedTriplesSharedState snapshot, const SparqlTripleSimple& triple) { + return {getPermutationForTriple(permutation, index, triple), + getLocatedTriplesPerBlockForTriple(permutation, std::move(snapshot), + triple)}; +} } // namespace qlever diff --git a/src/engine/PermutationSelector.h b/src/engine/PermutationSelector.h index 53389c1519..df353a2034 100644 --- a/src/engine/PermutationSelector.h +++ b/src/engine/PermutationSelector.h @@ -7,22 +7,42 @@ #include +#include "index/DeltaTriples.h" #include "index/Index.h" #include "index/Permutation.h" #include "parser/SparqlTriple.h" namespace qlever { +using LocatedTriplesPerBlockPtr = std::shared_ptr; +using PermutationPtr = std::shared_ptr; + // Return a shared pointer to the correct permutation in `index` based on the -// `permutation` enum and the values in `triple`. In particular return the -// associated internal permutation if the passed `triple` contains an internal -// IRI at any position. If no internal permutation is available for the passed -// `permutation` enum, throw instead.Internal IRIs include language-tagged IRIs. -// like `@en@rdfs:label` for example, or ones that start with +// `permutation` enum and the values in `triple`. See +// `getPermutationAndLocatedTriplesPerBlockForTriple` for details. +PermutationPtr getPermutationForTriple(Permutation::Enum permutation, + const Index& index, + const SparqlTripleSimple& triple); + +// Return a shared pointer to the correct `LocatedTriplesPerBlock` in `snapshot` +// based on the `permutation` enum and the values in `triple`. 
See +// `getPermutationAndLocatedTriplesPerBlockForTriple` for details. +LocatedTriplesPerBlockPtr getLocatedTriplesPerBlockForTriple( + Permutation::Enum permutation, LocatedTriplesSharedState snapshot, + const SparqlTripleSimple& triple); + +// Return a shared pointer to the correct permutation in `index` and +// `LocatedTriplesPerBlock` in `snapshot` based on the `permutation` enum and +// the values in `triple`. In particular return the associated internal +// permutation if the passed `triple` contains an internal IRI at any position. +// If no internal permutation is available for the passed `permutation` enum, +// throw instead. Internal IRIs include language-tagged IRIs, like +// `@en@rdfs:label` for example, or ones that start with // ` getPermutationForTriple( +std::pair +getPermutationAndLocatedTriplesPerBlockForTriple( Permutation::Enum permutation, const Index& index, - const SparqlTripleSimple& triple); + LocatedTriplesSharedState snapshot, const SparqlTripleSimple& triple); } // namespace qlever #endif // QLEVER_SRC_ENGINE_PERMUTATIONSELECTOR_H diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 08f79eef16..708f5d01d6 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -923,11 +923,12 @@ auto QueryPlanner::seedWithScansAndText( std::move(internalVariable)); } - auto actualPermutation = qlever::getPermutationForTriple( - permutation, _qec->getIndex(), triple); - + auto [actualPermutation, locatedTriples] = + qlever::getPermutationAndLocatedTriplesPerBlockForTriple( + permutation, _qec->getIndex(), _qec->locatedTriplesSharedState(), + triple); pushPlan(makeSubtreePlan(_qec, std::move(actualPermutation), - _qec->locatedTriplesSharedState(), + std::move(locatedTriples), std::move(triple), relevantGraphs)); }; seedFromOrdinaryTriple(node, addIndexScan, addFilter); diff --git a/src/index/CompressedRelation.cpp b/src/index/CompressedRelation.cpp index 74e342802e..a8a029df9e 100644 --- 
a/src/index/CompressedRelation.cpp +++ b/src/index/CompressedRelation.cpp @@ -1827,6 +1827,14 @@ CompressedRelationReader::ScanSpecAndBlocks::ScanSpecAndBlocks( sizeBlockMetadata_ = getNumberOfBlockMetadataValues(blockMetadata_); } +// _____________________________________________________________________________ +CompressedRelationReader::ScanSpecAndBlocks +CompressedRelationReader::ScanSpecAndBlocks::withUpdates( + ScanSpecification scanSpec, const LocatedTriplesPerBlock& locatedTriples) { + BlockMetadataSpan blocks(locatedTriples.getAugmentedMetadata()); + return {std::move(scanSpec), {{blocks.begin(), blocks.end()}}}; +} + // _____________________________________________________________________________ ql::span CompressedRelationReader::ScanSpecAndBlocks::getBlockMetadataSpan() const { diff --git a/src/index/CompressedRelation.h b/src/index/CompressedRelation.h index 3ba72d5181..a7083b6995 100644 --- a/src/index/CompressedRelation.h +++ b/src/index/CompressedRelation.h @@ -609,6 +609,12 @@ class CompressedRelationReader { ScanSpecAndBlocks(ScanSpecification scanSpec, const BlockMetadataRanges& blockMetadataRanges); + // Create a new `ScanSpecAndBlocks` with the current block metadata changed + // through updates. + static ScanSpecAndBlocks withUpdates( + ScanSpecification scanSpec, + const LocatedTriplesPerBlock& locatedTriples); + // Direct view access via `ql::views::join` over all // `CompressedBlockMetadata` values contained in `BlockMetadatatRanges // blockMetadata_`. 
diff --git a/src/index/Index.cpp b/src/index/Index.cpp index 8a8a03def7..32c19d002f 100644 --- a/src/index/Index.cpp +++ b/src/index/Index.cpp @@ -267,13 +267,6 @@ std::vector Index::getMultiplicities( return pimpl_->getMultiplicities(permutation); } -// ____________________________________________________________________________ -std::vector Index::getMultiplicities( - const TripleComponent& key, const Permutation& p, - const LocatedTriplesState& locatedTriplesState) const { - return pimpl_->getMultiplicities(key, p, locatedTriplesState); -} - // ____________________________________________________________________________ size_t Index::getResultSizeOfScan( const ScanSpecification& scanSpecification, diff --git a/src/index/Index.h b/src/index/Index.h index 985e922e59..de0ad509a3 100644 --- a/src/index/Index.h +++ b/src/index/Index.h @@ -221,11 +221,6 @@ class Index { bool hasAllPermutations() const; - // ___________________________________________________________________________ - std::vector getMultiplicities( - const TripleComponent& key, const Permutation& permutation, - const LocatedTriplesState& locatedTriplesState) const; - // ___________________________________________________________________________ std::vector getMultiplicities(const Permutation& permutation) const; diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index fad42c5d65..2924c4e505 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -1563,8 +1563,10 @@ Index::NumNormalAndInternal IndexImpl::numDistinctCol0( size_t IndexImpl::getCardinality( Id id, Permutation::Enum permutation, const LocatedTriplesState& locatedTriplesState) const { - if (const auto& meta = - getPermutation(permutation).getMetadata(id, locatedTriplesState); + const auto& perm = getPermutation(permutation); + if (const auto& meta = perm.getMetadata( + id, locatedTriplesState.getLocatedTriplesForPermutation( + permutation)); meta.has_value()) { return meta.value().numRows_; } @@ -1608,23 +1610,10 @@ 
Index::Vocab::PrefixRanges IndexImpl::prefixRanges( return vocab_.prefixRanges(prefix); } -// _____________________________________________________________________________ -std::vector IndexImpl::getMultiplicities( - const TripleComponent& key, const Permutation& permutation, - const LocatedTriplesState& locatedTriplesState) const { - if (auto keyId = key.toValueId(getVocab(), encodedIriManager())) { - auto meta = permutation.getMetadata(keyId.value(), locatedTriplesState); - if (meta.has_value()) { - return {meta.value().getCol1Multiplicity(), - meta.value().getCol2Multiplicity()}; - } - } - return {1.0f, 1.0f}; -} - // _____________________________________________________________________________ std::vector IndexImpl::getMultiplicities( const Permutation& permutation) const { + // TODO: these numbers are wrong if there are updates auto numTriples = static_cast(this->numTriples().normal); std::array multiplicities{numTriples / numDistinctSubjects().normal, numTriples / numDistinctPredicates().normal, @@ -1639,9 +1628,12 @@ size_t IndexImpl::getResultSizeOfScan( const Permutation::Enum& permutation, const LocatedTriplesState& locatedTriplesState) const { const auto& perm = getPermutation(permutation); + const auto& locatedTriples = + locatedTriplesState.getLocatedTriplesForPermutation(permutation); return perm.getResultSizeOfScan( - perm.getScanSpecAndBlocks(scanSpecification, locatedTriplesState), - locatedTriplesState); + CompressedRelationReader::ScanSpecAndBlocks::withUpdates( + scanSpecification, locatedTriples), + locatedTriples); } // _____________________________________________________________________________ diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index 9723fd0524..c402301098 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -480,11 +480,6 @@ class IndexImpl { bool hasAllPermutations() const { return SPO().isLoaded(); } - // ___________________________________________________________________________ - std::vector 
getMultiplicities( - const TripleComponent& key, const Permutation& permutation, - const LocatedTriplesState& locatedTriplesState) const; - // ___________________________________________________________________________ std::vector getMultiplicities(const Permutation& permutation) const; diff --git a/src/index/Permutation.cpp b/src/index/Permutation.cpp index b5b397ac6b..a95513f3db 100644 --- a/src/index/Permutation.cpp +++ b/src/index/Permutation.cpp @@ -18,14 +18,6 @@ Permutation::Permutation(Enum permutation, Allocator allocator) allocator_{std::move(allocator)}, permutation_{permutation} {} -// _____________________________________________________________________ -CompressedRelationReader::ScanSpecAndBlocks Permutation::getScanSpecAndBlocks( - const ScanSpecification& scanSpec, - const LocatedTriplesState& locatedTriplesState) const { - return {scanSpec, BlockMetadataRanges(getAugmentedMetadataForPermutation( - locatedTriplesState))}; -} - // _____________________________________________________________________ void Permutation::loadFromDisk(const std::string& onDiskBase, bool loadInternalPermutation) { @@ -73,55 +65,52 @@ void Permutation::setOriginalMetadataForDeltaTriples( IdTable Permutation::scan(const ScanSpecAndBlocks& scanSpecAndBlocks, ColumnIndicesRef additionalColumns, const CancellationHandle& cancellationHandle, - const LocatedTriplesState& locatedTriplesState, + const LocatedTriplesPerBlock& locatedTriples, const LimitOffsetClause& limitOffset) const { if (!isLoaded_) { throw std::runtime_error("This query requires the permutation " + readableName_ + ", which was not loaded"); } return reader().scan(scanSpecAndBlocks, additionalColumns, cancellationHandle, - getLocatedTriplesForPermutation(locatedTriplesState), - limitOffset); + locatedTriples, limitOffset); } // _____________________________________________________________________ size_t Permutation::getResultSizeOfScan( const ScanSpecAndBlocks& scanSpecAndBlocks, - const LocatedTriplesState& 
locatedTriplesState) const { - return reader().getResultSizeOfScan( - scanSpecAndBlocks, getLocatedTriplesForPermutation(locatedTriplesState)); + const LocatedTriplesPerBlock& locatedTriples) const { + return reader().getResultSizeOfScan(scanSpecAndBlocks, locatedTriples); } // _____________________________________________________________________ std::pair Permutation::getSizeEstimateForScan( const ScanSpecAndBlocks& scanSpecAndBlocks, - const LocatedTriplesState& locatedTriplesState) const { - return reader().getSizeEstimateForScan( - scanSpecAndBlocks, getLocatedTriplesForPermutation(locatedTriplesState)); + const LocatedTriplesPerBlock& locatedTriples) const { + return reader().getSizeEstimateForScan(scanSpecAndBlocks, locatedTriples); } // ____________________________________________________________________________ IdTable Permutation::getDistinctCol1IdsAndCounts( Id col0Id, const CancellationHandle& cancellationHandle, - const LocatedTriplesState& locatedTriplesState, + const LocatedTriplesPerBlock& locatedTriples, const LimitOffsetClause& limitOffset) const { return reader().getDistinctCol1IdsAndCounts( - getScanSpecAndBlocks( + CompressedRelationReader::ScanSpecAndBlocks::withUpdates( ScanSpecification{col0Id, std::nullopt, std::nullopt}, - locatedTriplesState), - cancellationHandle, getLocatedTriplesForPermutation(locatedTriplesState), - limitOffset); + locatedTriples), + cancellationHandle, locatedTriples, limitOffset); } // ____________________________________________________________________________ IdTable Permutation::getDistinctCol0IdsAndCounts( const CancellationHandle& cancellationHandle, - const LocatedTriplesState& locatedTriplesState, + const LocatedTriplesPerBlock& locatedTriples, const LimitOffsetClause& limitOffset) const { ScanSpecification scanSpec{std::nullopt, std::nullopt, std::nullopt}; return reader().getDistinctCol0IdsAndCounts( - getScanSpecAndBlocks(scanSpec, locatedTriplesState), cancellationHandle, - 
getLocatedTriplesForPermutation(locatedTriplesState), limitOffset); + CompressedRelationReader::ScanSpecAndBlocks::withUpdates(scanSpec, + locatedTriples), + cancellationHandle, locatedTriples, limitOffset); } // _____________________________________________________________________ @@ -166,23 +155,23 @@ std::string_view Permutation::toString(Permutation::Enum permutation) { // _____________________________________________________________________ std::optional Permutation::getMetadata( - Id col0Id, const LocatedTriplesState& locatedTriplesState) const { + Id col0Id, const LocatedTriplesPerBlock& locatedTriples) const { if (meta_.col0IdExists(col0Id)) { return meta_.getMetaData(col0Id); } return reader().getMetadataForSmallRelation( - getScanSpecAndBlocks( + CompressedRelationReader::ScanSpecAndBlocks::withUpdates( ScanSpecification{col0Id, std::nullopt, std::nullopt}, - locatedTriplesState), - col0Id, getLocatedTriplesForPermutation(locatedTriplesState)); + locatedTriples), + col0Id, locatedTriples); } // _____________________________________________________________________ std::optional Permutation::getMetadataAndBlocks( const ScanSpecAndBlocks& scanSpecAndBlocks, - const LocatedTriplesState& locatedTriplesState) const { + const LocatedTriplesPerBlock& locatedTriples) const { auto firstAndLastTriple = reader().getFirstAndLastTripleIgnoringGraph( - scanSpecAndBlocks, getLocatedTriplesForPermutation(locatedTriplesState)); + scanSpecAndBlocks, locatedTriples); if (!firstAndLastTriple.has_value()) { return std::nullopt; } @@ -196,35 +185,16 @@ CompressedRelationReader::IdTableGeneratorInputRange Permutation::lazyScan( std::optional> optBlocks, ColumnIndicesRef additionalColumns, const CancellationHandle& cancellationHandle, - const LocatedTriplesState& locatedTriplesState, + const LocatedTriplesPerBlock& locatedTriples, const LimitOffsetClause& limitOffset) const { ColumnIndices columns{additionalColumns.begin(), additionalColumns.end()}; if (!optBlocks.has_value()) { 
optBlocks = CompressedRelationReader::convertBlockMetadataRangesToVector( scanSpecAndBlocks.blockMetadata_); } - return reader().lazyScan( - scanSpecAndBlocks.scanSpec_, std::move(optBlocks.value()), - std::move(columns), cancellationHandle, - getLocatedTriplesForPermutation(locatedTriplesState), limitOffset); -} - -// ______________________________________________________________________ -const LocatedTriplesPerBlock& Permutation::getLocatedTriplesForPermutation( - const LocatedTriplesState& locatedTriplesState) const { - return isInternalPermutation_ - ? locatedTriplesState.getLocatedTriplesForPermutation( - permutation_) - : locatedTriplesState.getLocatedTriplesForPermutation( - permutation_); -} - -// ______________________________________________________________________ -BlockMetadataRanges Permutation::getAugmentedMetadataForPermutation( - const LocatedTriplesState& locatedTriplesState) const { - BlockMetadataSpan blocks(getLocatedTriplesForPermutation(locatedTriplesState) - .getAugmentedMetadata()); - return {{blocks.begin(), blocks.end()}}; + return reader().lazyScan(scanSpecAndBlocks.scanSpec_, + std::move(optBlocks.value()), std::move(columns), + cancellationHandle, locatedTriples, limitOffset); } // ______________________________________________________________________ diff --git a/src/index/Permutation.h b/src/index/Permutation.h index 13804c6244..92d489c47d 100644 --- a/src/index/Permutation.h +++ b/src/index/Permutation.h @@ -19,7 +19,7 @@ // Forward declaration of `IdTable` class IdTable; -// Forward declaration of `LocatedTriplesPerBlock` +// Forward declaration of `LocatedTriplesPerBlock` and `LocatedTriplesState` class LocatedTriplesPerBlock; struct LocatedTriplesState; class DeltaTriples; @@ -83,7 +83,7 @@ class Permutation { IdTable scan(const ScanSpecAndBlocks& scanSpecAndBlocks, ColumnIndicesRef additionalColumns, const CancellationHandle& cancellationHandle, - const LocatedTriplesState& locatedTriplesState, + const LocatedTriplesPerBlock& 
locatedTriples, const LimitOffsetClause& limitOffset = {}) const; // For a given relation, determine the `col1Id`s and their counts. This is @@ -91,12 +91,12 @@ class Permutation { // in `meta_`. IdTable getDistinctCol1IdsAndCounts( Id col0Id, const CancellationHandle& cancellationHandle, - const LocatedTriplesState& locatedTriplesState, + const LocatedTriplesPerBlock& locatedTriples, const LimitOffsetClause& limitOffset) const; IdTable getDistinctCol0IdsAndCounts( const CancellationHandle& cancellationHandle, - const LocatedTriplesState& locatedTriplesState, + const LocatedTriplesPerBlock& locatedTriples, const LimitOffsetClause& limitOffset) const; // Typedef to propagate the `MetadataAndblocks` and `IdTableGenerator` type. @@ -125,17 +125,11 @@ class Permutation { std::optional> optBlocks, ColumnIndicesRef additionalColumns, const CancellationHandle& cancellationHandle, - const LocatedTriplesState& locatedTriplesState, + const LocatedTriplesPerBlock& locatedTriples, const LimitOffsetClause& limitOffset = {}) const; - // Returns the corresponding `CompressedRelationReader::ScanSpecAndBlocks` - // with relevant `BlockMetadataRanges`. - ScanSpecAndBlocks getScanSpecAndBlocks( - const ScanSpecification& scanSpec, - const LocatedTriplesState& locatedTriplesState) const; - std::optional getMetadata( - Id col0Id, const LocatedTriplesState& locatedTriplesState) const; + Id col0Id, const LocatedTriplesPerBlock& locatedTriples) const; // Return the metadata for the scan specified by the `scanSpecification` // along with the metadata for all the blocks that are relevant for this @@ -143,21 +137,21 @@ class Permutation { // be empty) return `nullopt`. std::optional getMetadataAndBlocks( const ScanSpecAndBlocks& scanSpecAndBlocks, - const LocatedTriplesState& locatedTriplesState) const; + const LocatedTriplesPerBlock& locatedTriples) const; // Get the exact size of the result of a scan, taking into account the // given located triples. 
This requires an exact location of the delta // triples within the respective blocks. size_t getResultSizeOfScan( const ScanSpecAndBlocks& scanSpecAndBlocks, - const LocatedTriplesState& locatedTriplesState) const; + const LocatedTriplesPerBlock& locatedTriples) const; // Get a lower and upper bound for the size of the result of a scan, taking // into account the given `deltaTriples`. For this call, it is enough that // each delta triple know to which block it belongs. std::pair getSizeEstimateForScan( const ScanSpecAndBlocks& scanSpecAndBlocks, - const LocatedTriplesState& locatedTriplesState) const; + const LocatedTriplesPerBlock& locatedTriples) const; // _______________________________________________________ void setKbName(const std::string& name) { meta_.setName(name); } @@ -183,15 +177,6 @@ class Permutation { // _______________________________________________________ const MetaData& metaData() const { return meta_; } - // From the given snapshot, get the located triples for this permutation. - const LocatedTriplesPerBlock& getLocatedTriplesForPermutation( - const LocatedTriplesState& locatedTriplesState) const; - - // From the given snapshot, get the augmented block metadata for this - // permutation. 
- BlockMetadataRanges getAugmentedMetadataForPermutation( - const LocatedTriplesState& locatedTriplesState) const; - const CompressedRelationReader& reader() const { return reader_.value(); } Enum permutation() const { return permutation_; } @@ -226,4 +211,8 @@ class Permutation { bool isInternalPermutation_ = false; }; +namespace qlever { +using PermutationPtr = std::shared_ptr; +} + #endif // QLEVER_SRC_INDEX_PERMUTATION_H diff --git a/test/CompressedRelationsTest.cpp b/test/CompressedRelationsTest.cpp index bbd4998ae8..782d9f0016 100644 --- a/test/CompressedRelationsTest.cpp +++ b/test/CompressedRelationsTest.cpp @@ -971,11 +971,11 @@ TEST(CompressedRelationReader, getResultSizeImpl) { dt.insertTriples(handle, {IdTriple{{I(0), I(1), I(2), I(3)}}, IdTriple{{I(0), I(4), I(5), I(3)}}}); }); - auto sharedLocatedTriplesSnapshot = + auto locatedTriplesSharedState = deltaTriplesManager.getCurrentLocatedTriplesSharedState(); - const auto& locatedTriplesSnapshot = *sharedLocatedTriplesSnapshot; + const auto& locatedTriplesState = *locatedTriplesSharedState; auto& impl = index.getImpl(); - auto expectResultSizes = [&impl, &locatedTriplesSnapshot]( + auto expectResultSizes = [&impl, &locatedTriplesState]( Permutation::Enum p, const ScanSpecification& scanSpec, size_t lower, size_t upper, size_t exact, @@ -984,16 +984,16 @@ TEST(CompressedRelationReader, getResultSizeImpl) { auto loc = generateLocationTrace(sourceLocation); auto& perm = impl.getPermutation(p); auto& reader = perm.reader(); - auto augmentedBlocks = - perm.getAugmentedMetadataForPermutation(locatedTriplesSnapshot); - auto& ltpb = locatedTriplesSnapshot.getLocatedTriplesForPermutation( + auto scanSpecAndBlocks = ScanSpecAndBlocks::withUpdates( + scanSpec, + locatedTriplesState.getLocatedTriplesForPermutation(p)); + auto& ltpb = locatedTriplesState.getLocatedTriplesForPermutation( perm.permutation()); - auto [actual_lower, actual_upper] = reader.getSizeEstimateForScan( - ScanSpecAndBlocks{scanSpec, 
augmentedBlocks}, ltpb); + auto [actual_lower, actual_upper] = + reader.getSizeEstimateForScan(scanSpecAndBlocks, ltpb); EXPECT_THAT(actual_lower, testing::Eq(lower)); EXPECT_THAT(actual_upper, testing::Eq(upper)); - auto actual_exact = reader.getResultSizeOfScan( - ScanSpecAndBlocks{scanSpec, augmentedBlocks}, ltpb); + auto actual_exact = reader.getResultSizeOfScan(scanSpecAndBlocks, ltpb); EXPECT_THAT(actual_exact, testing::Eq(exact)); }; // The Scans request all triples of the one and only block. @@ -1035,12 +1035,12 @@ TEST(CompressedRelationReader, getFirstAndLastTripleIgnoringGraph) { " . . ."; auto index = ad_utility::testing::makeTestIndex( "getFirstAndLastTripleIgnoringGraph", std::move(testIndexConfig)); - auto currentSnapshot = + auto currentLocatedTriplesSharedState = index.deltaTriplesManager().getCurrentLocatedTriplesSharedState(); auto permutationEnum = Permutation::Enum::SPO; - const auto& permutation = index.getImpl().getPermutation(permutationEnum); const auto& locatedTriplesPerBlock = - currentSnapshot->getLocatedTriplesForPermutation(permutationEnum); + currentLocatedTriplesSharedState->getLocatedTriplesForPermutation( + permutationEnum); auto getId = [&index](std::string_view iri) { return TripleComponent{ad_utility::triple_component::Iri::fromIriref(iri)} @@ -1056,7 +1056,7 @@ TEST(CompressedRelationReader, getFirstAndLastTripleIgnoringGraph) { auto g2 = getId(""); auto g3 = getId(""); - auto getTriples = [&permutation, &currentSnapshot, permutationEnum, &index, + auto getTriples = [&currentLocatedTriplesSharedState, permutationEnum, &index, &locatedTriplesPerBlock](std::optional col0, std::optional col1, std::optional graphId) { @@ -1068,9 +1068,11 @@ TEST(CompressedRelationReader, getFirstAndLastTripleIgnoringGraph) { graphId.has_value() ? 
ScanSpecification::GraphFilter::Whitelist( {std::move(graphId).value()}) : ScanSpecification::GraphFilter::All()}; - CompressedRelationReader::ScanSpecAndBlocks metadataAndBlocks{ - std::move(scanSpecification), - permutation.getAugmentedMetadataForPermutation(*currentSnapshot)}; + CompressedRelationReader::ScanSpecAndBlocks metadataAndBlocks = + CompressedRelationReader::ScanSpecAndBlocks::withUpdates( + std::move(scanSpecification), + currentLocatedTriplesSharedState + ->getLocatedTriplesForPermutation(permutationEnum)); const auto& reader = index.getImpl().getPermutation(permutationEnum).reader(); return reader.getFirstAndLastTripleIgnoringGraph(metadataAndBlocks, @@ -1122,19 +1124,20 @@ TEST(CompressedRelationReader, ensureDummyBlockWith6ColumnsDoesntCauseIssues) { deltaTriples.insertTriples(cancellationHandle, {IdTriple{{id, id, id, id}}}); }); - auto sharedLocatedTriplesSnapshot = + auto locatedTriplesSharedState = index.deltaTriplesManager().getCurrentLocatedTriplesSharedState(); for (bool usePatternPermutation : {false, true}) { auto permutationEnum = usePatternPermutation ? 
Permutation::Enum::PSO : Permutation::Enum::SPO; - const auto& permutation = index.getImpl().getPermutation(permutationEnum); + const auto& locatedTriples = + locatedTriplesSharedState->getLocatedTriplesForPermutation( + permutationEnum); ScanSpecification scanSpecification{std::nullopt, std::nullopt, std::nullopt}; - CompressedRelationReader::ScanSpecAndBlocks metadataAndBlocks{ - std::move(scanSpecification), - permutation.getAugmentedMetadataForPermutation( - *sharedLocatedTriplesSnapshot)}; + auto metadataAndBlocks = + CompressedRelationReader::ScanSpecAndBlocks::withUpdates( + std::move(scanSpecification), locatedTriples); std::vector additionalColumns{ADDITIONAL_COLUMN_GRAPH_ID}; if (usePatternPermutation) { @@ -1147,7 +1150,7 @@ TEST(CompressedRelationReader, ensureDummyBlockWith6ColumnsDoesntCauseIssues) { index.getImpl() .getPermutation(permutationEnum) .lazyScan(metadataAndBlocks, std::nullopt, additionalColumns, - cancellationHandle, *sharedLocatedTriplesSnapshot); + cancellationHandle, locatedTriples); for (const IdTable& block : blocks) { EXPECT_EQ(block.numColumns(), 3 + additionalColumns.size()); } diff --git a/test/IndexTest.cpp b/test/IndexTest.cpp index 3e37319c53..7a81bb0788 100644 --- a/test/IndexTest.cpp +++ b/test/IndexTest.cpp @@ -57,14 +57,16 @@ auto makeTestScanWidthOne = [](const IndexImpl& index, ad_utility::source_location l = AD_CURRENT_SOURCE_LOC()) { auto t = generateLocationTrace(l); const auto& actualPermutation = index.getPermutation(permutation); - auto locatedTriplesSnapshot = qec.locatedTriplesState(); + const auto& locatedTriples = + qec.locatedTriplesState().getLocatedTriplesForPermutation( + permutation); IdTable result = actualPermutation.scan( - actualPermutation.getScanSpecAndBlocks( + CompressedRelationReader::ScanSpecAndBlocks::withUpdates( ScanSpecificationAsTripleComponent{c0, c1, std::nullopt} .toScanSpecification(index), - locatedTriplesSnapshot), + locatedTriples), additionalColumns, std::make_shared>(), - 
locatedTriplesSnapshot); + locatedTriples); ASSERT_EQ(result.numColumns(), 1 + additionalColumns.size()); ASSERT_EQ(result, makeIdTableFromVector(expected)); }; @@ -81,15 +83,16 @@ auto makeTestScanWidthTwo = [](const IndexImpl& index, ad_utility::source_location l = AD_CURRENT_SOURCE_LOC()) { auto t = generateLocationTrace(l); const auto& actualPermutation = index.getPermutation(permutation); - auto locatedTriplesSnapshot = qec.locatedTriplesState(); + const auto& locatedTriples = + qec.locatedTriplesState().getLocatedTriplesForPermutation( + permutation); IdTable wol = actualPermutation.scan( - actualPermutation.getScanSpecAndBlocks( + CompressedRelationReader::ScanSpecAndBlocks::withUpdates( ScanSpecificationAsTripleComponent{c0, std::nullopt, std::nullopt} .toScanSpecification(index), - locatedTriplesSnapshot), + locatedTriples), Permutation::ColumnIndicesRef{}, - std::make_shared>(), - locatedTriplesSnapshot); + std::make_shared>(), locatedTriples); ASSERT_EQ(wol, makeIdTableFromVector(expected)); }; }; @@ -118,7 +121,7 @@ TEST(IndexTest, createFromTurtleTest) { return; } const auto& [index, qec] = getIndex(); - const auto& locatedTriplesSnapshot = qec.locatedTriplesState(); + const auto& locatedTriplesState = qec.locatedTriplesState(); auto getId = makeGetId(getQec(kb)->getIndex()); Id a = getId(""); @@ -129,49 +132,35 @@ TEST(IndexTest, createFromTurtleTest) { Id c2 = getId(""); // TODO We could also test the multiplicities here. 
- ASSERT_TRUE( - index.PSO().getMetadata(b, locatedTriplesSnapshot).has_value()); - ASSERT_TRUE( - index.PSO().getMetadata(b2, locatedTriplesSnapshot).has_value()); + const auto& pso = index.PSO(); + const auto& psoLTPB = + qec.locatedTriplesState().getLocatedTriplesForPermutation( + Permutation::PSO); + ASSERT_TRUE(pso.getMetadata(b, psoLTPB).has_value()); + ASSERT_TRUE(pso.getMetadata(b2, psoLTPB).has_value()); + ASSERT_FALSE(pso.getMetadata(a2, psoLTPB).has_value()); + ASSERT_FALSE(pso.getMetadata(c, psoLTPB).has_value()); ASSERT_FALSE( - index.PSO().getMetadata(a2, locatedTriplesSnapshot).has_value()); - ASSERT_FALSE( - index.PSO().getMetadata(c, locatedTriplesSnapshot).has_value()); - ASSERT_FALSE( - index.PSO() - .getMetadata(Id::makeFromVocabIndex(VocabIndex::make(735)), - locatedTriplesSnapshot) + pso.getMetadata(Id::makeFromVocabIndex(VocabIndex::make(735)), + psoLTPB) .has_value()); - ASSERT_FALSE(index.PSO() - .getMetadata(b, locatedTriplesSnapshot) - .value() - .isFunctional()); - ASSERT_TRUE(index.PSO() - .getMetadata(b2, locatedTriplesSnapshot) - .value() - .isFunctional()); - - ASSERT_TRUE( - index.POS().getMetadata(b, locatedTriplesSnapshot).has_value()); - ASSERT_TRUE( - index.POS().getMetadata(b2, locatedTriplesSnapshot).has_value()); - ASSERT_FALSE( - index.POS().getMetadata(a2, locatedTriplesSnapshot).has_value()); - ASSERT_FALSE( - index.POS().getMetadata(c, locatedTriplesSnapshot).has_value()); + ASSERT_FALSE(pso.getMetadata(b, psoLTPB).value().isFunctional()); + ASSERT_TRUE(pso.getMetadata(b2, psoLTPB).value().isFunctional()); + + const auto& pos = index.POS(); + const auto& posLTPB = + locatedTriplesState.getLocatedTriplesForPermutation( + Permutation::POS); + ASSERT_TRUE(pos.getMetadata(b, posLTPB).has_value()); + ASSERT_TRUE(pos.getMetadata(b2, posLTPB).has_value()); + ASSERT_FALSE(pos.getMetadata(a2, posLTPB).has_value()); + ASSERT_FALSE(pos.getMetadata(c, posLTPB).has_value()); ASSERT_FALSE( - index.POS() - 
.getMetadata(Id::makeFromVocabIndex(VocabIndex::make(735)), - locatedTriplesSnapshot) + pos.getMetadata(Id::makeFromVocabIndex(VocabIndex::make(735)), + posLTPB) .has_value()); - ASSERT_TRUE(index.POS() - .getMetadata(b, locatedTriplesSnapshot) - .value() - .isFunctional()); - ASSERT_TRUE(index.POS() - .getMetadata(b2, locatedTriplesSnapshot) - .value() - .isFunctional()); + ASSERT_TRUE(pos.getMetadata(b, posLTPB).value().isFunctional()); + ASSERT_TRUE(pos.getMetadata(b2, posLTPB).value().isFunctional()); // Relation b // Pair index @@ -221,15 +210,19 @@ TEST(IndexTest, createFromTurtleTest) { Id c = getId(""); Id isA = getId(""); - ASSERT_TRUE(index.PSO().getMetadata(isA, deltaTriples).has_value()); - ASSERT_FALSE(index.PSO().getMetadata(a, deltaTriples).has_value()); + const auto& pso = index.PSO(); + const auto& psoLTPB = deltaTriples.getLocatedTriplesForPermutation( + Permutation::Enum::PSO); + ASSERT_TRUE(pso.getMetadata(isA, psoLTPB).has_value()); + ASSERT_FALSE(pso.getMetadata(a, psoLTPB).has_value()); - ASSERT_FALSE( - index.PSO().getMetadata(isA, deltaTriples).value().isFunctional()); + ASSERT_FALSE(pso.getMetadata(isA, psoLTPB).value().isFunctional()); - ASSERT_TRUE(index.POS().getMetadata(isA, deltaTriples).has_value()); - ASSERT_FALSE( - index.POS().getMetadata(isA, deltaTriples).value().isFunctional()); + const auto& pos = index.POS(); + const auto& posLTPB = deltaTriples.getLocatedTriplesForPermutation( + Permutation::Enum::POS); + ASSERT_TRUE(pos.getMetadata(isA, posLTPB).has_value()); + ASSERT_FALSE(pos.getMetadata(isA, posLTPB).value().isFunctional()); auto testTwo = makeTestScanWidthTwo(index, qec); testTwo(iri(""), Permutation::PSO, @@ -274,19 +267,25 @@ TEST(IndexTest, createFromOnDiskIndexTest) { Id a = getId(""); Id c = getId(""); - ASSERT_TRUE(index.PSO().getMetadata(b, deltaTriples).has_value()); - ASSERT_TRUE(index.PSO().getMetadata(b2, deltaTriples).has_value()); - ASSERT_FALSE(index.PSO().getMetadata(a, deltaTriples).has_value()); - 
ASSERT_FALSE(index.PSO().getMetadata(c, deltaTriples).has_value()); - ASSERT_FALSE(index.PSO().getMetadata(b, deltaTriples).value().isFunctional()); - ASSERT_TRUE(index.PSO().getMetadata(b2, deltaTriples).value().isFunctional()); - - ASSERT_TRUE(index.POS().getMetadata(b, deltaTriples).has_value()); - ASSERT_TRUE(index.POS().getMetadata(b2, deltaTriples).has_value()); - ASSERT_FALSE(index.POS().getMetadata(a, deltaTriples).has_value()); - ASSERT_FALSE(index.POS().getMetadata(c, deltaTriples).has_value()); - ASSERT_TRUE(index.POS().getMetadata(b, deltaTriples).value().isFunctional()); - ASSERT_TRUE(index.POS().getMetadata(b2, deltaTriples).value().isFunctional()); + const auto& pso = index.PSO(); + const auto& psoLTPB = deltaTriples.getLocatedTriplesForPermutation( + Permutation::Enum::PSO); + ASSERT_TRUE(pso.getMetadata(b, psoLTPB).has_value()); + ASSERT_TRUE(pso.getMetadata(b2, psoLTPB).has_value()); + ASSERT_FALSE(pso.getMetadata(a, psoLTPB).has_value()); + ASSERT_FALSE(pso.getMetadata(c, psoLTPB).has_value()); + ASSERT_FALSE(pso.getMetadata(b, psoLTPB).value().isFunctional()); + ASSERT_TRUE(pso.getMetadata(b2, psoLTPB).value().isFunctional()); + + const auto& pos = index.POS(); + const auto& posLTPB = deltaTriples.getLocatedTriplesForPermutation( + Permutation::Enum::POS); + ASSERT_TRUE(pos.getMetadata(b, posLTPB).has_value()); + ASSERT_TRUE(pos.getMetadata(b2, posLTPB).has_value()); + ASSERT_FALSE(pos.getMetadata(a, posLTPB).has_value()); + ASSERT_FALSE(pos.getMetadata(c, posLTPB).has_value()); + ASSERT_TRUE(pos.getMetadata(b, posLTPB).value().isFunctional()); + ASSERT_TRUE(pos.getMetadata(b2, posLTPB).value().isFunctional()); }; TEST(IndexTest, indexIdAndGitHash) { @@ -512,12 +511,6 @@ TEST(IndexTest, NumDistinctEntities) { EXPECT_FLOAT_EQ(multiplicities[0], 7.0 / 3.0); EXPECT_FLOAT_EQ(multiplicities[1], 7.0 / 2.0); EXPECT_FLOAT_EQ(multiplicities[2], 7.0 / 7.0); - - multiplicities = index.getMultiplicities( - iri(""), index.getPermutation(Permutation::SPO), - 
qec.locatedTriplesState()); - EXPECT_FLOAT_EQ(multiplicities[0], 2.5); - EXPECT_FLOAT_EQ(multiplicities[1], 1); } TEST(IndexTest, NumDistinctEntitiesCornerCases) { diff --git a/test/MaterializedViewsTest.cpp b/test/MaterializedViewsTest.cpp index 30f20c421e..e47001eba2 100644 --- a/test/MaterializedViewsTest.cpp +++ b/test/MaterializedViewsTest.cpp @@ -375,7 +375,7 @@ TEST_F(MaterializedViewsTest, ManualConfigurations) { ASSERT_TRUE(view != nullptr); EXPECT_EQ(view->name(), "testView1"); EXPECT_EQ(view->permutation()->permutation(), Permutation::Enum::SPO); - EXPECT_NE(view->locatedTriplesState(), nullptr); + EXPECT_NE(view->locatedTriplesPerBlock(), nullptr); MaterializedViewsManager managerNoBaseName; AD_EXPECT_THROW_WITH_MESSAGE( diff --git a/test/engine/IndexScanTest.cpp b/test/engine/IndexScanTest.cpp index 4a92121d9a..5c41ec3355 100644 --- a/test/engine/IndexScanTest.cpp +++ b/test/engine/IndexScanTest.cpp @@ -1823,3 +1823,13 @@ TEST(IndexScanTest, StripColumnsWithPrefiltering) { << varsToKeep.size() << " variables"; } } + +TEST(IndexScanTest, getMultiplicities) { + auto* qec = getQec(); + auto s = Tc::Iri::fromIriref(""); + IndexScan scan{qec, Permutation::SPO, + SparqlTripleSimple{s, Var{"?y"}, Var{"?z"}}}; + auto multiplicities = scan.getMultiplicities(s); + EXPECT_FLOAT_EQ(multiplicities[0], 2.5); + EXPECT_FLOAT_EQ(multiplicities[1], 1); +} diff --git a/test/engine/PermutationSelectorTest.cpp b/test/engine/PermutationSelectorTest.cpp index 80fae6c2dd..1e3c077bba 100644 --- a/test/engine/PermutationSelectorTest.cpp +++ b/test/engine/PermutationSelectorTest.cpp @@ -4,14 +4,17 @@ #include +#include "../util/GTestHelpers.h" #include "../util/IndexTestHelpers.h" #include "engine/PermutationSelector.h" #include "index/IndexImpl.h" -// _____________________________________________________________________________ -TEST(PermutationSelectorTest, internalPrefixIsCorrectlyChosen) { - auto* qec = ad_utility::testing::getQec(); - const auto& index = qec->getIndex(); 
+auto testPermutationSelection = [](auto retrievalFunction, auto getExpected, + auto getExpectedInternal, + ad_utility::source_location sourceLocation = + AD_CURRENT_SOURCE_LOC()) { + auto l = generateLocationTrace(sourceLocation); + TripleComponent internalIri{ad_utility::triple_component::Iri::fromIriref( makeQleverInternalIri("something"))}; TripleComponent languageTaggedIri{ @@ -21,20 +24,17 @@ TEST(PermutationSelectorTest, internalPrefixIsCorrectlyChosen) { TripleComponent regularLiteral{1}; for (auto permutation : Permutation::ALL) { - const auto* permutationPtr = &index.getImpl().getPermutation(permutation); + const auto* expectedPtr = getExpected(permutation); for (const auto& triple : {SparqlTripleSimple{regularIri, regularIri, regularIri}, SparqlTripleSimple{regularLiteral, regularLiteral, regularLiteral}, SparqlTripleSimple{regularLiteral, regularIri, regularLiteral}}) { - EXPECT_EQ( - qlever::getPermutationForTriple(permutation, index, triple).get(), - permutationPtr); + EXPECT_EQ(retrievalFunction(permutation, triple).get(), expectedPtr); } } for (auto permutation : Permutation::INTERNAL) { - const auto* permutationPtr = - &index.getImpl().getPermutation(permutation).internalPermutation(); + const auto* expectedPtr = getExpectedInternal(permutation); for (const auto& triple : {SparqlTripleSimple{internalIri, regularIri, regularIri}, SparqlTripleSimple{regularIri, internalIri, regularIri}, @@ -42,19 +42,51 @@ TEST(PermutationSelectorTest, internalPrefixIsCorrectlyChosen) { SparqlTripleSimple{languageTaggedIri, regularIri, regularIri}, SparqlTripleSimple{regularIri, languageTaggedIri, regularIri}, SparqlTripleSimple{regularIri, regularIri, languageTaggedIri}}) { - EXPECT_EQ( - qlever::getPermutationForTriple(permutation, index, triple).get(), - permutationPtr); + EXPECT_EQ(retrievalFunction(permutation, triple).get(), expectedPtr); } } using enum Permutation::Enum; // Unsupported configurations. 
for (auto permutation : {OPS, OSP, SOP, SPO}) { - EXPECT_THROW(qlever::getPermutationForTriple( - permutation, index, - SparqlTripleSimple{languageTaggedIri, internalIri, - languageTaggedIri}), - ad_utility::Exception); + EXPECT_THROW( + retrievalFunction(permutation, + SparqlTripleSimple{languageTaggedIri, internalIri, + languageTaggedIri}), + ad_utility::Exception); } +}; + +// _____________________________________________________________________________ +TEST(PermutationSelectorTest, internalPrefixIsCorrectlyChosen) { + auto* qec = ad_utility::testing::getQec(); + const auto& index = qec->getIndex(); + + testPermutationSelection( + [&index](auto p, const auto& triple) { + return qlever::getPermutationForTriple(p, index, triple); + }, + + [&index](const auto& p) { return &index.getImpl().getPermutation(p); }, + [&index](const auto& p) { + return &index.getImpl().getPermutation(p).internalPermutation(); + }); +} + +// _____________________________________________________________________________ +TEST(PermutationSelectorTest, getLocatedTriplesPerBlockForTriple) { + const auto* qec = ad_utility::testing::getQec(); + const auto& locatedTriples = qec->locatedTriplesSharedState(); + + testPermutationSelection( + [&locatedTriples](auto p, const auto& triple) { + return qlever::getLocatedTriplesPerBlockForTriple(p, locatedTriples, + triple); + }, + [&locatedTriples](const auto& p) { + return &locatedTriples->getLocatedTriplesForPermutation(p); + }, + [&locatedTriples](const auto& p) { + return &locatedTriples->getLocatedTriplesForPermutation(p); + }); } diff --git a/test/util/IndexTestHelpers.cpp b/test/util/IndexTestHelpers.cpp index 061fdb1753..d27f17e044 100644 --- a/test/util/IndexTestHelpers.cpp +++ b/test/util/IndexTestHelpers.cpp @@ -82,15 +82,18 @@ void checkConsistencyBetweenPatternPredicateAndAdditionalColumn( auto checkSingleElement = [&cancellationDummy, &iriOfHasPattern, &locatedTriplesSnapshot, &indexImpl](size_t patternIdx, Id id) { - const auto& 
permutation = - indexImpl.getPermutation(Permutation::Enum::PSO).internalPermutation(); - auto scanResultHasPattern = - permutation.scan(permutation.getScanSpecAndBlocks( - ScanSpecificationAsTripleComponent{ - iriOfHasPattern, id, std::nullopt} - .toScanSpecification(indexImpl), - locatedTriplesSnapshot), - {}, cancellationDummy, locatedTriplesSnapshot); + const auto perm = Permutation::Enum::PSO; + const auto& actualPermutation = + indexImpl.getPermutation(perm).internalPermutation(); + const auto& locatedTriples = + locatedTriplesSnapshot.getLocatedTriplesForPermutation(perm); + auto scanResultHasPattern = actualPermutation.scan( + CompressedRelationReader::ScanSpecAndBlocks::withUpdates( + ScanSpecificationAsTripleComponent{iriOfHasPattern, id, + std::nullopt} + .toScanSpecification(indexImpl), + locatedTriples), + {}, cancellationDummy, locatedTriples); // Each ID has at most one pattern, it can have none if it doesn't // appear as a subject in the knowledge graph. AD_CORRECTNESS_CHECK(scanResultHasPattern.numRows() <= 1); @@ -103,16 +106,19 @@ void checkConsistencyBetweenPatternPredicateAndAdditionalColumn( } }; - auto checkConsistencyForCol0IdAndPermutation = + auto checkConsistencyForCol0IdAndExternalPermutation = [&](Id col0Id, const Permutation& permutation, size_t subjectColIdx, size_t objectColIdx) { + const auto& locatedTriples = + locatedTriplesSnapshot.getLocatedTriplesForPermutation( + permutation.permutation()); auto scanResult = permutation.scan( - permutation.getScanSpecAndBlocks( + CompressedRelationReader::ScanSpecAndBlocks::withUpdates( ScanSpecification{col0Id, std::nullopt, std::nullopt}, - locatedTriplesSnapshot), + locatedTriples), std::array{ColumnIndex{ADDITIONAL_COLUMN_INDEX_SUBJECT_PATTERN}, ColumnIndex{ADDITIONAL_COLUMN_INDEX_OBJECT_PATTERN}}, - cancellationDummy, locatedTriplesSnapshot); + cancellationDummy, locatedTriples); ASSERT_EQ(scanResult.numColumns(), 4u); for (const auto& row : scanResult) { auto patternIdx = 
row[2].getInt(); @@ -126,26 +132,33 @@ void checkConsistencyBetweenPatternPredicateAndAdditionalColumn( auto checkConsistencyForPredicate = [&](Id predicateId) { using enum Permutation::Enum; - checkConsistencyForCol0IdAndPermutation( + checkConsistencyForCol0IdAndExternalPermutation( predicateId, indexImpl.getPermutation(PSO), 0, 1); - checkConsistencyForCol0IdAndPermutation( + checkConsistencyForCol0IdAndExternalPermutation( predicateId, indexImpl.getPermutation(POS), 1, 0); }; auto checkConsistencyForObject = [&](Id objectId) { using enum Permutation::Enum; - checkConsistencyForCol0IdAndPermutation( + checkConsistencyForCol0IdAndExternalPermutation( objectId, indexImpl.getPermutation(OPS), 1, col0IdTag); - checkConsistencyForCol0IdAndPermutation( + checkConsistencyForCol0IdAndExternalPermutation( objectId, indexImpl.getPermutation(OSP), 0, col0IdTag); }; - - auto predicates = index.getImpl().PSO().getDistinctCol0IdsAndCounts( - cancellationDummy, locatedTriplesSnapshot, {}); + const auto& pso = index.getImpl().PSO(); + auto predicates = pso.getDistinctCol0IdsAndCounts( + cancellationDummy, + locatedTriplesSnapshot.getLocatedTriplesForPermutation( + Permutation::Enum::PSO), + {}); for (const auto& predicate : predicates.getColumn(0)) { checkConsistencyForPredicate(predicate); } - auto objects = index.getImpl().OSP().getDistinctCol0IdsAndCounts( - cancellationDummy, locatedTriplesSnapshot, {}); + const auto& osp = index.getImpl().OSP(); + auto objects = osp.getDistinctCol0IdsAndCounts( + cancellationDummy, + locatedTriplesSnapshot.getLocatedTriplesForPermutation( + Permutation::Enum::OSP), + {}); for (const auto& object : objects.getColumn(0)) { checkConsistencyForObject(object); }