-
Notifications
You must be signed in to change notification settings - Fork 107
Lazy and prefiltered OPTIONAL
#2695
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
4f6cf19
5330071
155480d
d7a871a
3661691
85e1e6b
1418f6c
7c50f82
afea692
e6ab9a1
30972e4
f9dd013
caef3f9
bb8f2c1
79737fc
237159e
b347b51
7e048d5
a4e6ba1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -586,6 +586,9 @@ struct IndexScan::SharedGeneratorState { | |
| bool hasUndef_ = false; | ||
| // Indicates if the generator has been fully consumed. | ||
| bool doneFetching_ = false; | ||
| // If true, filter the left side (skip non-matching inputs). If false, pass | ||
| // through all inputs even if they don't match any blocks. | ||
| bool filterLeftSide_ = true; | ||
|
|
||
| // Advance the `iterator` to the next non-empty table. Set `hasUndef_` to true | ||
| // if the first table is undefined. Also set `doneFetching_` if the generator | ||
|
|
@@ -643,13 +646,59 @@ struct IndexScan::SharedGeneratorState { | |
| // We have seen entries in the join column that are larger than the | ||
| // largest block in the index scan, which means that there will be no | ||
| // more matches. | ||
| if (!filterLeftSide_) { | ||
| // Case B: Push current table before marking as done. | ||
| prefetchedValues_.push_back(std::move(*iterator_.value())); | ||
| } | ||
| doneFetching_ = true; | ||
| return; | ||
| } | ||
| // The current `joinColumn` has no matching block in the index, we can | ||
| // safely skip appending it to `prefetchedValues_`, but future values | ||
| // might require later blocks from the index. | ||
| continue; | ||
| // Case A: The current `joinColumn` has no matching block in the index. | ||
| if (filterLeftSide_) { | ||
| // We can safely skip appending it to `prefetchedValues_`, but future | ||
| // values might require later blocks from the index. | ||
| continue; | ||
| } else { | ||
| // When not filtering, push the table to prefetchedValues. | ||
| prefetchedValues_.push_back(std::move(*iterator_.value())); | ||
| // If buffer grows too large, find a dummy block to add. | ||
| if (prefetchedValues_.size() > 5) { | ||
| // Find the last value in the join column of the last prefetched | ||
| // table. | ||
| const auto& lastPrefetched = prefetchedValues_.back(); | ||
| auto lastJoinColumn = | ||
| lastPrefetched.idTable_.getColumn(joinColumn_); | ||
| AD_CORRECTNESS_CHECK(!lastJoinColumn.empty()); | ||
| Id lastValue = lastJoinColumn.back(); | ||
| // Find the smallest block whose first entry is larger than | ||
| // lastValue. | ||
| // TODO<joka921> This should always be the first block that is still | ||
| // available. also remove code duplication with the above code. | ||
| bool foundBlock = false; | ||
| size_t numBlocksHandled = 0; | ||
| for (const auto& block : metaBlocks_.getBlockMetadataView()) { | ||
| ++numBlocksHandled; | ||
| if (CompressedRelationReader::getRelevantIdFromTriple( | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This `if` statement can be asserted, and the meta blocks range is never empty at this point. |
||
| block.firstTriple_, metaBlocks_) > lastValue) { | ||
| // Found a suitable block, add it to pendingBlocks. | ||
| pendingBlocks_.push_back(block); | ||
| lastEntryInBlocks_ = | ||
| CompressedRelationReader::getRelevantIdFromTriple( | ||
| block.lastTriple_, metaBlocks_); | ||
| AD_CORRECTNESS_CHECK(numBlocksHandled == 1); | ||
| metaBlocks_.removePrefix(numBlocksHandled); | ||
| foundBlock = true; | ||
| break; | ||
| } | ||
| } | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This should be extracted to a helper function. |
||
| if (!foundBlock) { | ||
| // No more blocks available, mark as done. | ||
| doneFetching_ = true; | ||
| return; | ||
| } | ||
| } | ||
| continue; | ||
| } | ||
| } | ||
| prefetchedValues_.push_back(std::move(*iterator_.value())); | ||
| ql::ranges::move(newBlocks, std::back_inserter(pendingBlocks_)); | ||
|
|
@@ -690,7 +739,19 @@ Result::LazyResult IndexScan::createPrefilteredJoinSide( | |
|
|
||
| if (prefetched.empty()) { | ||
| AD_CORRECTNESS_CHECK(state->doneFetching_); | ||
| return LoopControl::makeBreak(); | ||
| // If not filtering left side, yield all remaining elements. | ||
| AD_CORRECTNESS_CHECK(state->iterator_.has_value()); | ||
| auto it = state->iterator_.value(); | ||
| if (!state->filterLeftSide_ && it != state->generator_.end()) { | ||
| // Advance the iterator past the last value we already yielded. | ||
| ++it; | ||
|
Comment on lines
+746
to
+747
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What if you don't yield this value up until this point? I think this would simplify the logic in |
||
| return LoopControl::breakWithYieldAll( | ||
| ql::ranges::subrange(it, state->generator_.end()) | | ||
| ql::views::filter( | ||
| [](const auto& block) { return !block.idTable_.empty(); })); | ||
| } else { | ||
| return LoopControl::makeBreak(); | ||
| } | ||
| } | ||
|
|
||
| // Make a defensive copy of the values to avoid modification during | ||
|
|
@@ -769,17 +830,27 @@ Result::LazyResult IndexScan::createPrefilteredIndexScanSide( | |
|
|
||
| // _____________________________________________________________________________ | ||
| std::pair<Result::LazyResult, Result::LazyResult> IndexScan::prefilterTables( | ||
| Result::LazyResult input, ColumnIndex joinColumn) { | ||
| Result::LazyResult input, ColumnIndex joinColumn, bool filterLeftSide) { | ||
| AD_CORRECTNESS_CHECK(numVariables_ <= 3 && numVariables_ > 0); | ||
| auto metaBlocks = getMetadataForScan(); | ||
|
|
||
| if (!metaBlocks.has_value()) { | ||
| // Return empty results | ||
| return {Result::LazyResult{}, Result::LazyResult{}}; | ||
| return {filterLeftSide ? Result::LazyResult{} : std::move(input), | ||
| Result::LazyResult{}}; | ||
| } | ||
|
|
||
| auto state = std::make_shared<SharedGeneratorState>(SharedGeneratorState{ | ||
| std::move(input), joinColumn, std::move(metaBlocks.value())}); | ||
| auto state = std::make_shared<SharedGeneratorState>( | ||
| SharedGeneratorState{std::move(input), | ||
| joinColumn, | ||
| std::move(metaBlocks.value()), | ||
| std::nullopt, | ||
| {}, | ||
| {}, | ||
| std::nullopt, | ||
| false, | ||
| false, | ||
| filterLeftSide}); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Consider moving this param up a bit in the struct; then you wouldn't have to re-define all of these default arguments again. |
||
| return {createPrefilteredJoinSide(state), | ||
| createPrefilteredIndexScanSide(state)}; | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `5` should be a named constant.