Skip to content

Commit bb232e5

Browse files
hannahbastRobinTF
authored andcommitted
Fix live update of Analysis tree for join with one index scan (#2473)
So far, the `Analysis` tree is updated live for join operations when both child operations produce their output lazily. However, when one or both child operations are index scans, no live updates are shown for them. Here is a simple example query that demonstrates both cases: ```sparql SELECT * WHERE { ?s a ?o1, ?o2, ?o3 } ``` This change fixes the issue for the case where one child is an index scan and the other child produces its output lazily. Co-authored-by: Robin Textor-Falconi <[email protected]>
1 parent 910e1ed commit bb232e5

File tree

8 files changed

+115
-92
lines changed

8 files changed

+115
-92
lines changed

src/engine/IndexScan.cpp

Lines changed: 33 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@
1313
#include "engine/QueryExecutionTree.h"
1414
#include "index/IndexImpl.h"
1515
#include "parser/ParsedQuery.h"
16-
#include "util/Generator.h"
17-
#include "util/GeneratorConverter.h"
1816
#include "util/InputRangeUtils.h"
1917
#include "util/Iterators.h"
2018

@@ -366,7 +364,7 @@ Permutation::ScanSpecAndBlocks IndexScan::getScanSpecAndBlocks() const {
366364
}
367365

368366
// _____________________________________________________________________________
369-
Permutation::IdTableGenerator IndexScan::getLazyScan(
367+
CompressedRelationReader::IdTableGeneratorInputRange IndexScan::getLazyScan(
370368
std::optional<std::vector<CompressedBlockMetadata>> blocks) const {
371369
// If there is a LIMIT or OFFSET clause that constrains the scan
372370
// (which can happen with an explicit subquery), we cannot use the prefiltered
@@ -378,12 +376,12 @@ Permutation::IdTableGenerator IndexScan::getLazyScan(
378376
scanSpecAndBlocks_, filteredBlocks, additionalColumns(),
379377
cancellationHandle_, locatedTriplesSnapshot(), getLimitOffset());
380378

381-
return cppcoro::fromInputRange(
382-
ad_utility::InputRangeTypeErased<IdTable, LazyScanMetadata>(
383-
ad_utility::CachingTransformInputRange<
384-
ad_utility::OwningView<Permutation::IdTableGenerator>,
385-
decltype(makeApplyColumnSubset()), LazyScanMetadata>{
386-
std::move(lazyScanAllCols), makeApplyColumnSubset()}));
379+
return CompressedRelationReader::IdTableGeneratorInputRange{
380+
ad_utility::CachingTransformInputRange<
381+
ad_utility::OwningView<
382+
CompressedRelationReader::IdTableGeneratorInputRange>,
383+
decltype(makeApplyColumnSubset()), LazyScanMetadata>{
384+
std::move(lazyScanAllCols), makeApplyColumnSubset()}};
387385
};
388386

389387
// _____________________________________________________________________________
@@ -394,7 +392,7 @@ std::optional<Permutation::MetadataAndBlocks> IndexScan::getMetadataForScan()
394392
};
395393

396394
// _____________________________________________________________________________
397-
std::array<Permutation::IdTableGenerator, 2>
395+
std::array<CompressedRelationReader::IdTableGeneratorInputRange, 2>
398396
IndexScan::lazyScanForJoinOfTwoScans(const IndexScan& s1, const IndexScan& s2) {
399397
AD_CONTRACT_CHECK(s1.numVariables_ <= 3 && s2.numVariables_ <= 3);
400398
AD_CONTRACT_CHECK(s1.numVariables_ >= 1 && s2.numVariables_ >= 1);
@@ -442,7 +440,8 @@ IndexScan::lazyScanForJoinOfTwoScans(const IndexScan& s1, const IndexScan& s2) {
442440
}
443441

444442
// _____________________________________________________________________________
445-
Permutation::IdTableGenerator IndexScan::lazyScanForJoinOfColumnWithScan(
443+
CompressedRelationReader::IdTableGeneratorInputRange
444+
IndexScan::lazyScanForJoinOfColumnWithScan(
446445
ql::span<const Id> joinColumn) const {
447446
AD_EXPENSIVE_CHECK(ql::ranges::is_sorted(joinColumn));
448447
AD_CORRECTNESS_CHECK(numVariables_ <= 3 && numVariables_ > 0);
@@ -461,9 +460,8 @@ Permutation::IdTableGenerator IndexScan::lazyScanForJoinOfColumnWithScan(
461460

462461
// _____________________________________________________________________________
463462
void IndexScan::updateRuntimeInfoForLazyScan(const LazyScanMetadata& metadata) {
464-
updateRuntimeInformationWhenOptimizedOut(
465-
RuntimeInformation::Status::lazilyMaterialized);
466463
auto& rti = runtimeInfo();
464+
rti.status_ = RuntimeInformation::Status::lazilyMaterialized;
467465
rti.numRows_ = metadata.numElementsYielded_;
468466
rti.totalTime_ = metadata.blockingTime_;
469467
rti.addDetail("num-blocks-read", metadata.numBlocksRead_);
@@ -481,6 +479,7 @@ void IndexScan::updateRuntimeInfoForLazyScan(const LazyScanMetadata& metadata) {
481479
updateIfPositive(metadata.numBlocksPostprocessed_,
482480
"num-blocks-postprocessed");
483481
updateIfPositive(metadata.numBlocksWithUpdate_, "num-blocks-with-update");
482+
signalQueryUpdate();
484483
}
485484

486485
// Store a Generator and its corresponding iterator as well as unconsumed values
@@ -629,13 +628,24 @@ Result::LazyResult IndexScan::createPrefilteredJoinSide(
629628
Result::LazyResult IndexScan::createPrefilteredIndexScanSide(
630629
std::shared_ptr<SharedGeneratorState> innerState) {
631630
using LoopControl = ad_utility::LoopControl<Result::IdTableVocabPair>;
631+
using namespace std::chrono_literals;
632632

633633
auto range = ad_utility::InputRangeFromLoopControlGet{
634634
[this, state = std::move(innerState),
635635
metadata = LazyScanMetadata{}]() mutable {
636636
// Handle UNDEF case using LoopControl pattern
637637
if (state->hasUndef()) {
638-
return LoopControl::breakWithYieldAll(chunkedIndexScan());
638+
auto scan = std::make_shared<
639+
CompressedRelationReader::IdTableGeneratorInputRange>(
640+
getLazyScan());
641+
scan->details().numBlocksAll_ =
642+
getMetadataForScan().value().sizeBlockMetadata_;
643+
return LoopControl::breakWithYieldAll(
644+
ad_utility::CachingTransformInputRange(*scan, [this, scan](
645+
auto& table) {
646+
updateRuntimeInfoForLazyScan(scan->details());
647+
return Result::IdTableVocabPair{std::move(table), LocalVocab{}};
648+
}));
639649
}
640650

641651
auto& pendingBlocks = state->pendingBlocks_;
@@ -648,6 +658,8 @@ Result::LazyResult IndexScan::createPrefilteredIndexScanSide(
648658
}
649659
state->fetch();
650660
}
661+
metadata.numBlocksAll_ = state->metaBlocks_.sizeBlockMetadata_;
662+
updateRuntimeInfoForLazyScan(metadata);
651663

652664
// We now have non-empty pending blocks
653665
auto scan = getLazyScan(std::move(pendingBlocks));
@@ -658,19 +670,16 @@ Result::LazyResult IndexScan::createPrefilteredIndexScanSide(
658670

659671
// Transform the scan to Result::IdTableVocabPair and yield all
660672
auto transformedScan = ad_utility::CachingTransformInputRange(
661-
std::move(scan), [](auto& table) {
662-
return Result::IdTableVocabPair{std::move(table), LocalVocab{}};
663-
});
664-
665-
// Use CallbackOnEndView to aggregate metadata after scan is consumed
666-
auto callback = ad_utility::makeAssignableLambda(
667-
[&metadata, &scanDetails]() mutable {
673+
std::move(scan),
674+
[&metadata, &scanDetails,
675+
originalMetadata = metadata](auto& table) mutable {
676+
// Make sure we don't add everything more than once.
677+
metadata = originalMetadata;
668678
metadata.aggregate(scanDetails);
679+
return Result::IdTableVocabPair{std::move(table), LocalVocab{}};
669680
});
670681

671-
auto scanWithCallback = ad_utility::CallbackOnEndView{
672-
std::move(transformedScan), std::move(callback)};
673-
return LoopControl::yieldAll(std::move(scanWithCallback));
682+
return LoopControl::yieldAll(std::move(transformedScan));
674683
}};
675684
return Result::LazyResult{std::move(range)};
676685
}

src/engine/IndexScan.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -97,15 +97,15 @@ class IndexScan final : public Operation {
9797
// blocks, but only the blocks that can theoretically contain matching rows
9898
// when performing a join on the first column of the result of `s1` with the
9999
// first column of the result of `s2`.
100-
static std::array<Permutation::IdTableGenerator, 2> lazyScanForJoinOfTwoScans(
101-
const IndexScan& s1, const IndexScan& s2);
100+
static std::array<CompressedRelationReader::IdTableGeneratorInputRange, 2>
101+
lazyScanForJoinOfTwoScans(const IndexScan& s1, const IndexScan& s2);
102102

103103
// Return a generator that lazily yields the result in blocks, but only
104104
// the blocks that can theoretically contain matching rows when performing a
105105
// join between the first column of the result with the `joinColumn`.
106106
// Requires that the `joinColumn` is sorted, else the behavior is undefined.
107-
Permutation::IdTableGenerator lazyScanForJoinOfColumnWithScan(
108-
ql::span<const Id> joinColumn) const;
107+
CompressedRelationReader::IdTableGeneratorInputRange
108+
lazyScanForJoinOfColumnWithScan(ql::span<const Id> joinColumn) const;
109109

110110
// Return two generators, the first of which yields exactly the elements of
111111
// `input` and the second of which yields the matching blocks, skipping the
@@ -237,7 +237,7 @@ class IndexScan final : public Operation {
237237

238238
// Helper functions for the public `getLazyScanFor...` methods and
239239
// `chunkedIndexScan` (see above).
240-
Permutation::IdTableGenerator getLazyScan(
240+
CompressedRelationReader::IdTableGeneratorInputRange getLazyScan(
241241
std::optional<std::vector<CompressedBlockMetadata>> blocks =
242242
std::nullopt) const;
243243
std::optional<Permutation::MetadataAndBlocks> getMetadataForScan() const;

src/engine/JoinHelpers.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,11 @@ using GeneratorWithDetails =
9292
// Convert a `generator<IdTable` to a `generator<IdTableAndFirstCol>` for more
9393
// efficient access in the join columns below.
9494
inline GeneratorWithDetails convertGenerator(
95-
Permutation::IdTableGenerator gen) {
95+
CompressedRelationReader::IdTableGeneratorInputRange gen) {
9696
// Store the generator in a wrapper so we can access its details after moving
9797
auto generatorStorage =
98-
std::make_shared<Permutation::IdTableGenerator>(std::move(gen));
98+
std::make_shared<CompressedRelationReader::IdTableGeneratorInputRange>(
99+
std::move(gen));
99100

100101
// Create the range with a pointer to the generator's details
101102
auto range = InputRangeTypeErased<IdTableAndFirstCol<IdTable>>(

src/index/Permutation.cpp

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
#include "index/ConstantsIndexBuilding.h"
1010
#include "index/DeltaTriples.h"
11-
#include "util/GeneratorConverter.h"
1211
#include "util/StringUtils.h"
1312

1413
// _____________________________________________________________________
@@ -194,7 +193,7 @@ std::optional<Permutation::MetadataAndBlocks> Permutation::getMetadataAndBlocks(
194193
}
195194

196195
// _____________________________________________________________________
197-
Permutation::IdTableGenerator Permutation::lazyScan(
196+
CompressedRelationReader::IdTableGeneratorInputRange Permutation::lazyScan(
198197
const ScanSpecAndBlocks& scanSpecAndBlocks,
199198
std::optional<std::vector<CompressedBlockMetadata>> optBlocks,
200199
ColumnIndicesRef additionalColumns,
@@ -207,14 +206,10 @@ Permutation::IdTableGenerator Permutation::lazyScan(
207206
optBlocks = CompressedRelationReader::convertBlockMetadataRangesToVector(
208207
scanSpecAndBlocks.blockMetadata_);
209208
}
210-
auto lazyScan{p.reader().lazyScan(
209+
return p.reader().lazyScan(
211210
scanSpecAndBlocks.scanSpec_, std::move(optBlocks.value()),
212211
std::move(columns), cancellationHandle,
213-
p.getLocatedTriplesForPermutation(locatedTriplesSnapshot), limitOffset)};
214-
215-
return cppcoro::fromInputRange<IdTable,
216-
CompressedRelationReader::LazyScanMetadata>(
217-
std::move(lazyScan));
212+
p.getLocatedTriplesForPermutation(locatedTriplesSnapshot), limitOffset);
218213
}
219214

220215
// ______________________________________________________________________

src/index/Permutation.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,6 @@ class Permutation {
8989
using MetadataAndBlocks =
9090
CompressedRelationReader::ScanSpecAndBlocksAndBounds;
9191

92-
using IdTableGenerator = CompressedRelationReader::IdTableGenerator;
93-
9492
// The function `lazyScan` is similar to `scan` (see above) with
9593
// the following differences:
9694
// - The result is returned as a lazy generator of blocks.
@@ -108,7 +106,7 @@ class Permutation {
108106
// TODO<joka921> We should only communicate this interface via the
109107
// `ScanSpecAndBlocksAndBounds` class and make this a strong class that always
110108
// maintains its invariants.
111-
IdTableGenerator lazyScan(
109+
CompressedRelationReader::IdTableGeneratorInputRange lazyScan(
112110
const ScanSpecAndBlocks& scanSpecAndBlocks,
113111
std::optional<std::vector<CompressedBlockMetadata>> optBlocks,
114112
ColumnIndicesRef additionalColumns,

src/util/GeneratorConverter.h

Lines changed: 0 additions & 37 deletions
This file was deleted.

src/util/http/HttpUtils.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
#include "backports/three_way_comparison.h"
1515
#include "util/AsyncStream.h"
1616
#include "util/CompressorStream.h"
17-
#include "util/GeneratorConverter.h"
1817
#include "util/StringUtils.h"
1918
#include "util/TypeTraits.h"
2019
#include "util/http/MediaTypes.h"
@@ -168,7 +167,11 @@ CPP_template(typename RequestType)(
168167
ad_utility::content_encoding::getCompressionMethodForRequest(request);
169168

170169
auto asyncGenerator = streams::runStreamAsync(std::move(generator), 100);
171-
auto coroAsyncGenerator = cppcoro::fromInputRange(std::move(asyncGenerator));
170+
auto coroAsyncGenerator = [](auto range) -> cppcoro::generator<std::string> {
171+
for (auto& value : range) {
172+
co_yield value;
173+
}
174+
}(std::move(asyncGenerator));
172175

173176
if (method != CompressionMethod::NONE) {
174177
response.body() =

0 commit comments

Comments
 (0)