Skip to content

Commit a8c9d86

Browse files
hannahbast and RobinTF authored
Fix runtime information and live update for lazy join of two index scans (#2480)
Since #2256, the details of the runtime information of the index scans of a lazy join of two index scans were not properly updated anymore. In particular, `num-blocks-read` and `num-elements-read` were always zero, even after completion of the query. A live update of the runtime information of index scans in this scenario never worked so far. This is all fixed now. Together with #2473, live updates now fully work for any cascade of lazy joins. Here is a simple example query, where we can now see live updates for all operations (the predicate must be large enough so that the joins are actually lazy): ```sparql SELECT * { ?s a ?o1, ?o2, ?o3 } ``` Co-authored-by: RobinTF <[email protected]>
1 parent 0ac2d96 commit a8c9d86

File tree

7 files changed

+119
-211
lines changed

7 files changed

+119
-211
lines changed

src/engine/IndexScan.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -459,7 +459,8 @@ IndexScan::lazyScanForJoinOfColumnWithScan(
459459
}
460460

461461
// _____________________________________________________________________________
462-
void IndexScan::updateRuntimeInfoForLazyScan(const LazyScanMetadata& metadata) {
462+
void IndexScan::updateRuntimeInfoForLazyScan(const LazyScanMetadata& metadata,
463+
bool signalUpdate) {
463464
auto& rti = runtimeInfo();
464465
rti.status_ = RuntimeInformation::Status::lazilyMaterialized;
465466
rti.numRows_ = metadata.numElementsYielded_;
@@ -479,7 +480,9 @@ void IndexScan::updateRuntimeInfoForLazyScan(const LazyScanMetadata& metadata) {
479480
updateIfPositive(metadata.numBlocksPostprocessed_,
480481
"num-blocks-postprocessed");
481482
updateIfPositive(metadata.numBlocksWithUpdate_, "num-blocks-with-update");
482-
signalQueryUpdate();
483+
if (signalUpdate) {
484+
signalQueryUpdate();
485+
}
483486
}
484487

485488
// Store a Generator and its corresponding iterator as well as unconsumed values

src/engine/IndexScan.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,8 @@ class IndexScan final : public Operation {
181181
// Set the runtime info of the `scanTree` when it was lazily executed during a
182182
// join.
183183
void updateRuntimeInfoForLazyScan(
184-
const CompressedRelationReader::LazyScanMetadata& metadata);
184+
const CompressedRelationReader::LazyScanMetadata& metadata,
185+
bool signalUpdate = true);
185186

186187
bool columnOriginatesFromGraphOrUndef(
187188
const Variable& variable) const override;

src/engine/Join.cpp

Lines changed: 51 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -563,11 +563,44 @@ void Join::addCombinedRowToIdTable(const ROW_A& rowA, const ROW_B& rowB,
563563
}
564564
}
565565

566+
// _____________________________________________________________________________
567+
namespace {
568+
// Type alias for the general InputRangeTypeErased with specific types.
569+
using IteratorWithSingleCol = InputRangeTypeErased<IdTableAndFirstCol<IdTable>>;
570+
571+
// Convert a `CompressedRelationReader::IdTableGeneratorInputRange` to a
572+
// `InputRangeTypeErased<IdTableAndFirstCol<IdTable>>` for more efficient access
573+
// in the join columns below. This also makes sure the runtime information of
574+
// the passed `IndexScan` is updated properly as the range is consumed.
575+
IteratorWithSingleCol convertGenerator(
576+
CompressedRelationReader::IdTableGeneratorInputRange gen, IndexScan& scan,
577+
bool postUpdates) {
578+
// Store the generator in a wrapper so we can access its details after moving
579+
auto generatorStorage =
580+
std::make_shared<CompressedRelationReader::IdTableGeneratorInputRange>(
581+
std::move(gen));
582+
583+
auto range = CachingTransformInputRange(
584+
*generatorStorage, [generatorStorage, &scan, postUpdates,
585+
first = true](auto& table) mutable {
586+
scan.updateRuntimeInfoForLazyScan(generatorStorage->details(),
587+
first || postUpdates);
588+
first = false;
589+
// IndexScans don't have a local vocabulary, so we can just use an empty
590+
// one.
591+
return IdTableAndFirstCol{std::move(table), LocalVocab{}};
592+
});
593+
594+
return IteratorWithSingleCol{std::move(range)};
595+
}
596+
} // namespace
597+
566598
// ______________________________________________________________________________________________________
567599
Result Join::computeResultForTwoIndexScans(bool requestLaziness) const {
568600
return createResult(
569601
requestLaziness,
570-
[this](std::function<void(IdTable&, LocalVocab&)> yieldTable) {
602+
[this,
603+
requestLaziness](std::function<void(IdTable&, LocalVocab&)> yieldTable) {
571604
auto leftScan =
572605
std::dynamic_pointer_cast<IndexScan>(_left->getRootOperation());
573606
auto rightScan =
@@ -585,15 +618,17 @@ Result Join::computeResultForTwoIndexScans(bool requestLaziness) const {
585618
IndexScan::lazyScanForJoinOfTwoScans(*leftScan, *rightScan);
586619
runtimeInfo().addDetail("time-for-filtering-blocks", timer.msecs());
587620

588-
auto leftBlocks = convertGenerator(std::move(leftBlocksInternal));
589-
auto rightBlocks = convertGenerator(std::move(rightBlocksInternal));
621+
// If requestLaziness, we don't need to serialize json for every update
622+
// of the child. If we serialize it whenever the join operation yields a
623+
// table that's frequent enough and reduces the overhead.
624+
auto leftBlocks = convertGenerator(std::move(leftBlocksInternal),
625+
*leftScan, !requestLaziness);
626+
auto rightBlocks = convertGenerator(std::move(rightBlocksInternal),
627+
*rightScan, !requestLaziness);
590628

591629
ad_utility::zipperJoinForBlocksWithoutUndef(leftBlocks, rightBlocks,
592630
std::less{}, rowAdder);
593631

594-
leftScan->updateRuntimeInfoForLazyScan(leftBlocks.details());
595-
rightScan->updateRuntimeInfoForLazyScan(rightBlocks.details());
596-
597632
auto localVocab = std::move(rowAdder.localVocab());
598633
return Result::IdTableVocabPair{std::move(rowAdder).resultTable(),
599634
std::move(localVocab)};
@@ -611,7 +646,7 @@ Result Join::computeResultForIndexScanAndIdTable(
611646
auto resultPermutation = joinColMap.permutationResult();
612647
return createResult(
613648
requestLaziness,
614-
[this, scan = std::move(scan),
649+
[this, requestLaziness, scan = std::move(scan),
615650
resultWithIdTable = std::move(resultWithIdTable),
616651
joinColMap = std::move(joinColMap)](
617652
std::function<void(IdTable&, LocalVocab&)> yieldTable) {
@@ -632,9 +667,9 @@ Result Join::computeResultForIndexScanAndIdTable(
632667
.isUndefined();
633668
std::optional<std::shared_ptr<const Result>> indexScanResult =
634669
std::nullopt;
635-
auto rightBlocks = [&scan, idTableHasUndef, &permutationIdTable,
636-
&indexScanResult]()
637-
-> std::variant<LazyInputView, GeneratorWithDetails> {
670+
auto rightBlocks = [requestLaziness, &scan, idTableHasUndef,
671+
&permutationIdTable,
672+
&indexScanResult]() -> LazyInputView {
638673
if (idTableHasUndef) {
639674
indexScanResult =
640675
scan->getResult(false, ComputationMode::LAZY_IF_SUPPORTED);
@@ -644,7 +679,8 @@ Result Join::computeResultForIndexScanAndIdTable(
644679
} else {
645680
auto rightBlocksInternal =
646681
scan->lazyScanForJoinOfColumnWithScan(permutationIdTable.col());
647-
return convertGenerator(std::move(rightBlocksInternal));
682+
return convertGenerator(std::move(rightBlocksInternal), *scan,
683+
!requestLaziness);
648684
}
649685
}();
650686

@@ -657,19 +693,10 @@ Result Join::computeResultForIndexScanAndIdTable(
657693
left, right, std::less{}, rowAdder);
658694
};
659695
auto blockForIdTable = std::array{std::move(permutationIdTable)};
660-
std::visit(
661-
[&doJoin, &blockForIdTable](auto& blocks) {
662-
if constexpr (idTableIsRightInput) {
663-
doJoin(blocks, blockForIdTable);
664-
} else {
665-
doJoin(blockForIdTable, blocks);
666-
}
667-
},
668-
rightBlocks);
669-
670-
if (std::holds_alternative<GeneratorWithDetails>(rightBlocks)) {
671-
scan->updateRuntimeInfoForLazyScan(
672-
std::get<GeneratorWithDetails>(rightBlocks).details());
696+
if constexpr (idTableIsRightInput) {
697+
doJoin(rightBlocks, blockForIdTable);
698+
} else {
699+
doJoin(blockForIdTable, rightBlocks);
673700
}
674701

675702
auto localVocab = std::move(rowAdder.localVocab());

src/engine/JoinHelpers.h

Lines changed: 3 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
// Copyright 2025, University of Freiburg,
2-
// Chair of Algorithms and Data Structures.
3-
// Author: Robin Textor-Falconi <[email protected]>
1+
// Copyright 2025, University of Freiburg,
2+
// Chair of Algorithms and Data Structures.
3+
// Author: Robin Textor-Falconi <[email protected]>
44
//
55
// Copyright 2025, Bayerische Motoren Werke Aktiengesellschaft (BMW AG)
66

@@ -83,33 +83,6 @@ inline std::variant<LazyInputView, MaterializedInputView> resultToView(
8383
return convertGenerator(result.idTables(), permutation);
8484
}
8585

86-
// Type alias for the general InputRangeTypeErasedWithDetails with specific
87-
// types
88-
using GeneratorWithDetails =
89-
InputRangeTypeErasedWithDetails<IdTableAndFirstCol<IdTable>,
90-
CompressedRelationReader::LazyScanMetadata>;
91-
92-
// Convert a `generator<IdTable` to a `generator<IdTableAndFirstCol>` for more
93-
// efficient access in the join columns below.
94-
inline GeneratorWithDetails convertGenerator(
95-
CompressedRelationReader::IdTableGeneratorInputRange gen) {
96-
// Store the generator in a wrapper so we can access its details after moving
97-
auto generatorStorage =
98-
std::make_shared<CompressedRelationReader::IdTableGeneratorInputRange>(
99-
std::move(gen));
100-
101-
// Create the range with a pointer to the generator's details
102-
auto range = InputRangeTypeErased<IdTableAndFirstCol<IdTable>>(
103-
CachingTransformInputRange(
104-
*generatorStorage, [generatorStorage](auto& table) {
105-
(void)generatorStorage; // Only captured for lifetime reasons.
106-
// IndexScans don't have a local vocabulary, so we can just use an
107-
return IdTableAndFirstCol{std::move(table), LocalVocab{}};
108-
}));
109-
110-
return GeneratorWithDetails{std::move(range), generatorStorage->details()};
111-
}
112-
11386
// Part of the implementation of `createResult`. This function is called when
11487
// the result should be yielded lazily.
11588
// Action is a lambda that itself runs the join operation in a blocking

src/util/Iterators.h

Lines changed: 0 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -538,63 +538,6 @@ template <typename ValueType, typename DetailsType = NoDetails>
538538
InputRangeTypeErased(std::unique_ptr<InputRangeFromGet<ValueType, DetailsType>>)
539539
-> InputRangeTypeErased<ValueType, DetailsType>;
540540

541-
// A general type-erased input range with details. This combines an
542-
// InputRangeTypeErased with additional metadata/details of arbitrary type.
543-
template <typename ValueType, typename DetailsType>
544-
class InputRangeTypeErasedWithDetails {
545-
private:
546-
InputRangeTypeErased<ValueType> range_;
547-
// Use variant to support both owned details and external details pointer
548-
std::variant<DetailsType, const DetailsType*> details_;
549-
550-
public:
551-
// Constructor that takes the range and owned details
552-
template <typename Range>
553-
explicit InputRangeTypeErasedWithDetails(Range range, DetailsType details)
554-
: range_(std::move(range)), details_(std::move(details)) {}
555-
556-
// Constructor that takes the range and a pointer to external details
557-
template <typename Range>
558-
explicit InputRangeTypeErasedWithDetails(Range range,
559-
const DetailsType* detailsPtr)
560-
: range_(std::move(range)), details_(detailsPtr) {}
561-
562-
// Delegate iterator methods to the underlying range
563-
auto begin() { return range_.begin(); }
564-
auto end() { return range_.end(); }
565-
566-
// Provide access to the details
567-
const DetailsType& details() const {
568-
return std::visit(
569-
[](const auto& d) -> const DetailsType& {
570-
if constexpr (std::is_same_v<std::decay_t<decltype(d)>,
571-
DetailsType>) {
572-
return d;
573-
} else {
574-
return *d;
575-
}
576-
},
577-
details_);
578-
}
579-
580-
// Note: Mutable access only available for owned details
581-
DetailsType& details() {
582-
AD_CONTRACT_CHECK(std::holds_alternative<DetailsType>(details_),
583-
"Cannot get mutable reference to external details");
584-
return std::get<DetailsType>(details_);
585-
}
586-
587-
// Additional type aliases for compatibility
588-
using value_type = ValueType;
589-
using iterator = typename InputRangeTypeErased<ValueType>::iterator;
590-
};
591-
592-
// Deduction guide
593-
template <typename Range, typename DetailsType>
594-
InputRangeTypeErasedWithDetails(Range, DetailsType)
595-
-> InputRangeTypeErasedWithDetails<ql::ranges::range_value_t<Range>,
596-
DetailsType>;
597-
598541
// A view that takes an iterator and a sentinel (similar to
599542
// `ql::ranges::subrange`, but yields the iterators instead of the values when
600543
// being iterated over. Currently, the iterators must be random-access and the

test/IteratorTest.cpp

Lines changed: 0 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -261,100 +261,3 @@ TEST(Iterator, IteratorRange) {
261261
EXPECT_EQ(beg[3], v.begin() + 3);
262262
EXPECT_EQ(*beg[3], 7);
263263
}
264-
265-
//_____________________________________________________________________________
266-
TEST(Iterator, InputRangeTypeErasedWithDetails) {
267-
using namespace ad_utility;
268-
269-
// Define a simple details type for testing
270-
struct TestDetails {
271-
std::string name;
272-
int count = 0;
273-
};
274-
275-
// Create a simple input range
276-
std::vector<int> values{1, 2, 3, 4, 5};
277-
TestDetails details{"test_range", 42};
278-
279-
// Test construction and basic functionality
280-
InputRangeTypeErasedWithDetails<int, TestDetails> rangeWithDetails{values,
281-
details};
282-
283-
// Test details access
284-
EXPECT_EQ(rangeWithDetails.details().name, "test_range");
285-
EXPECT_EQ(rangeWithDetails.details().count, 42);
286-
287-
// Test that the range functionality works
288-
std::vector<int> result;
289-
for (auto& value : rangeWithDetails) {
290-
result.push_back(value);
291-
}
292-
EXPECT_EQ(result, values);
293-
294-
// Test modifying details
295-
rangeWithDetails.details().count = 100;
296-
EXPECT_EQ(rangeWithDetails.details().count, 100);
297-
298-
// Test with a custom InputRangeFromGet implementation
299-
struct CountingRange : InputRangeFromGet<int> {
300-
int current_ = 0;
301-
int max_;
302-
explicit CountingRange(int max) : max_(max) {}
303-
std::optional<int> get() override {
304-
if (current_ >= max_) return std::nullopt;
305-
return current_++;
306-
}
307-
};
308-
309-
TestDetails countingDetails{"counting", 999};
310-
InputRangeTypeErasedWithDetails<int, TestDetails> countingRangeWithDetails{
311-
CountingRange{3}, countingDetails};
312-
313-
std::vector<int> countingResult;
314-
for (auto& value : countingRangeWithDetails) {
315-
countingResult.push_back(value);
316-
}
317-
EXPECT_EQ(countingResult, (std::vector<int>{0, 1, 2}));
318-
EXPECT_EQ(countingRangeWithDetails.details().name, "counting");
319-
EXPECT_EQ(countingRangeWithDetails.details().count, 999);
320-
321-
// Test deduction guide
322-
auto deducedRange =
323-
InputRangeTypeErasedWithDetails{values, TestDetails{"deduced", 123}};
324-
static_assert(
325-
std::is_same_v<decltype(deducedRange),
326-
InputRangeTypeErasedWithDetails<int, TestDetails>>);
327-
EXPECT_EQ(deducedRange.details().name, "deduced");
328-
EXPECT_EQ(deducedRange.details().count, 123);
329-
330-
// Test external details (pointer-based) scenario
331-
TestDetails externalDetails{"external", 456};
332-
DISABLE_FREE_NONHEAP_WARNINGS
333-
InputRangeTypeErasedWithDetails<int, TestDetails> rangeWithExternalDetails{
334-
values, &externalDetails};
335-
GCC_REENABLE_WARNINGS
336-
337-
// Test that external details are accessible (const access)
338-
const auto& constRangeRef = rangeWithExternalDetails;
339-
EXPECT_EQ(constRangeRef.details().name, "external");
340-
EXPECT_EQ(constRangeRef.details().count, 456);
341-
342-
// Test that modifying the external details affects the range
343-
externalDetails.count = 789;
344-
EXPECT_EQ(constRangeRef.details().count, 789);
345-
346-
// Test that mutable access throws for external details
347-
EXPECT_THROW(
348-
{
349-
auto& mutableDetails = rangeWithExternalDetails.details();
350-
mutableDetails.count = 999; // This should throw
351-
},
352-
ad_utility::Exception);
353-
354-
// Test range functionality with external details
355-
std::vector<int> externalResult;
356-
for (auto& value : rangeWithExternalDetails) {
357-
externalResult.push_back(value);
358-
}
359-
EXPECT_EQ(externalResult, values);
360-
}

0 commit comments

Comments
 (0)