From dee3cc254a41a91bfa9d9f8cb33e4e8f835aa4c4 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Mon, 2 Feb 2026 10:46:06 +0100 Subject: [PATCH 1/3] Use aliasing shared pointers instead of extra lifetime object --- src/engine/CartesianProductJoin.cpp | 40 ++++++++++++++++------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/src/engine/CartesianProductJoin.cpp b/src/engine/CartesianProductJoin.cpp index 04175953bd..41dc99ca18 100644 --- a/src/engine/CartesianProductJoin.cpp +++ b/src/engine/CartesianProductJoin.cpp @@ -369,51 +369,55 @@ Result::LazyResult CartesianProductJoin::createLazyConsumer( std::vector> subresults, std::shared_ptr lazyResult) const { AD_CONTRACT_CHECK(lazyResult); - std::vector> idTables; + std::vector> idTables; idTables.reserve(subresults.size() + 1); - for (const auto& result : subresults) { - idTables.emplace_back(result->idTable()); + for (auto& result : subresults) { + auto* idTable = &result->idTable(); + idTables.emplace_back(std::move(result), idTable); } + // Placeholder, will be replaced with the current `IdTable` from the lazy + // result. + idTables.emplace_back(nullptr); + + auto generatedTables = lazyResult->idTables(); + auto get = [self = this, staticMergedVocab = std::move(staticMergedVocab), limit = getLimitOffset().limitOrDefault(), offset = getLimitOffset()._offset, idTables = std::move(idTables), - subresults = std::move(subresults), lastTableOffset = size_t{0}, - producedTableSize = size_t{0}, - idTableOpt = std::optional{}]( - auto& idTableVocabPair) mutable { + lastTableOffset = size_t{0}, producedTableSize = size_t{0}, + lazyResult = + std::move(lazyResult)](auto& idTableVocabPair) mutable { // These things have to be done after handling a single input, so we do them // at the beginning of each but the last iteration. - if (idTableOpt.has_value()) { - idTables.pop_back(); - lastTableOffset += idTableOpt->idTable_.size(); + if (idTables.back() != nullptr) { + lastTableOffset += idTables.back()->size(); limit -= producedTableSize; offset += producedTableSize; producedTableSize = 0; } - idTableOpt = std::move(idTableVocabPair); - auto& [idTable, localVocab] = idTableOpt.value(); - if (idTable.empty()) { + auto& [idTable, localVocab] = idTableVocabPair; + idTables.pop_back(); + idTables.push_back(std::make_shared(std::move(idTable))); + if (idTables.back()->empty()) { return Result::IdTableLoopControl::makeContinue(); } - idTables.emplace_back(idTable); localVocab.mergeWith(staticMergedVocab); return Result::IdTableLoopControl::yieldAll( ad_utility::InputRangeTypeErased{ ad_utility::OwningView{self->produceTablesLazily( std::move(localVocab), - ql::views::transform(idTables, - ad_utility::staticCast), - offset, limit, lastTableOffset)} | + ql::views::transform(idTables, ad_utility::dereference), offset, + limit, lastTableOffset)} | ql::views::transform([&producedTableSize](auto& tableAndVocab) { producedTableSize += tableAndVocab.idTable_.size(); return std::move(tableAndVocab); })}); }; return Result::LazyResult(ad_utility::CachingContinuableTransformInputRange( - lazyResult->idTables(), std::move(get))); + std::move(generatedTables), std::move(get))); } // _____________________________________________________________________________ From ce2a6b5f711c2b2a65e04dee240443e4c88871be Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Tue, 3 Feb 2026 10:47:58 +0100 Subject: [PATCH 2/3] Polish code a little bit more --- src/engine/CartesianProductJoin.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/engine/CartesianProductJoin.cpp b/src/engine/CartesianProductJoin.cpp index 41dc99ca18..e601928abc 100644 --- a/src/engine/CartesianProductJoin.cpp +++ b/src/engine/CartesianProductJoin.cpp @@ -377,7 +377,8 @@ Result::LazyResult CartesianProductJoin::createLazyConsumer( } // Placeholder, will be replaced with the current `IdTable` from the lazy // result. - idTables.emplace_back(nullptr); + idTables.emplace_back( + std::make_shared(IdTable{0, allocator()})); auto generatedTables = lazyResult->idTables(); @@ -389,16 +390,13 @@ Result::LazyResult CartesianProductJoin::createLazyConsumer( std::move(lazyResult)](auto& idTableVocabPair) mutable { // These things have to be done after handling a single input, so we do them // at the beginning of each but the last iteration. - if (idTables.back() != nullptr) { - lastTableOffset += idTables.back()->size(); - limit -= producedTableSize; - offset += producedTableSize; - producedTableSize = 0; - } + lastTableOffset += idTables.back()->size(); + limit -= producedTableSize; + offset += producedTableSize; + producedTableSize = 0; auto& [idTable, localVocab] = idTableVocabPair; - idTables.pop_back(); - idTables.push_back(std::make_shared(std::move(idTable))); + idTables.back() = std::make_shared(std::move(idTable)); if (idTables.back()->empty()) { return Result::IdTableLoopControl::makeContinue(); } From 3ed553e58a92cd6582d63e34bde35f082cacbd78 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Tue, 3 Feb 2026 10:50:50 +0100 Subject: [PATCH 3/3] Perform a micro-optimization --- src/engine/CartesianProductJoin.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/engine/CartesianProductJoin.cpp b/src/engine/CartesianProductJoin.cpp index e601928abc..8c24210c1b 100644 --- a/src/engine/CartesianProductJoin.cpp +++ b/src/engine/CartesianProductJoin.cpp @@ -377,8 +377,7 @@ Result::LazyResult CartesianProductJoin::createLazyConsumer( } // Placeholder, will be replaced with the current `IdTable` from the lazy // result. - idTables.emplace_back( - std::make_shared(IdTable{0, allocator()})); + idTables.emplace_back(std::make_shared(IdTable{0, allocator()})); auto generatedTables = lazyResult->idTables(); @@ -396,7 +395,10 @@ Result::LazyResult CartesianProductJoin::createLazyConsumer( producedTableSize = 0; auto& [idTable, localVocab] = idTableVocabPair; - idTables.back() = std::make_shared(std::move(idTable)); + // We know that the last id table is mutable (we inserted it ourselves), so + // we can replace it without paying the cost of creating and destroying a + // shared pointer. + const_cast(*idTables.back()) = std::move(idTable); if (idTables.back()->empty()) { return Result::IdTableLoopControl::makeContinue(); }