Skip to content

Commit cca3768

Browse files
hannahbastHannah Bast
authored andcommitted
Fix dangling reference in CartesianProductJoin (#2684)
A `CartesianProductJoin` materializes its inputs, except for the one with the largest size estimate, which is consumed lazily if possible. The materialized inputs are stored in the cache when there is enough space available. Because of a regression introduced in #2307, it could happen that the materialized input was evicted from the cache while the `CartesianProductJoin` still held a reference to it, leading to a dangling reference and undefined behavior. This is now fixed by keeping the materialized inputs alive for the duration of the `CartesianProductJoin`. Also add a corresponding regression test. In particular, fixes #2683 NOTE: Results being kept alive while already evicted from the cache means that we use more memory than intended. However, that is a more general problem that is not specific to this case and should be addressed separately.
1 parent f4301d5 commit cca3768

File tree

3 files changed

+89
-12
lines changed

3 files changed

+89
-12
lines changed

src/engine/CartesianProductJoin.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
1-
// Copyright 2023, University of Freiburg,
2-
// Chair of Algorithms and Data Structures.
3-
// Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
1+
// Copyright 2024 - 2026 The QLever Authors, in particular:
2+
//
3+
// 2023 - 2025 Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>, UFR
4+
// 2026 Hannah Bast <bast@cs.uni-freiburg.de>, UFR
5+
//
6+
// UFR = University of Freiburg, Chair of Algorithms and Data Structures
7+
8+
// You may not use this file except in compliance with the Apache 2.0 License,
9+
// which can be found in the `LICENSE` file at the root of the QLever project.
410

511
#include "engine/CartesianProductJoin.h"
612

@@ -360,7 +366,7 @@ CPP_template_def(typename R)(requires ql::ranges::range<R>) Result::LazyResult
360366
// _____________________________________________________________________________
361367
Result::LazyResult CartesianProductJoin::createLazyConsumer(
362368
LocalVocab staticMergedVocab,
363-
ql::span<const std::shared_ptr<const Result>> subresults,
369+
std::vector<std::shared_ptr<const Result>> subresults,
364370
std::shared_ptr<const Result> lazyResult) const {
365371
AD_CONTRACT_CHECK(lazyResult);
366372
std::vector<std::reference_wrapper<const IdTable>> idTables;
@@ -371,7 +377,8 @@ Result::LazyResult CartesianProductJoin::createLazyConsumer(
371377
auto get = [self = this, staticMergedVocab = std::move(staticMergedVocab),
372378
limit = getLimitOffset().limitOrDefault(),
373379
offset = getLimitOffset()._offset, idTables = std::move(idTables),
374-
lastTableOffset = size_t{0}, producedTableSize = size_t{0},
380+
subresults = std::move(subresults), lastTableOffset = size_t{0},
381+
producedTableSize = size_t{0},
375382
idTableOpt = std::optional<Result::IdTableVocabPair>{}](
376383
auto& idTableVocabPair) mutable {
377384
// These things have to be done after handling a single input, so we do them

src/engine/CartesianProductJoin.h

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
1-
// Copyright 2023, University of Freiburg,
2-
// Chair of Algorithms and Data Structures.
3-
// Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
1+
// Copyright 2024 - 2026 The QLever Authors, in particular:
2+
//
3+
// 2023 - 2025 Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>, UFR
4+
// 2026 Hannah Bast <bast@cs.uni-freiburg.de>, UFR
5+
//
6+
// UFR = University of Freiburg, Chair of Algorithms and Data Structures
7+
8+
// You may not use this file except in compliance with the Apache 2.0 License,
9+
// which can be found in the `LICENSE` file at the root of the QLever project.
410

511
#ifndef QLEVER_SRC_ENGINE_CARTESIANPRODUCTJOIN_H
612
#define QLEVER_SRC_ENGINE_CARTESIANPRODUCTJOIN_H
@@ -132,7 +138,7 @@ class CartesianProductJoin : public Operation {
132138
// Similar to `produceTablesLazily` but can handle a single lazy result.
133139
Result::LazyResult createLazyConsumer(
134140
LocalVocab staticMergedVocab,
135-
ql::span<const std::shared_ptr<const Result>> subresults,
141+
std::vector<std::shared_ptr<const Result>> subresults,
136142
std::shared_ptr<const Result> lazyResult) const;
137143
};
138144

test/engine/CartesianProductJoinTest.cpp

Lines changed: 67 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
1-
// Copyright 2024, University of Freiburg,
2-
// Chair of Algorithms and Data Structures.
3-
// Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
1+
// Copyright 2024 - 2026 The QLever Authors, in particular:
2+
//
3+
// 2024 Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>, UFR
4+
// 2025 Hannah Bast <bast@cs.uni-freiburg.de>, UFR
5+
//
6+
// UFR = University of Freiburg, Chair of Algorithms and Data Structures
7+
8+
// You may not use this file except in compliance with the Apache 2.0 License,
9+
// which can be found in the `LICENSE` file at the root of the QLever project.
410

511
#include <gmock/gmock.h>
612

@@ -647,6 +653,64 @@ INSTANTIATE_TEST_SUITE_P(
647653
return std::move(stream).str();
648654
});
649655

656+
// Test that `CartesianProductJoin::createLazyConsumer` keeps the materialized
657+
// child results alive even after the cache evicts them.
658+
//
659+
// NOTE: This is part of https://github.com/ad-freiburg/qlever/pull/2684, which
660+
// fixes the regression from https://github.com/ad-freiburg/qlever/issues/2307,
661+
// where `createLazyConsumer` stored `reference_wrapper<const IdTable>` without
662+
// capturing the owning `shared_ptr<const Result>`, leading to use-after-free
663+
// when the cache evicted the child results under concurrent load.
664+
TEST(CartesianProductJoin, createLazyConsumerKeepsChildResultsAlive) {
665+
auto* qec = ad_utility::testing::getQec();
666+
using Vars = std::vector<std::optional<Variable>>;
667+
668+
// Clear the cache to avoid interference from other tests.
669+
qec->getQueryTreeCache().clearAll();
670+
671+
// Create two inputs for a `CartesianProductJoin`, a materialized input with
672+
// small size estimate, and a lazy input with larger size estimate.
673+
//
674+
// NOTE: The `CartesianProductJoin` sorts its children by size estimate,
675+
// and consumes only the child with the largest size estimate lazily.
676+
CartesianProductJoin::Children children;
677+
IdTable materializedInput = makeIdTableFromVector({{1}, {2}, {3}});
678+
children.push_back(ad_utility::makeExecutionTree<ValuesForTesting>(
679+
qec, materializedInput.clone(), Vars{Variable{"?a"}}));
680+
std::dynamic_pointer_cast<ValuesForTesting>(
681+
children.back()->getRootOperation())
682+
->sizeEstimate() = 1;
683+
std::vector<IdTable> lazyInput;
684+
lazyInput.push_back(makeIdTableFromVector({{10}, {11}}));
685+
lazyInput.push_back(makeIdTableFromVector({{12}, {13}}));
686+
children.push_back(ad_utility::makeExecutionTree<ValuesForTesting>(
687+
qec, std::move(lazyInput), Vars{Variable{"?b"}}));
688+
std::dynamic_pointer_cast<ValuesForTesting>(
689+
children.back()->getRootOperation())
690+
->sizeEstimate() = 100;
691+
692+
// Pre-populate the cache with the materialized input and keep a `weak_ptr`
693+
// to track its lifetime.
694+
auto childResult = children[0]->getRootOperation()->getResult(
695+
false, ComputationMode::FULLY_MATERIALIZED);
696+
std::weak_ptr<const Result> weakChildResult = childResult;
697+
childResult.reset();
698+
699+
// Now perform the Cartesian product join and get the lazy result.
700+
// Internally, `calculateSubResults` finds the materialized input in the
701+
// cache, and `createLazyConsumer` stores a `reference_wrapper` to the
702+
// `IdTable` of the materialized input.
703+
CartesianProductJoin join{qec, std::move(children)};
704+
auto result = join.computeResultOnlyForTesting(true);
705+
ASSERT_FALSE(result.isFullyMaterialized());
706+
707+
// Clear the cache and check that the materialized input has been
708+
// kept alive by the lazy consumer. In an earlier version of the code, any
709+
// reference to it would have been dangling at this point.
710+
qec->getQueryTreeCache().clearAll();
711+
ASSERT_FALSE(weakChildResult.expired());
712+
}
713+
650714
// _____________________________________________________________________________
651715
TEST(CartesianProductJoin, clone) {
652716
auto qec = getQec();

0 commit comments

Comments
 (0)