Skip to content

Commit 910e1ed

Browse files
authored
Fix the atomic_flag backport (#2474)
The backport of `std::atomic_flag` from #2431 was prone to a deadlock in the `wait` method. This was discovered by our Clang 17 debug-with-thread-sanitizer build, which failed or all recent commits because it was cancelled after six hours. The reason for the deadlock was an overly optimistic use of an `std::atomic<bool>` flag. This is now fixed by synchronizing access to that flag via a mutex. NOTE: In principle, this makes the implementation of the `atomic_flag` class slightly less efficient in C++17 mode. However, we only use that class for synchronizing relatively large batches of computation, so that we don't expect any significant performance degradation anywhere.
1 parent 78988ae commit 910e1ed

File tree

2 files changed

+31
-24
lines changed

2 files changed

+31
-24
lines changed

src/backports/atomic_flag.h

Lines changed: 26 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,15 @@
1616
namespace ql::backports {
1717

1818
// C++17 backport of C++20's `std::atomic_flag` with wait/notify functionality.
19-
// This implementation uses std::atomic<bool>, std::mutex, and
20-
// std::condition_variable to provide the same interface as C++20's
19+
// This implementation uses `std::mutex`, and
20+
// `std::condition_variable` to provide the same interface as C++20's
2121
// std::atomic_flag including wait(), notify_one(), and notify_all().
22-
// Note: Same as `std::atomic_flag`, this implementation is prone to the ABA
23-
// problem, i.e. if the value of the flag is changed, and then immediately
24-
// changed back, threads waiting for that change might miss it. This allows to
25-
// implement many operations without acquiring a lock.
22+
// Note: This implementation is less efficient than the C++20 implementation,
23+
// which is implemented using low-level compiler and OS intrinsics, which are
24+
// more efficient than using a mutex.
2625
class atomic_flag {
2726
private:
28-
std::atomic<bool> flag_{false};
27+
bool flag_{false};
2928
mutable std::mutex mutex_;
3029
mutable std::condition_variable cv_;
3130

@@ -42,37 +41,41 @@ class atomic_flag {
4241
atomic_flag& operator=(atomic_flag&&) = delete;
4342

4443
// Clear the flag (set to false)
45-
void clear(std::memory_order order = std::memory_order_seq_cst) noexcept {
46-
flag_.store(false, order);
44+
void clear([[maybe_unused]] std::memory_order order =
45+
std::memory_order_seq_cst) noexcept {
46+
{
47+
std::lock_guard lock{mutex_};
48+
flag_ = false;
49+
}
4750
// Notify waiters that the flag has changed
4851
notify_all();
4952
}
5053

5154
// Test and set the flag (returns previous value)
52-
bool test_and_set(
53-
std::memory_order order = std::memory_order_seq_cst) noexcept {
54-
bool result = flag_.exchange(true, order);
55-
// Notify waiters that the flag has changed
55+
bool test_and_set([[maybe_unused]] std::memory_order order =
56+
std::memory_order_seq_cst) noexcept {
57+
bool result = [this]
58+
59+
{
60+
std::lock_guard lock{mutex_};
61+
return std::exchange(flag_, true);
62+
}();
5663
notify_all();
5764
return result;
5865
}
5966

6067
// Test the flag without modifying it (C++20 feature)
61-
bool test(
62-
std::memory_order order = std::memory_order_seq_cst) const noexcept {
63-
return flag_.load(order);
68+
bool test([[maybe_unused]] std::memory_order order =
69+
std::memory_order_seq_cst) const noexcept {
70+
std::lock_guard lock{mutex_};
71+
return flag_;
6472
}
6573

6674
// Wait for the flag to become different from the given value
67-
void wait(bool old, std::memory_order order =
75+
void wait(bool old, [[maybe_unused]] std::memory_order order =
6876
std::memory_order_seq_cst) const noexcept {
69-
// Fast path: check without locking first
70-
if (flag_.load(order) != old) {
71-
return;
72-
}
73-
// Slow path: lock and wait
7477
std::unique_lock<std::mutex> lock(mutex_);
75-
cv_.wait(lock, [this, old, order]() { return flag_.load(order) != old; });
78+
cv_.wait(lock, [this, old]() { return flag_ != old; });
7679
}
7780

7881
// Notify one waiting thread

src/util/Generators.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,6 @@ InputRangeTypeErased<T> generatorFromActionWithCallback(F functionWithCallback)
8989
requires ql::concepts::invocable<F, std::function<void(T)>> {
9090
class CallbackToRangeAdapter : public InputRangeFromGet<T> {
9191
F functionWithCallback_;
92-
ad_utility::JThread thread_;
9392
std::mutex mutex_;
9493
std::condition_variable cv_;
9594
// Only one of the threads can run at the same time. The semantics are
@@ -106,6 +105,11 @@ InputRangeTypeErased<T> generatorFromActionWithCallback(F functionWithCallback)
106105
// encountered an exception, which is then rethrown by the outer generator.
107106
std::variant<std::monostate, T, std::exception_ptr> storage_;
108107

108+
// IMPORTANT NOTE: It is crucial that this `thread_` is the last data member
109+
// of this class, as it references several of the above members, and thus
110+
// has to be joined/destroyed before these other members are destroyed!!!
111+
ad_utility::JThread thread_;
112+
109113
public:
110114
explicit CallbackToRangeAdapter(F functionWithCallback)
111115
: functionWithCallback_(std::move(functionWithCallback)) {}

0 commit comments

Comments
 (0)