From 264779015c6ff0261c52fdbc644b48eef33fa4a1 Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Fri, 8 Nov 2024 01:18:50 +0100 Subject: [PATCH] Review Hannah, some more changes + two more TODOs --- src/engine/QueryExecutionContext.h | 16 +++++----- src/engine/QueryExecutionTree.cpp | 9 +++--- src/engine/QueryExecutionTree.h | 8 +++-- src/index/DeltaTriples.cpp | 13 ++++---- src/index/DeltaTriples.h | 32 ++++++++----------- src/index/Permutation.h | 18 ++++++----- test/DeltaTriplesTest.cpp | 49 ++++++++++++++++++++---------- 7 files changed, 79 insertions(+), 66 deletions(-) diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index c3b1d5ee2c..76bfe1181e 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -1,8 +1,7 @@ -// Copyright 2011, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Author: -// 2011-2017 Björn Buchhold (buchhold@informatik.uni-freiburg.de) -// 2018- Johannes Kalmbach (kalmbach@informatik.uni-freiburg.de) +// Copyright 2011 - 2024, University of Freiburg +// Chair of Algorithms and Data Structures +// Authors: Björn Buchhold [2011 - 2017] +// Johannes Kalmbach [2017 - 2024] #pragma once @@ -127,9 +126,10 @@ class QueryExecutionContext { private: const Index& _index; - // When the `QueryExecutionContext` is constructed, get a stable snapshot of - // the current UPDATE status from the `DeltaTriplesManager`, which can then by - // used by the query without interfering with concurrent UPDATEs. + // When the `QueryExecutionContext` is constructed, get a stable read-only + // snapshot of the current (located) delta triples. These can then be used + // by the respective query without interfering with further incoming + // update operations. SharedLocatedTriplesSnapshot sharedLocatedTriplesSnapshot{ _index.deltaTriplesManager().getCurrentSnapshot()}; QueryResultCache* const _subtreeCache; diff --git a/src/engine/QueryExecutionTree.cpp b/src/engine/QueryExecutionTree.cpp index aed58d4fde..2b2c393928 100644 --- a/src/engine/QueryExecutionTree.cpp +++ b/src/engine/QueryExecutionTree.cpp @@ -1,8 +1,7 @@ -// Copyright 2015, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Author: -// 2015-2017 Björn Buchhold (buchhold@informatik.uni-freiburg.de) -// 2018- Johannes Kalmbach (kalmbach@informatik.uni-freiburg.de) +// Copyright 2015 - 2024, University of Freiburg +// Chair of Algorithms and Data Structures +// Authors: Björn Buchhold [2015 - 2017] +// Johannes Kalmbach [2017 - 2024] #include "./QueryExecutionTree.h" diff --git a/src/engine/QueryExecutionTree.h b/src/engine/QueryExecutionTree.h index 0519082b78..6a4b63c712 100644 --- a/src/engine/QueryExecutionTree.h +++ b/src/engine/QueryExecutionTree.h @@ -1,6 +1,8 @@ -// Copyright 2015, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Author: Björn Buchhold (buchhold@informatik.uni-freiburg.de) +// Copyright 2015 - 2024, University of Freiburg +// Chair of Algorithms and Data Structures +// Authors: Björn Buchhold +// Johannes Kalmbach + #pragma once #include diff --git a/src/index/DeltaTriples.cpp b/src/index/DeltaTriples.cpp index de8a474602..080b4cf89d 100644 --- a/src/index/DeltaTriples.cpp +++ b/src/index/DeltaTriples.cpp @@ -1,8 +1,8 @@ // Copyright 2023 - 2024, University of Freiburg -// Chair of Algorithms and Data Structures. -// Authors: -// 2023 Hannah Bast -// 2024 Julian Mundhahs +// Chair of Algorithms and Data Structures +// Authors: Hannah Bast +// Julian Mundhahs +// Johannes Kalmbach #include "index/DeltaTriples.h" @@ -179,8 +179,9 @@ LocatedTriplesSnapshot::getLocatedTriplesForPermutation( // ____________________________________________________________________________ SharedLocatedTriplesSnapshot DeltaTriples::getSnapshot() const { - // Note: Semantically, both the members are copied, but the `localVocab_` has - // no explicit copy constructor, hence the explicit `clone`. + // NOTE: Both members of the `LocatedTriplesSnapshot` are copied, but the + // `localVocab_` has no copy constructor (in order to avoid accidental + // copies), hence the explicit `clone`. return SharedLocatedTriplesSnapshot{std::make_shared( locatedTriples(), localVocab_.clone())}; } diff --git a/src/index/DeltaTriples.h b/src/index/DeltaTriples.h index 10323ebfd2..7edac3a0a5 100644 --- a/src/index/DeltaTriples.h +++ b/src/index/DeltaTriples.h @@ -1,8 +1,8 @@ // Copyright 2023 - 2024, University of Freiburg -// Chair of Algorithms and Data Structures. -// Authors: -// 2023 Hannah Bast -// 2024 Julian Mundhahs +// Chair of Algorithms and Data Structures +// Authors: Hannah Bast +// Julian Mundhahs +// Johannes Kalmbach #pragma once @@ -31,8 +31,8 @@ struct LocatedTriplesSnapshot { Permutation::Enum permutation) const; }; -// A `shared_ptr` to a const `LocatedTriplesSnapshot`, but as an explicit class, -// s.t. it can be forward-declared. +// A shared pointer to a constant `LocatedTriplesSnapshot`, but as an explicit +// class, such that it can be forward-declared. class SharedLocatedTriplesSnapshot : public std::shared_ptr {}; @@ -194,21 +194,13 @@ class DeltaTriplesManager { explicit DeltaTriplesManager(const IndexImpl& index); FRIEND_TEST(DeltaTriplesTest, DeltaTriplesManager); - // Modify the underlying `DeltaTriples` by applying the `function` to them. - // Then update the current snapshot, s.t. subsequent calls to - // `getCurrentSnapshot` will observe the modifications. All this is done in a - // thread-safe way, meaning that there can be only one call to `modify` at the - // same time. + // Modify the underlying `DeltaTriples` by applying `function` and then update + // the current snapshot. Concurrent calls to `modify` will be serialized, and + // each call to `getCurrentSnapshot` will either return the snapshot before or + // after a modification, but never one of an ongoing modification. void modify(std::function function); - // Return a `SharedLocatedTriplesSnapshot` that contains a deep copy of the - // state of the underlying `DeltaTriples` after the last completed UPDATE, and - // thus is not affected by future UPDATE requests. It can therefore be used to - // execute a query in a consistent way. + // Return a shared pointer to a deep copy of the current snapshot. This can + // be safely used to execute a query without interfering with future updates. SharedLocatedTriplesSnapshot getCurrentSnapshot() const; }; - -// DELTA TRIPLES AND THE CACHE -// -// Changes to the DeltaTriples invalidate all cache results that have an index -// scan in their subtree, which is almost all entries in practice. diff --git a/src/index/Permutation.h b/src/index/Permutation.h index 0044ae4dc9..118153708b 100644 --- a/src/index/Permutation.h +++ b/src/index/Permutation.h @@ -1,6 +1,7 @@ -// Copyright 2018, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Author: Johannes Kalmbach (johannes.kalmbach@gmail.com) +// Copyright 2018 - 2024, University of Freiburg +// Chair of Algorithms and Data Structures +// Author: Johannes Kalmbach + #pragma once #include @@ -148,20 +149,21 @@ class Permutation { const Permutation& getActualPermutation(const ScanSpecification& spec) const; const Permutation& getActualPermutation(Id id) const; + // From the given snapshot, get the located triples for this permutation. const LocatedTriplesPerBlock& getLocatedTriplesForPermutation( const LocatedTriplesSnapshot& locatedTriplesSnapshot) const; const CompressedRelationReader& reader() const { return reader_.value(); } private: - // for Log output, e.g. "POS" + // Readable name for this permutation, e.g., `POS`. std::string readableName_; - // e.g. ".pos" + // File name suffix for this permutation, e.g., `.pos`. std::string fileSuffix_; - // order of the 3 keys S(0), P(1), and O(2) for which this permutation is - // sorted, for example {1, 0, 2} for PSO. + // The order of the three components (S=0, P=1, O=2) in this permutation, + // e.g., `{1, 0, 2}` for `PSO`. array keyOrder_; - + // The metadata for this permutation. MetaData meta_; // This member is `optional` because we initialize it in a deferred way in the diff --git a/test/DeltaTriplesTest.cpp b/test/DeltaTriplesTest.cpp index 48476e2841..f442185cef 100644 --- a/test/DeltaTriplesTest.cpp +++ b/test/DeltaTriplesTest.cpp @@ -322,23 +322,25 @@ TEST_F(DeltaTriplesTest, rewriteLocalVocabEntriesAndBlankNodes) { // _____________________________________________________________________________ TEST_F(DeltaTriplesTest, DeltaTriplesManager) { + // Preparation. DeltaTriplesManager deltaTriplesManager(testQec->getIndex().getImpl()); auto& vocab = testQec->getIndex().getVocab(); auto cancellationHandle = std::make_shared>(); - std::vector threads; static constexpr size_t numThreads = 18; static constexpr size_t numIterations = 21; - // The following lambda inserts and deletes some triples and checks the state - // of the `DeltaTriples` for consistency. + + // Insert and delete a well-defined set of triples, some independent and some + // dependent on the thread index. Check that the snapshot before in the + // middle of these updates is as expected. auto insertAndDelete = [&](size_t threadIdx) { LocalVocab localVocab; SharedLocatedTriplesSnapshot beforeUpdate = deltaTriplesManager.getCurrentSnapshot(); for (size_t i = 0; i < numIterations; ++i) { - // The first triple in both vectors is shared between all threads, the - // other triples are exclusive to this thread via the `threadIdx`. + // The first triple in both vectors is the same for all threads, the + // others are exclusive to this thread via the `threadIdx`. auto triplesToInsert = makeIdTriples( vocab, localVocab, {" ", absl::StrCat(" "), @@ -347,25 +349,26 @@ TEST_F(DeltaTriplesTest, DeltaTriplesManager) { vocab, localVocab, {" ", absl::StrCat(" "), absl::StrCat(" ")}); - + // Insert the `triplesToInsert`. deltaTriplesManager.modify([&](DeltaTriples& deltaTriples) { deltaTriples.insertTriples(cancellationHandle, triplesToInsert); }); - - // We have successfully complete an update, so we expect the snapshot - // pointer to change. + // We should have successfully completed an update, so the snapshot + // pointer should have changed. EXPECT_NE(beforeUpdate, deltaTriplesManager.getCurrentSnapshot()); - + // Delete the `triplesToDelete`. deltaTriplesManager.modify([&](DeltaTriples& deltaTriples) { deltaTriples.deleteTriples(cancellationHandle, triplesToDelete); }); + // Make some checks in the middle of these updates (while the other + // threads are likely to be in the middle of their updates as well). if (i == numIterations / 2) { { - // Before the first iteration, none of the thread-exclusive triples - // are contained in the snapshot returned by the - // `locatedTriplesSnapshot_`. As the snapshot is persistent over time, - // this doesn't change in further iterations. + // None of the thread-exclusive triples should be contained in the + // original snapshot and this should not change over time. The + // Boolean argument specifies whether the triple was inserted (`true`) + // or deleted (`false`). const auto& locatedSPO = beforeUpdate->getLocatedTriplesForPermutation(Permutation::SPO); EXPECT_FALSE(locatedSPO.containsTriple(triplesToInsert.at(1), true)); @@ -376,6 +379,11 @@ TEST_F(DeltaTriplesTest, DeltaTriplesManager) { EXPECT_FALSE(locatedSPO.containsTriple(triplesToDelete.at(2), false)); } { + // Check for several of the thread-exclusive triples that they are + // properly contained in the current snapshot. + // + // TODO(Hannah): I don't understand the `false` for the second + // `containsTriple`. auto p = deltaTriplesManager.getCurrentSnapshot(); const auto& locatedSPO = p->getLocatedTriplesForPermutation(Permutation::SPO); @@ -386,17 +394,26 @@ TEST_F(DeltaTriplesTest, DeltaTriplesManager) { } } }; - // Run the above lambda in multiple threads to detect + + // Run the above for each of `numThreads` threads, where each thread knows + // its index (used to create the thread-exclusive triples). for (size_t i = 0; i < numThreads; ++i) { threads.emplace_back(insertAndDelete, i); } threads.clear(); - // If there are no updates, then the snapshot pointer doesn't change. + // Check that without updates, the snapshot pointer does not change. auto p1 = deltaTriplesManager.getCurrentSnapshot(); auto p2 = deltaTriplesManager.getCurrentSnapshot(); EXPECT_EQ(p1, p2); + // Each of the threads above inserts on thread-exclusive triple, deletes one + // thread-exclusive triple and inserts one thread-exclusive triple that is + // deleted right after. Additionally, there is one common triple inserted by + // all the threads and one common triple that is deleted by all the threads. + // + // TODO(Hannah): I don't understand why the number of thread-exclusive delted + // triples is twice that of the thread-exclusive inserted triples. auto deltaImpl = deltaTriplesManager.deltaTriples_.rlock(); EXPECT_THAT(*deltaImpl, NumTriples(numThreads + 1, 2 * numThreads + 1, 3 * numThreads + 2));