Skip to content

Commit 9b3c115

Browse files
committed
json: Adds ref-counted string
The string class wraps the previously added blob class. It decides at creation time whether to create a new blob, or bump up the refcount of an existing blob in the pool. The pool is thread local, and is used by the strings on that thread to store pointers to allocated blobs. When a string is destroyed it decrements the ref-count, dropping the blob if necessary (i.e. when the refcount reaches 0). A lot of boilerplate in this class is from trial and error, adding methods that jsoncons required to be able to use this class as a key. Signed-off-by: Abhijat Malviya <[email protected]>
1 parent 8f93e94 commit 9b3c115

File tree

4 files changed

+347
-3
lines changed

4 files changed

+347
-3
lines changed

src/core/json/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ cur_gen_dir(gen_dir)
66
add_library(jsonpath lexer_impl.cc driver.cc path.cc
77
${gen_dir}/jsonpath_lexer.cc ${gen_dir}/jsonpath_grammar.cc json_object.cc
88
detail/jsoncons_dfs.cc detail/flat_dfs.cc
9-
detail/interned_blob.cc)
9+
detail/interned_blob.cc
10+
detail/interned_string.cc)
1011
target_link_libraries(jsonpath base absl::strings TRDP::reflex TRDP::jsoncons TRDP::flatbuffers dfly_page_usage)
1112

1213
helio_cxx_test(jsonpath_test jsonpath LABELS DFLY)
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
// Copyright 2025, DragonflyDB authors. All rights reserved.
2+
// See LICENSE for licensing terms.
3+
4+
#include "core/json/detail/interned_string.h"
5+
6+
namespace dfly::detail {
7+
8+
InternedString& InternedString::operator=(const InternedString& other) {
9+
if (this != &other) {
10+
Release();
11+
entry_ = other.entry_;
12+
Acquire();
13+
}
14+
return *this;
15+
}
16+
17+
InternedString& InternedString::operator=(InternedString&& other) noexcept {
18+
if (this != &other) {
19+
Release();
20+
entry_ = other.entry_;
21+
other.entry_ = {};
22+
}
23+
return *this;
24+
}
25+
26+
int InternedString::compare(const InternedString& other) const {
27+
return std::string_view{*this}.compare(other);
28+
}
29+
30+
int InternedString::compare(std::string_view other) const {
31+
return std::string_view{*this}.compare(other);
32+
}
33+
34+
void InternedString::ResetPool() {
35+
InternedBlobPool& pool = GetPoolRef();
36+
for (InternedBlobHandle handle : pool) {
37+
InternedBlobHandle::Destroy(handle);
38+
}
39+
pool.clear();
40+
}
41+
42+
InternedBlobHandle InternedString::Intern(const std::string_view sv) {
43+
if (sv.empty())
44+
return {};
45+
46+
InternedBlobPool& pool_ref = GetPoolRef();
47+
if (const auto it = pool_ref.find(sv); it != pool_ref.end()) {
48+
InternedBlobHandle blob = *it;
49+
blob.IncrRefCount();
50+
return blob;
51+
}
52+
53+
InternedBlobHandle handle = InternedBlobHandle::Create(sv);
54+
pool_ref.emplace(handle);
55+
return handle;
56+
}
57+
58+
void InternedString::Acquire() { // NOLINT
59+
if (entry_) {
60+
entry_.IncrRefCount();
61+
}
62+
}
63+
64+
void InternedString::Release() {
65+
if (!entry_)
66+
return;
67+
68+
entry_.DecrRefCount();
69+
70+
if (entry_.RefCount() == 0) {
71+
GetPoolRef().erase(entry_);
72+
InternedBlobHandle::Destroy(entry_);
73+
}
74+
}
75+
76+
InternedBlobPool& InternedString::GetPoolRef() {
77+
// Note on lifetimes: this pool is thread local and depends on the thread local memory resource
78+
// defined in the stateless allocator in src/core/detail/stateless_allocator.h. Since there is no
79+
// well-defined order of destruction, this pool must be manually reset before the memory resource
80+
// destruction.
81+
thread_local InternedBlobPool pool;
82+
return pool;
83+
}
84+
85+
} // namespace dfly::detail
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
// Copyright 2025, DragonflyDB authors. All rights reserved.
2+
// See LICENSE for licensing terms.
3+
4+
#pragma once
5+
6+
#include "core/detail/stateless_allocator.h"
7+
#include "core/json/detail/interned_blob.h"
8+
9+
namespace dfly::detail {
10+
11+
// InternedString handles incrementing and decrementing reference counts of the blobs tied to its
12+
// own lifecycle. It deletes the blob from a shard local pool when refcount is 0.
13+
// TODO examine cross shard json object interactions. Can a pool end up access from another shard?
14+
class InternedString {
15+
public:
16+
using allocator_type = StatelessAllocator<char>;
17+
18+
InternedString() = default;
19+
20+
explicit InternedString(const std::string_view sv) : entry_(Intern(sv)) {
21+
}
22+
23+
// The following constructors and members are added because they are required by jsoncons for
24+
// keys. Each of these is added in response to compiler errors and should not be removed, even if
25+
// they are seemingly a no-op or duplicated.
26+
27+
// jsoncons sometimes creates empty obj with custom allocator. If it creates an object with any
28+
// other allocator, we should fail during compilation.
29+
template <typename T> explicit InternedString(StatelessAllocator<T> /*unused*/) {
30+
}
31+
32+
template <typename Alloc> InternedString(const char* data, size_t size, Alloc alloc);
33+
34+
template <std::contiguous_iterator It> InternedString(It begin, It end);
35+
36+
InternedString(const InternedString& other) : entry_{other.entry_} {
37+
Acquire();
38+
}
39+
40+
InternedString(InternedString&& other) noexcept : entry_{other.entry_} {
41+
other.entry_ = {};
42+
}
43+
44+
InternedString& operator=(const InternedString& other);
45+
InternedString& operator=(InternedString&& other) noexcept;
46+
47+
~InternedString() {
48+
Release();
49+
}
50+
51+
operator std::string_view() const {
52+
return entry_;
53+
}
54+
55+
[[nodiscard]] const char* data() const {
56+
return entry_ ? entry_.Data() : "";
57+
}
58+
59+
[[nodiscard]] const char* c_str() const {
60+
return data();
61+
}
62+
63+
void swap(InternedString& other) noexcept {
64+
std::swap(entry_, other.entry_);
65+
}
66+
67+
[[nodiscard]] size_t length() const {
68+
return size();
69+
}
70+
71+
[[nodiscard]] size_t size() const {
72+
return entry_.Size();
73+
}
74+
75+
[[nodiscard]] int compare(const InternedString& other) const;
76+
[[nodiscard]] int compare(std::string_view) const;
77+
78+
// lex. comparison
79+
auto operator<=>(const InternedString& other) const {
80+
return std::string_view{*this} <=> std::string_view{other};
81+
}
82+
83+
bool operator==(const InternedString& other) const = default;
84+
85+
void shrink_to_fit() { // NOLINT (must be non-const to align with jsoncons usage)
86+
}
87+
88+
// Destroys all strings in the pool. Must be called on process shutdown before the backing memory
89+
// resource is destroyed.
90+
static void ResetPool();
91+
static InternedBlobPool& GetPoolRef();
92+
93+
[[nodiscard]] size_t MemUsed() const {
94+
return entry_ ? entry_.MemUsed() : 0;
95+
}
96+
97+
private:
98+
// If a string exists in the pool, increments its refcount. If not, adds the string to the pool.
99+
// Returns a handle wrapping the string.
100+
static InternedBlobHandle Intern(std::string_view sv);
101+
102+
// Increments the refcount if the entry is not null
103+
void Acquire();
104+
105+
// Decrements the refcount, removes entry from the pool if necessary, destroying the interned blob
106+
void Release();
107+
108+
// Wraps a null pointer by default
109+
InternedBlobHandle entry_;
110+
};
111+
112+
template <typename Alloc>
113+
InternedString::InternedString(const char* data, size_t size, Alloc /*unused*/)
114+
: InternedString(std::string_view{data, size}) {
115+
}
116+
117+
template <std::contiguous_iterator It> InternedString::InternedString(It begin, It end) {
118+
if (begin == end) {
119+
return;
120+
}
121+
122+
const auto size = std::distance(begin, end);
123+
const auto data_ptr = &*begin;
124+
entry_ = Intern(std::string_view(data_ptr, size));
125+
}
126+
127+
} // namespace dfly::detail

src/core/json/interned_blob_test.cc

Lines changed: 133 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
// Copyright 2025, DragonflyDB authors. All rights reserved.
22
// See LICENSE for licensing terms.
33

4-
#include "core/json/detail/interned_blob.h"
5-
64
#include "base/gtest.h"
75
#include "core/detail/stateless_allocator.h"
6+
#include "core/json/detail/interned_string.h"
87
#include "core/mi_memory_resource.h"
98

109
using namespace std::literals;
@@ -99,3 +98,135 @@ TEST_F(InternedBlobTest, Pool) {
9998
EXPECT_TRUE(pool.contains("foo"));
10099
InternedBlobHandle::Destroy(b1);
101100
}
101+
102+
using detail::InternedString;
103+
104+
namespace {
105+
106+
void StringCheck(const InternedString& s, const char* ptr) {
107+
std::string_view sv{ptr};
108+
109+
EXPECT_STREQ(s.data(), ptr);
110+
EXPECT_STREQ(s.c_str(), ptr);
111+
112+
EXPECT_EQ(s.size(), sv.size());
113+
EXPECT_EQ(s.length(), sv.size());
114+
115+
EXPECT_EQ(std::string_view(s), sv);
116+
EXPECT_EQ(std::string_view(s.data(), s.size()), sv);
117+
EXPECT_EQ(std::string_view(s.c_str(), s.size()), sv);
118+
}
119+
120+
} // namespace
121+
122+
TEST_F(InternedBlobTest, StringPool) {
123+
const auto& pool = InternedString::GetPoolRef();
124+
EXPECT_TRUE(pool.empty());
125+
{
126+
const InternedString s1{"foobar"};
127+
StringCheck(s1, "foobar");
128+
EXPECT_EQ(pool.size(), 1);
129+
{
130+
const InternedString s2{"foobar"};
131+
StringCheck(s2, "foobar");
132+
EXPECT_EQ(pool.size(), 1);
133+
}
134+
EXPECT_EQ(pool.size(), 1);
135+
}
136+
EXPECT_TRUE(pool.empty());
137+
138+
std::vector<InternedString> strings;
139+
for (auto i = 0; i < 1000; ++i) {
140+
strings.emplace_back(std::to_string(i));
141+
}
142+
143+
EXPECT_EQ(pool.size(), 1000);
144+
strings.clear();
145+
EXPECT_TRUE(pool.empty());
146+
147+
for (auto i = 0; i < 1000; ++i) {
148+
strings.emplace_back("zyx");
149+
}
150+
EXPECT_EQ(pool.size(), 1);
151+
strings.clear();
152+
EXPECT_TRUE(pool.empty());
153+
154+
InternedString empty;
155+
EXPECT_TRUE(pool.empty());
156+
}
157+
158+
TEST_F(InternedBlobTest, StringApi) {
159+
InternedString s1{"foobar"};
160+
EXPECT_EQ(std::string_view{s1}, "foobar"sv);
161+
StringCheck(s1, "foobar");
162+
163+
const auto& pool = InternedString::GetPoolRef();
164+
InternedString s2{"psi"};
165+
StringCheck(s2, "psi");
166+
167+
EXPECT_EQ(pool.size(), 2);
168+
169+
// swap pointers into the pool
170+
s1.swap(s2);
171+
172+
EXPECT_EQ(pool.size(), 2);
173+
174+
StringCheck(s1, "psi");
175+
StringCheck(s2, "foobar");
176+
177+
EXPECT_NE(s1, s2);
178+
EXPECT_EQ(s1, s1);
179+
// foobar < psi lexicographically
180+
EXPECT_LT(s2, s1);
181+
}
182+
183+
TEST_F(InternedBlobTest, StringCtors) {
184+
const auto& pool = InternedString::GetPoolRef();
185+
InternedString s1{"foobar"};
186+
EXPECT_EQ(pool.size(), 1);
187+
188+
// move ctor
189+
auto to = std::move(s1);
190+
EXPECT_EQ(pool.size(), 1);
191+
192+
StringCheck(to, "foobar");
193+
StringCheck(s1, "");
194+
195+
// These tests exercise self-move and self-copy behavior. This causes errors on newer GCC when
196+
// warnings are treated as errors (on CI). We need to version gate this because on older GCC this
197+
// check is not present.
198+
#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 13
199+
#pragma GCC diagnostic push
200+
#pragma GCC diagnostic ignored "-Wself-move"
201+
#endif
202+
to = std::move(to);
203+
StringCheck(to, "foobar");
204+
205+
auto copied = to;
206+
EXPECT_EQ(pool.size(), 1);
207+
208+
StringCheck(to, "foobar");
209+
StringCheck(copied, "foobar");
210+
211+
copied = copied;
212+
#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 13
213+
#pragma GCC diagnostic pop
214+
#endif
215+
StringCheck(copied, "foobar");
216+
EXPECT_EQ(pool.size(), 1);
217+
218+
const auto* mr = MemoryResource();
219+
const auto before = mr->used();
220+
221+
std::string_view sv{"......."};
222+
// ptr and size with some allocator, allocator will be ignored
223+
InternedString x{sv.data(), sv.size(), std::allocator<char>{}};
224+
StringCheck(x, ".......");
225+
EXPECT_EQ(pool.size(), 2);
226+
227+
EXPECT_GE(mr->used(), before + x.MemUsed());
228+
229+
InternedString k{sv.begin(), sv.end()};
230+
StringCheck(k, ".......");
231+
EXPECT_EQ(pool.size(), 2);
232+
}

0 commit comments

Comments
 (0)