Skip to content

Commit 960ddf4

Browse files
authored
feat: fix lua memory deallocation and lua_mem_gc_threshold=0 disable lua force GC calls (#5587)
* fix: lua script memory free
1 parent 0800b78 commit 960ddf4

File tree

3 files changed

+93
-8
lines changed

3 files changed

+93
-8
lines changed

src/core/interpreter.cc

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ ABSL_FLAG(LuaGcFlag, luagc, {},
5858

5959
ABSL_FLAG(uint64_t, lua_mem_gc_threshold, 10000000,
6060
"Specifies Lua interpreter's per thread memory limit in bytes after which the GC will be "
61-
"called forcefully.");
61+
"called forcefully. 0 value remove forced GC calls");
6262

6363
static bool AbslParseFlag(std::string_view in, LuaGcFlag* flag, std::string* err) {
6464
if (in.empty()) {
@@ -562,19 +562,24 @@ int RedisLogCommand(lua_State* lua) {
562562

563563
// See https://www.lua.org/manual/5.3/manual.html#lua_Alloc
564564
void* mimalloc_glue(void* ud, void* ptr, size_t osize, size_t nsize) {
565-
(void)ud;
565+
int64_t& used_bytes = *static_cast<int64_t*>(ud);
566+
566567
if (nsize == 0) {
567-
InterpreterManager::tl_stats().used_bytes -= mi_usable_size(ptr);
568+
used_bytes -= mi_usable_size(ptr);
568569
mi_free_size(ptr, osize);
569570
return nullptr;
570571
} else if (ptr == nullptr) {
571572
ptr = mi_malloc(nsize);
572-
InterpreterManager::tl_stats().used_bytes += mi_usable_size(ptr);
573+
used_bytes += mi_usable_size(ptr);
573574
return ptr;
574575
} else {
575-
InterpreterManager::tl_stats().used_bytes -= mi_usable_size(ptr);
576+
const auto old_size = mi_usable_size(ptr);
576577
ptr = mi_realloc(ptr, nsize);
577-
InterpreterManager::tl_stats().used_bytes += mi_usable_size(ptr);
578+
if (ptr) {
579+
used_bytes -= old_size;
580+
used_bytes += mi_usable_size(ptr);
581+
}
582+
578583
return ptr;
579584
}
580585
}
@@ -584,7 +589,9 @@ void* mimalloc_glue(void* ud, void* ptr, size_t osize, size_t nsize) {
584589
Interpreter::Interpreter() {
585590
InterpreterManager::tl_stats().interpreter_cnt++;
586591

587-
lua_ = lua_newstate(mimalloc_glue, nullptr);
592+
// interpreter can be runnned in different threads so we need to calculate
593+
// used memory via &used_bytes_ additional parameter
594+
lua_ = lua_newstate(mimalloc_glue, &used_bytes_);
588595
InitLua(lua_);
589596
void** ptr = static_cast<void**>(lua_getextraspace(lua_));
590597
*ptr = this;
@@ -1211,10 +1218,17 @@ void InterpreterManager::Return(Interpreter* ir) {
12111218
const uint64_t max_memory_usage = absl::GetFlag(FLAGS_lua_mem_gc_threshold);
12121219
using namespace chrono;
12131220
++tl_stats().interpreter_return;
1214-
if (tl_stats().used_bytes > max_memory_usage) {
1221+
tl_stats().used_bytes += ir->TakeUsedBytes();
1222+
if (max_memory_usage != 0 && tl_stats().used_bytes > max_memory_usage) {
12151223
++tl_stats().force_gc_calls;
12161224
auto before = steady_clock::now();
12171225
tl_stats().gc_freed_memory += ir->RunGC();
1226+
1227+
VLOG(2) << "stats_used_bytes: " << tl_stats().used_bytes
1228+
<< " lua_mem_gc_threshold: " << max_memory_usage
1229+
<< " force_gc_calls: " << tl_stats().force_gc_calls
1230+
<< " freed_mem: " << tl_stats().gc_freed_memory;
1231+
12181232
auto after = steady_clock::now();
12191233
tl_stats().gc_duration_ns += duration_cast<nanoseconds>(after - before).count();
12201234
}

src/core/interpreter.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,10 @@ class Interpreter {
8989
// Add function with sha and body to interpreter.
9090
AddResult AddFunction(std::string_view sha, std::string_view body, std::string* error);
9191

92+
int64_t TakeUsedBytes() {
93+
return std::exchange(used_bytes_, 0);
94+
}
95+
9296
bool Exists(std::string_view sha) const;
9397

9498
enum RunResult {
@@ -150,6 +154,7 @@ class Interpreter {
150154
unsigned cmd_depth_ = 0;
151155
RedisFunc redis_func_;
152156
std::string buffer_;
157+
int64_t used_bytes_ = 0;
153158
char name_buffer_[32]; // backing storage for cmd name
154159
};
155160

src/core/interpreter_test.cc

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ extern "C" {
1313
#include <absl/strings/str_replace.h>
1414
#include <gmock/gmock.h>
1515

16+
#include <thread>
17+
1618
#include "base/gtest.h"
1719
#include "base/logging.h"
1820

@@ -526,4 +528,68 @@ TEST_F(InterpreterTest, LuaIntOverflow) {
526528
EXPECT_FALSE(Execute("EVAL \"struct.pack('>I2147483648', '10')\" 0"));
527529
}
528530

531+
TEST_F(InterpreterTest, LuaGcStatistic) {
532+
InterpreterManager im(1);
533+
auto* interpreter = im.Get();
534+
535+
std::string_view keys[] = {"key1", "key2", "key3", "key4", "key5", "key6", "key7"};
536+
interpreter->SetGlobalArray("KEYS", SliceSpan{keys});
537+
538+
auto cb = [](Interpreter::CallArgs ca) {
539+
auto* reply = ca.translator;
540+
reply->OnInt(1);
541+
};
542+
interpreter->SetRedisFunc(cb);
543+
// next script generate several big values and set them to the keys
544+
// after the script is finished, GM isn't called for all values and
545+
// in the most cases we have more than 300k allocated memory
546+
// that will be cleaned later in the separate thread
547+
std::string script = R"(
548+
for i = 1, 7 do
549+
local str = string.rep(i, 1024 * 100)
550+
redis.call('SET', KEYS[1], str .. str)
551+
end
552+
)";
553+
554+
char sha_buf[64];
555+
Interpreter::FuncSha1(script, sha_buf);
556+
string_view sha{sha_buf, std::strlen(sha_buf)};
557+
558+
string result;
559+
Interpreter::AddResult add_res = interpreter->AddFunction(sha, script, &result);
560+
EXPECT_EQ(Interpreter::ADD_OK, add_res);
561+
562+
// When script is executed in the most cases we see that not all memory was deallocated
563+
// immediately and can be deallocated later
564+
Interpreter::RunResult run_res = interpreter->RunFunction(sha, &error_);
565+
EXPECT_EQ(Interpreter::RUN_OK, run_res);
566+
567+
// check that after script is finished not the all memory was deallocated
568+
uint64_t used_bytes = InterpreterManager::tl_stats().used_bytes;
569+
EXPECT_GE(used_bytes, 0);
570+
571+
auto force_gc_calls = InterpreterManager::tl_stats().force_gc_calls;
572+
// we need return interpreter to update statistic
573+
// force_gc_calls shouldn't be called
574+
im.Return(interpreter);
575+
EXPECT_EQ(force_gc_calls, InterpreterManager::tl_stats().force_gc_calls);
576+
EXPECT_LE(used_bytes, InterpreterManager::tl_stats().used_bytes);
577+
578+
used_bytes = InterpreterManager::tl_stats().used_bytes;
579+
580+
// we get the same interpeter again to call GC in separate thread
581+
auto* new_interpreter = im.Get();
582+
EXPECT_EQ(interpreter, new_interpreter);
583+
584+
// check that even if memory is deallocated in separate thread our statistic is correct
585+
std::thread t([&] {
586+
interpreter->RunGC();
587+
EXPECT_EQ(InterpreterManager::tl_stats().used_bytes, 0);
588+
});
589+
t.join();
590+
591+
im.Return(interpreter);
592+
EXPECT_GE(used_bytes, InterpreterManager::tl_stats().used_bytes);
593+
}
594+
529595
} // namespace dfly

0 commit comments

Comments
 (0)