
Commit 95119af

Make dbengine the default memory mode (netdata#6977)
* Basic functionality for dbengine stress test.
* Fix coverity defects
* Refactored dbengine stress test to be configurable
* Added benchmark results and evaluation in dbengine documentation
* Make dbengine the default memory mode
1 parent 06cdca8 commit 95119af

8 files changed (+376 −88 lines)

daemon/main.c

Lines changed: 29 additions & 3 deletions
@@ -306,7 +306,13 @@ int help(int exitcode) {
 " -W stacksize=N Set the stacksize (in bytes).\n\n"
 " -W debug_flags=N Set runtime tracing to debug.log.\n\n"
 " -W unittest Run internal unittests and exit.\n\n"
+#ifdef ENABLE_DBENGINE
 " -W createdataset=N Create a DB engine dataset of N seconds and exit.\n\n"
+" -W stresstest=A,B,C,D,E Run a DB engine stress test for A seconds,\n"
+" with B writers and C readers, with a ramp up\n"
+" time of D seconds for writers, a page cache\n"
+" size of E MiB, and exit.\n\n"
+#endif
 " -W set section option value\n"
 " set netdata.conf option from the command line.\n\n"
 " -W simple-pattern pattern string\n"
@@ -887,6 +893,7 @@ int main(int argc, char **argv) {
 char* stacksize_string = "stacksize=";
 char* debug_flags_string = "debug_flags=";
 char* createdataset_string = "createdataset=";
+char* stresstest_string = "stresstest=";
 
 if(strcmp(optarg, "unittest") == 0) {
     if(unit_test_buffer()) return 1;
@@ -905,14 +912,33 @@ int main(int argc, char **argv) {
     fprintf(stderr, "\n\nALL TESTS PASSED\n\n");
     return 0;
 }
+#ifdef ENABLE_DBENGINE
 else if(strncmp(optarg, createdataset_string, strlen(createdataset_string)) == 0) {
     optarg += strlen(createdataset_string);
-#ifdef ENABLE_DBENGINE
-    unsigned history_seconds = (unsigned )strtoull(optarg, NULL, 0);
+    unsigned history_seconds = strtoul(optarg, NULL, 0);
     generate_dbengine_dataset(history_seconds);
-#endif
     return 0;
 }
+else if(strncmp(optarg, stresstest_string, strlen(stresstest_string)) == 0) {
+    char *endptr;
+    unsigned test_duration_sec = 0, dset_charts = 0, query_threads = 0, ramp_up_seconds = 0,
+             page_cache_mb = 0;
+
+    optarg += strlen(stresstest_string);
+    test_duration_sec = (unsigned)strtoul(optarg, &endptr, 0);
+    if (',' == *endptr)
+        dset_charts = (unsigned)strtoul(endptr + 1, &endptr, 0);
+    if (',' == *endptr)
+        query_threads = (unsigned)strtoul(endptr + 1, &endptr, 0);
+    if (',' == *endptr)
+        ramp_up_seconds = (unsigned)strtoul(endptr + 1, &endptr, 0);
+    if (',' == *endptr)
+        page_cache_mb = (unsigned)strtoul(endptr + 1, &endptr, 0);
+    dbengine_stress_test(test_duration_sec, dset_charts, query_threads, ramp_up_seconds,
+                         page_cache_mb);
+    return 0;
+}
+#endif
 else if(strcmp(optarg, "simple-pattern") == 0) {
     if(optind + 2 > argc) {
         fprintf(stderr, "%s", "\nUSAGE: -W simple-pattern 'pattern' 'string'\n\n"
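
For illustration, an invocation of the new option could look like `netdata -W stresstest=60,32,32,0,64`: per the help text above, that would run the stress test for 60 seconds with 32 writers (one per dataset chart) and 32 readers, no writer ramp-up time, and a 64 MiB page cache. The five values here are arbitrary examples, not recommended settings. Trailing parameters may be omitted; the parser stops at the first missing comma and leaves the remaining values at their initial 0.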

daemon/unit_test.c

Lines changed: 279 additions & 33 deletions
Large diffs are not rendered by default.

daemon/unit_test.h

Lines changed: 3 additions & 0 deletions
@@ -11,6 +11,9 @@ extern int unit_test_buffer(void);
 #ifdef ENABLE_DBENGINE
 extern int test_dbengine(void);
 extern void generate_dbengine_dataset(unsigned history_seconds);
+extern void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsigned QUERY_THREADS,
+                                 unsigned RAMP_UP_SECONDS, unsigned PAGE_CACHE_MB);
+
 #endif
 
 #endif /* NETDATA_UNIT_TEST_H */

database/README.md

Lines changed: 8 additions & 7 deletions
@@ -25,7 +25,7 @@ Currently Netdata supports 6 memory modes:
 
 1. `ram`, data are purely in memory. Data are never saved on disk. This mode uses `mmap()` and supports [KSM](#ksm).
 
-2. `save`, (the default) data are only in RAM while Netdata runs and are saved to / loaded from disk on Netdata
+2. `save`, data are only in RAM while Netdata runs and are saved to / loaded from disk on Netdata
    restart. It also uses `mmap()` and supports [KSM](#ksm).
 
 3. `map`, data are in memory mapped files. This works like the swap. Keep in mind though, this will have a constant
@@ -39,11 +39,12 @@ Currently Netdata supports 6 memory modes:
 5. `alloc`, like `ram` but it uses `calloc()` and does not support [KSM](#ksm). This mode is the fallback for all
    others except `none`.
 
-6. `dbengine`, data are in database files. The [Database Engine](engine/) works like a traditional database. There is
-   some amount of RAM dedicated to data caching and indexing and the rest of the data reside compressed on disk. The
-   number of history entries is not fixed in this case, but depends on the configured disk space and the effective
-   compression ratio of the data stored. This is the **only mode** that supports changing the data collection update
-   frequency (`update_every`) **without losing** the previously stored metrics. For more details see [here](engine/).
+6. `dbengine`, (the default) data are in database files. The [Database Engine](engine/) works like a traditional
+   database. There is some amount of RAM dedicated to data caching and indexing and the rest of the data reside
+   compressed on disk. The number of history entries is not fixed in this case, but depends on the configured disk
+   space and the effective compression ratio of the data stored. This is the **only mode** that supports changing the
+   data collection update frequency (`update_every`) **without losing** the previously stored metrics. For more details
+   see [here](engine/).
 
 You can select the memory mode by editing `netdata.conf` and setting:
 
@@ -63,7 +64,7 @@ Embedded devices usually have very limited RAM resources available.
 There are 2 settings for you to tweak:
 
 1. `update every`, which controls the data collection frequency
-2. `history`, which controls the size of the database in RAM
+2. `history`, which controls the size of the database in RAM (except for `memory mode = dbengine`)
 
 By default `update every = 1` and `history = 3600`. This gives you an hour of data with per second updates.
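
Since `dbengine` is now the default, most users will not need to change anything. For reference, a minimal `netdata.conf` snippet to select the memory mode explicitly (assuming the `[global]` section layout used by this version of the configuration) would be:

    [global]
        memory mode = dbengine

With `dbengine`, `history` no longer bounds retention; the amount of stored data is governed instead by the engine's page cache and disk space settings described in the [Database Engine](engine/) documentation.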

database/engine/README.md

Lines changed: 51 additions & 0 deletions
@@ -141,4 +141,55 @@ kern.maxfiles=65536
 
 You can apply the settings by running `sysctl -p` or by rebooting.
 
+## Evaluation
+
+We have evaluated the performance of the `dbengine` API that the netdata daemon uses internally. This is **not** the
+web API of netdata. Our benchmarks ran on a **single** `dbengine` instance, multiple of which can be running in a
+netdata master server. We used a server with an AMD Ryzen Threadripper 2950X 16-Core Processor and 2 disk drives, a
+Seagate Constellation ES.3 2TB magnetic HDD and a SAMSUNG MZQLB960HAJR-00007 960GB NAND Flash SSD.
+
+For our workload, we defined 32 charts with 128 metrics each, giving us a total of 4096 metrics. We defined 1 worker
+thread per chart (32 threads) that generates new data points with a data generation interval of 1 second. The time axis
+of the time-series is emulated and accelerated so that the worker threads can generate as many data points as possible
+without delays.
+
+We also defined 32 worker threads that perform queries on random metrics with semi-random time ranges. The
+starting time of the query is randomly selected between the beginning of the time-series and the time of the latest data
+point. The ending time is randomly selected between 1 second and 1 hour after the starting time. The pseudo-random
+numbers are generated with a uniform distribution.
+
+The data are written to the database at the same time as they are read from it. This is a concurrent read/write mixed
+workload with a duration of 60 seconds. The faster `dbengine` runs, the bigger the dataset size becomes since more
+data points will be generated. We set a page cache size of 64 MiB for the two disk-bound scenarios. This way, the dataset
+size of the metric data is much bigger than the RAM that is being used for caching, so as to trigger I/O requests most
+of the time. In our final scenario, we set the page cache size to 16 GiB. That way, the dataset fits in the page cache
+so as to avoid all disk bottlenecks.
+
+The reported numbers are the following:
+
+| device | page cache | dataset | reads/sec | writes/sec |
+| :---: | :---: | ---: | ---: | ---: |
+| HDD | 64 MiB | 4.1 GiB | 813K | 18.0M |
+| SSD | 64 MiB | 9.8 GiB | 1.7M | 43.0M |
+| N/A | 16 GiB | 6.8 GiB | 118.2M | 30.2M |
+
+where "reads/sec" is the number of metric data points being read from the database via its API per second and
+"writes/sec" is the number of metric data points being written to the database per second.
+
+Notice that the HDD numbers are pretty high and not much slower than the SSD numbers. This is thanks to the database
+engine design being optimized for rotating media. In the database engine disk I/O requests are:
+
+- asynchronous to mask the high I/O latency of HDDs.
+- mostly large to reduce the amount of HDD seeking time.
+- mostly sequential to reduce the amount of HDD seeking time.
+- compressed to reduce the amount of required throughput.
+
+As a result, the HDD is not thousands of times slower than the SSD, as is typical for other workloads.
+
+An interesting observation to make is that the CPU-bound run (16 GiB page cache) generates less data than the SSD run
+(6.8 GiB vs 9.8 GiB). The reason is that the 32 reader threads in the SSD scenario are more frequently blocked by I/O,
+and generate a read load of only 1.7M/sec, whereas in the CPU-bound scenario the read load is about 70 times higher, at 118.2M/sec.
+Consequently, there is a significant degree of interference by the reader threads, which slows down the writer threads.
+This is possible because that interference costs more write throughput than is gained by removing the SSD bottleneck.
+
 [![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fdatabase%2Fengine%2FREADME&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)](<>)
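
The semi-random query ranges used by the reader threads above can be pictured with the following sketch. It only illustrates the selection rule described in the text; the function and variable names are hypothetical and are not taken from the stress test code in `daemon/unit_test.c`:

    /* Illustrative only: pick a query start time (approximately uniformly)
     * between the beginning of the series and its latest data point, and an
     * end time 1 second to 1 hour after the start. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>

    static void pick_query_range(time_t series_start, time_t latest_point,
                                 time_t *q_start, time_t *q_end)
    {
        time_t span = latest_point - series_start;

        *q_start = series_start + (span > 0 ? rand() % (span + 1) : 0);
        *q_end   = *q_start + 1 + rand() % 3600;  /* 1 second .. 1 hour later */
    }

    int main(void)
    {
        time_t now = time(NULL), q_start, q_end;

        srand((unsigned)now);
        pick_query_range(now - 7 * 24 * 3600, now, &q_start, &q_end);  /* e.g. one week of data */
        printf("query range: %ld .. %ld\n", (long)q_start, (long)q_end);
        return 0;
    }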

database/engine/rrdengine.c

Lines changed: 0 additions & 43 deletions
@@ -815,47 +815,6 @@ void rrdeng_worker(void* arg)
     complete(&ctx->rrdengine_completion);
 }
 
-
-#define NR_PAGES (256)
-static void basic_functional_test(struct rrdengine_instance *ctx)
-{
-    int i, j, failed_validations;
-    uuid_t uuid[NR_PAGES];
-    void *buf;
-    struct rrdeng_page_descr *handle[NR_PAGES];
-    char uuid_str[UUID_STR_LEN];
-    char backup[NR_PAGES][UUID_STR_LEN * 100]; /* backup storage for page data verification */
-
-    for (i = 0 ; i < NR_PAGES ; ++i) {
-        uuid_generate(uuid[i]);
-        uuid_unparse_lower(uuid[i], uuid_str);
-        // fprintf(stderr, "Generated uuid[%d]=%s\n", i, uuid_str);
-        buf = rrdeng_create_page(ctx, &uuid[i], &handle[i]);
-        /* Each page contains 10 times its own UUID stringified */
-        for (j = 0 ; j < 100 ; ++j) {
-            strcpy(buf + UUID_STR_LEN * j, uuid_str);
-            strcpy(backup[i] + UUID_STR_LEN * j, uuid_str);
-        }
-        rrdeng_commit_page(ctx, handle[i], (Word_t)i);
-    }
-    fprintf(stderr, "\n********** CREATED %d METRIC PAGES ***********\n\n", NR_PAGES);
-    failed_validations = 0;
-    for (i = 0 ; i < NR_PAGES ; ++i) {
-        buf = rrdeng_get_latest_page(ctx, &uuid[i], (void **)&handle[i]);
-        if (NULL == buf) {
-            ++failed_validations;
-            fprintf(stderr, "Page %d was LOST.\n", i);
-        }
-        if (memcmp(backup[i], buf, UUID_STR_LEN * 100)) {
-            ++failed_validations;
-            fprintf(stderr, "Page %d data comparison with backup FAILED validation.\n", i);
-        }
-        rrdeng_put_page(ctx, handle[i]);
-    }
-    fprintf(stderr, "\n********** CORRECTLY VALIDATED %d/%d METRIC PAGES ***********\n\n",
-            NR_PAGES - failed_validations, NR_PAGES);
-
-}
 /* C entry point for development purposes
  * make "LDFLAGS=-errdengine_main"
  */
@@ -868,8 +827,6 @@ void rrdengine_main(void)
     if (ret) {
         exit(ret);
     }
-    basic_functional_test(ctx);
-
     rrdeng_exit(ctx);
     fprintf(stderr, "Hello world!");
     exit(0);

database/engine/rrdenginelib.c

Lines changed: 2 additions & 2 deletions
@@ -8,7 +8,7 @@ void print_page_cache_descr(struct rrdeng_page_descr *descr)
 {
     struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr;
     char uuid_str[UUID_STR_LEN];
-    char str[BUFSIZE];
+    char str[BUFSIZE + 1];
     int pos = 0;
 
     uuid_unparse_lower(*descr->id, uuid_str);
@@ -31,7 +31,7 @@ void print_page_cache_descr(struct rrdeng_page_descr *descr)
 void print_page_descr(struct rrdeng_page_descr *descr)
 {
     char uuid_str[UUID_STR_LEN];
-    char str[BUFSIZE];
+    char str[BUFSIZE + 1];
     int pos = 0;
 
     uuid_unparse_lower(*descr->id, uuid_str);

database/rrd.c

Lines changed: 4 additions & 0 deletions
@@ -15,7 +15,11 @@ int rrd_delete_unupdated_dimensions = 0;
 
 int default_rrd_update_every = UPDATE_EVERY;
 int default_rrd_history_entries = RRD_DEFAULT_HISTORY_ENTRIES;
+#ifdef ENABLE_DBENGINE
+RRD_MEMORY_MODE default_rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE;
+#else
 RRD_MEMORY_MODE default_rrd_memory_mode = RRD_MEMORY_MODE_SAVE;
+#endif
 int gap_when_lost_iterations_above = 1;
 