Skip to content

Commit a3cdcb2

Browse files
author
Jesse S
authored
feat: TOOLS-2873 add indexes-memory-budget healthcheck (#285)
* feat: TOOLS-2873 add indexes-memory-budget healthcheck * fix: TOOLS-2895 available_bin_names healthcheck trigger
1 parent 3031818 commit a3cdcb2

File tree

2 files changed

+26
-15
lines changed

2 files changed

+26
-15
lines changed

lib/health/health_checker.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -100,15 +100,15 @@ def _create_health_result_dict(self):
100100
res[HealthResultType.STATUS_COUNTERS] = copy.deepcopy(self.status_counters)
101101

102102
res[HealthResultType.EXCEPTIONS] = {}
103-
res[HealthResultType.EXCEPTIONS][
104-
HealthResultType.EXCEPTIONS_SYNTAX
105-
] = copy.deepcopy(self.syntax_exceptions)
106-
res[HealthResultType.EXCEPTIONS][
107-
HealthResultType.EXCEPTIONS_PROCESSING
108-
] = copy.deepcopy(self.health_exceptions)
109-
res[HealthResultType.EXCEPTIONS][
110-
HealthResultType.EXCEPTIONS_OTHER
111-
] = copy.deepcopy(self.other_exceptions)
103+
res[HealthResultType.EXCEPTIONS][HealthResultType.EXCEPTIONS_SYNTAX] = (
104+
copy.deepcopy(self.syntax_exceptions)
105+
)
106+
res[HealthResultType.EXCEPTIONS][HealthResultType.EXCEPTIONS_PROCESSING] = (
107+
copy.deepcopy(self.health_exceptions)
108+
)
109+
res[HealthResultType.EXCEPTIONS][HealthResultType.EXCEPTIONS_OTHER] = (
110+
copy.deepcopy(self.other_exceptions)
111+
)
112112

113113
res[HealthResultType.ASSERT] = copy.deepcopy(self.assert_outputs)
114114
res[HealthResultType.DEBUG_MESSAGES] = copy.deepcopy(self.debug_outputs)
@@ -349,6 +349,7 @@ def _execute_queries(self, query_source=None, is_source_file=True):
349349
if self.no_valid_version:
350350
self._increment_counter(HealthResultCounter.QUERY_SKIPPED_COUNTER)
351351
continue
352+
352353
if self._is_assert_query(query):
353354
self._increment_counter(HealthResultCounter.ASSERT_QUERY_COUNTER)
354355

lib/health/query.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -215,12 +215,12 @@
215215
216216
/* NB: add checks for nodes that are not homogeneous (memory, number of CPUs, etc.) */
217217
218-
SET CONSTRAINT VERSION < 7.0
218+
SET CONSTRAINT VERSION < 7.0;
219219
220220
s = select "available_bin_names", "available-bin-names" from NAMESPACE save;
221-
r = group by NAMESPACE do s > 3200;
221+
r = group by NAMESPACE do s > 6400;
222222
ASSERT(r, True, "Low namespace available bin names.", "LIMITS", WARNING,
223-
"Listed node[s] have low available bin name (< 3200) for corresponding namespace[s]. Maximum unique bin names allowed per namespace are 32k. Please run 'show statistics namespace like available' to get actual values. Possible improperly modeled data.",
223+
"Listed node[s] have low available bin name (< 6400) for corresponding namespace[s]. Maximum unique bin names allowed per namespace are 64k. Please run 'show statistics namespace like available' to get actual values. Possible improperly modeled data.",
224224
"Namespace available bin names check.");
225225
226226
@@ -329,6 +329,15 @@
329329
ASSERT(warn, True, "Low namespace disk available pct.", "OPERATIONS", WARNING,
330330
"Listed namespace[s] have lower than normal (< 20 %) available disk space. Probable cause - namespace size misconfiguration.",
331331
"Namespace disk available pct check.");
332+
333+
SET CONSTRAINT VERSION >= 7.1.0;
334+
used_bytes = select "index_used_bytes" as "stats" from NAMESPACE.STATISTICS save;
335+
stop_used_bytes = select "indexes-memory-budget" as "stats" from NAMESPACE.CONFIG save;
336+
budget_configured = do stop_used_bytes > 0;
337+
critical = do used_bytes <= stop_used_bytes;
338+
ASSERT(critical, True, "High namespace index memory used pct (stop-write enabled).", "OPERATIONS", CRITICAL,
339+
"Listed namespace[s] have higher than normal memory usage for indexes. Probable cause - namespace size misconfiguration.",
340+
"Critical Namespace index memory used pct check.", budget_configured);
332341
333342
SET CONSTRAINT VERSION >= 7.0.0;
334343
used = select "data_used_pct" as "stats" from NAMESPACE.STATISTICS save;
@@ -1567,7 +1576,7 @@
15671576
"Non-zero sindex background ops query error check");
15681577
15691578
// NOTE: ideally this should be constrained to just 5.7; the constraint below applies to every version < 6.0
1570-
SET CONSTRAINT VERSION < 6.0
1579+
SET CONSTRAINT VERSION < 6.0;
15711580
15721581
// Scan Background OPS statistics
15731582
s = select "scan_ops_bg_complete" as "cnt" from NAMESPACE.STATISTICS;
@@ -1592,7 +1601,7 @@
15921601
"Listed namespace[s] show non-zero scan background ops errors. Please run 'show statistics namespace like scan_ops_bg' to see values.",
15931602
"Non-zero scan background ops error check");
15941603
1595-
SET CONSTRAINT VERSION > 3.9
1604+
SET CONSTRAINT VERSION > 3.9;
15961605
15971606
// Scan Agg statistics
15981607
s = select "scan_aggr_complete" as "cnt" from NAMESPACE.STATISTICS;
@@ -1754,7 +1763,7 @@
17541763
17551764
// XDR Write statistics
17561765
1757-
SET CONSTRAINT VERSION < 4.5.1
1766+
SET CONSTRAINT VERSION < 4.5.1;
17581767
17591768
s = select "xdr_write_success" as "cnt", "xdr_client_write_success" as "cnt" from NAMESPACE.STATISTICS;
17601769
t = select "xdr_write_timeout" as "cnt" from NAMESPACE.STATISTICS;
@@ -1950,6 +1959,7 @@
19501959
"Namespace partition-tree-sprigs check for Community edition",
19511960
e);
19521961
1962+
# Should be further restricted to < 7.0;
19531963
SET CONSTRAINT VERSION >= 4.2;
19541964
19551965
cs = select "cluster_size" from SERVICE.STATISTICS;

0 commit comments

Comments
 (0)