Skip to content

Commit 1b0ce98

Browse files
OhadRevah and dshchedr authored
Fix MemoryDeltaFromRequestedBytes tests flakiness (#762) (#1140)
* Fix MemoryDeltaFromRequestedBytes tests flakiness Fixed the MemoryDeltaFromRequestedBytes tests by converting the fixtures they used into utils that are called inside the sampler, so the expected value is refreshed on every sample. Previously the expected value was pulled only once; it was sometimes higher than the metric value and needed to be updated. * Update tests/observability/metrics/utils.py * Refactored function and variable names Renamed functions and variables according to review comments. * Combined expected kubevirt memory delta functions * Combined MemoryDeltaFromRequestedBytes tests under the same test with parametrize. * Changed get_highest_memory_usage_virt_api_pod_dict to return a tuple * Fixed get_metrics_value --------- Co-authored-by: Den Shchedrivyi <[email protected]>
1 parent 16e9d46 commit 1b0ce98

File tree

5 files changed

+179
-95
lines changed

5 files changed

+179
-95
lines changed

tests/observability/metrics/conftest.py

Lines changed: 1 addition & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
KUBEVIRT_VMI_STATUS_ADDRESSES,
3131
KUBEVIRT_VMSNAPSHOT_PERSISTENTVOLUMECLAIM_LABELS,
3232
KUBEVIRT_VNC_ACTIVE_CONNECTIONS_BY_VMI,
33+
RSS_MEMORY_COMMAND,
3334
)
3435
from tests.observability.metrics.utils import (
3536
SINGLE_VM,
@@ -106,7 +107,6 @@
106107
CDI_UPLOAD_PRIME = "cdi-upload-prime"
107108
IP_RE_PATTERN_FROM_INTERFACE = r"eth0.*?inet (\d+\.\d+\.\d+\.\d+)/\d+"
108109
IP_ADDR_SHOW_COMMAND = shlex.split("ip addr show")
109-
RSS_MEMORY_COMMAND = shlex.split("bash -c \"cat /sys/fs/cgroup/memory.stat | grep '^anon ' | awk '{print $2}'\"")
110110
LOGGER = logging.getLogger(__name__)
111111

112112

@@ -824,53 +824,6 @@ def vm_for_test_with_resource_limits(namespace):
824824
yield vm
825825

826826

827-
@pytest.fixture(scope="class")
828-
def highest_memory_usage_virt_api_pod(hco_namespace, admin_client):
829-
oc_adm_top_pod_output = (
830-
run_command(command=shlex.split(f"oc adm top pod -n {hco_namespace.name} -l kubevirt.io=virt-api"))[1]
831-
.strip()
832-
.split("\n")[1:]
833-
)
834-
virt_api_with_highest_memory_usage = max(
835-
{pod.split()[0]: int(bitmath.parse_string_unsafe(pod.split()[2])) for pod in oc_adm_top_pod_output}.items(),
836-
key=lambda pod: pod[1],
837-
)
838-
return {
839-
"virt_api_pod": virt_api_with_highest_memory_usage[0],
840-
"memory_usage": virt_api_with_highest_memory_usage[1],
841-
}
842-
843-
844-
@pytest.fixture(scope="class")
845-
def virt_api_requested_memory(hco_namespace, admin_client, highest_memory_usage_virt_api_pod):
846-
return float(
847-
bitmath.parse_string_unsafe(
848-
get_pod_by_name_prefix(
849-
dyn_client=admin_client,
850-
pod_prefix=highest_memory_usage_virt_api_pod["virt_api_pod"],
851-
namespace=hco_namespace.name,
852-
)
853-
.instance.spec.containers[0]
854-
.resources.requests.memory
855-
)
856-
)
857-
858-
859-
@pytest.fixture()
860-
def virt_api_rss_memory(admin_client, hco_namespace, highest_memory_usage_virt_api_pod):
861-
return int(
862-
bitmath.Byte(
863-
int(
864-
get_pod_by_name_prefix(
865-
dyn_client=admin_client,
866-
pod_prefix=highest_memory_usage_virt_api_pod["virt_api_pod"],
867-
namespace=hco_namespace.name,
868-
).execute(command=RSS_MEMORY_COMMAND)
869-
)
870-
).MiB
871-
)
872-
873-
874827
@pytest.fixture()
875828
def vm_memory_working_set_bytes(vm_for_test, virt_launcher_pod_metrics_resource_exists):
876829
samples = TimeoutSampler(

tests/observability/metrics/constants.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import shlex
2+
13
from ocp_resources.resource import Resource
24

35
from utilities.constants import (
@@ -80,3 +82,7 @@
8082
KUBEVIRT_VMSNAPSHOT_PERSISTENTVOLUMECLAIM_LABELS = (
8183
"kubevirt_vmsnapshot_persistentvolumeclaim_labels{{vm_name='{vm_name}'}}"
8284
)
85+
KUBEVIRT_VMI_MIGRATION_DATA_TOTAL_BYTES = "kubevirt_vmi_migration_data_total_bytes{{name='{vm_name}'}}"
86+
BINDING_NAME = "binding_name"
87+
BINDING_TYPE = "binding_type"
88+
RSS_MEMORY_COMMAND = shlex.split("bash -c \"cat /sys/fs/cgroup/memory.stat | grep '^anon ' | awk '{print $2}'\"")

tests/observability/metrics/test_metrics.py

Lines changed: 36 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import bitmath
21
import pytest
32

43
from tests.observability.metrics.constants import (
@@ -14,7 +13,7 @@
1413
assert_vmi_dommemstat_with_metric_value,
1514
compare_kubevirt_vmi_info_metric_with_vm_info,
1615
get_vm_metrics,
17-
validate_metric_value_within_range,
16+
validate_memory_delta_metrics_value_within_range,
1817
)
1918
from tests.observability.utils import validate_metrics_value
2019
from utilities.constants import KUBEVIRT_HCO_HYPERCONVERGED_CR_EXISTS, VIRT_API, VIRT_HANDLER
@@ -177,48 +176,44 @@ def test_kubevirt_vmi_info(self, prometheus, single_metric_vm, single_metric_vmi
177176

178177

179178
class TestMemoryDeltaFromRequestedBytes:
180-
@pytest.mark.polarion("CNV-11632")
181-
def test_metric_kubevirt_memory_delta_from_requested_bytes_working_set(
182-
self, prometheus, highest_memory_usage_virt_api_pod, virt_api_requested_memory
183-
):
184-
validate_metric_value_within_range(
185-
prometheus=prometheus,
186-
metric_name=f"kubevirt_memory_delta_from_requested_bytes{{container='{VIRT_API}', "
187-
f"reason='memory_working_set_delta_from_request'}}",
188-
expected_value=float(
189-
bitmath.MiB(highest_memory_usage_virt_api_pod["memory_usage"] - virt_api_requested_memory).Byte
179+
@pytest.mark.parametrize(
180+
"metric, rss",
181+
[
182+
pytest.param(
183+
f"kubevirt_memory_delta_from_requested_bytes{{container='{VIRT_API}', "
184+
f"reason='memory_working_set_delta_from_request'}}",
185+
False,
186+
marks=pytest.mark.polarion("CNV-11632"),
187+
id="test_metric_kubevirt_memory_delta_from_requested_bytes_working_set",
190188
),
191-
)
192-
193-
@pytest.mark.polarion("CNV-11633")
194-
def test_metric_kubevirt_memory_delta_from_requested_bytes_rss(
195-
self, prometheus, virt_api_rss_memory, virt_api_requested_memory
196-
):
197-
validate_metric_value_within_range(
198-
prometheus=prometheus,
199-
metric_name=f"kubevirt_memory_delta_from_requested_bytes{{container='{VIRT_API}', "
200-
f"reason='memory_rss_delta_from_request'}}",
201-
expected_value=float(bitmath.MiB(virt_api_rss_memory - virt_api_requested_memory).Byte),
202-
)
203-
204-
@pytest.mark.polarion("CNV-11690")
205-
def test_metric_cnv_abnormal_working_set(
206-
self, prometheus, highest_memory_usage_virt_api_pod, virt_api_requested_memory
207-
):
208-
validate_metric_value_within_range(
209-
prometheus=prometheus,
210-
metric_name=f"cnv_abnormal{{container='{VIRT_API}', reason='memory_working_set_delta_from_request'}}",
211-
expected_value=float(
212-
bitmath.MiB(highest_memory_usage_virt_api_pod["memory_usage"] - virt_api_requested_memory).Byte
189+
pytest.param(
190+
f"kubevirt_memory_delta_from_requested_bytes{{container='{VIRT_API}', "
191+
f"reason='memory_rss_delta_from_request'}}",
192+
True,
193+
marks=pytest.mark.polarion("CNV-11633"),
194+
id="test_metric_kubevirt_memory_delta_from_requested_bytes_rss",
213195
),
214-
)
215-
216-
@pytest.mark.polarion("CNV-11691")
217-
def test_metric_cnv_abnormal_rss(self, prometheus, virt_api_rss_memory, virt_api_requested_memory):
218-
validate_metric_value_within_range(
196+
pytest.param(
197+
f"cnv_abnormal{{container='{VIRT_API}', reason='memory_working_set_delta_from_request'}}",
198+
False,
199+
marks=pytest.mark.polarion("CNV-11690"),
200+
id="test_metric_cnv_abnormal_working_set",
201+
),
202+
pytest.param(
203+
f"cnv_abnormal{{container='{VIRT_API}', reason='memory_rss_delta_from_request'}}",
204+
True,
205+
marks=pytest.mark.polarion("CNV-11691"),
206+
id="test_metric_cnv_abnormal_rss",
207+
),
208+
],
209+
)
210+
def test_memory_delta_from_requested_bytes(self, prometheus, admin_client, hco_namespace, metric, rss):
211+
validate_memory_delta_metrics_value_within_range(
219212
prometheus=prometheus,
220-
metric_name=f"cnv_abnormal{{container='{VIRT_API}', reason='memory_rss_delta_from_request'}}",
221-
expected_value=float(bitmath.MiB(virt_api_rss_memory - virt_api_requested_memory).Byte),
213+
metric_name=metric,
214+
rss=rss,
215+
admin_client=admin_client,
216+
hco_namespace=hco_namespace.name,
222217
)
223218

224219

tests/observability/metrics/utils.py

Lines changed: 133 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
import logging
2+
import math
23
import re
34
import shlex
45
import urllib
56
from collections import Counter
67
from datetime import datetime, timezone
7-
from typing import Any, Optional, Union
8+
from typing import Any, Optional
89

910
import bitmath
1011
import pytest
@@ -26,6 +27,7 @@
2627
KUBEVIRT_VMI_FILESYSTEM_BYTES,
2728
KUBEVIRT_VMI_FILESYSTEM_BYTES_WITH_MOUNT_POINT,
2829
METRIC_SUM_QUERY,
30+
RSS_MEMORY_COMMAND,
2931
)
3032
from tests.observability.utils import validate_metrics_value
3133
from utilities.constants import (
@@ -903,7 +905,7 @@ def validate_metric_value_within_range(
903905
prometheus=prometheus,
904906
metrics_name=metric_name,
905907
)
906-
sample: Union[int, float] = 0
908+
sample: int | float = 0
907909
try:
908910
for sample in samples:
909911
if sample:
@@ -1221,7 +1223,7 @@ def validate_metric_value_with_round_down(
12211223
prometheus=prometheus,
12221224
metrics_name=metric_name,
12231225
)
1224-
sample: Union[int, float] = 0
1226+
sample: int | float = 0
12251227
try:
12261228
for sample in samples:
12271229
sample = round(float(sample))
@@ -1230,3 +1232,131 @@ def validate_metric_value_with_round_down(
12301232
except TimeoutExpiredError:
12311233
LOGGER.info(f"Metric int value of: {metric_name} is: {sample}, expected value:{expected_value}")
12321234
raise
1235+
1236+
1237+
def get_pod_memory_stats(admin_client: DynamicClient, hco_namespace: str, pod_prefix: str) -> float:
    """
    Read a pod's memory figure by running RSS_MEMORY_COMMAND inside the pod.

    Args:
        admin_client: Client used to look up the pod
        hco_namespace: Namespace in which the pod is searched
        pod_prefix: Name prefix of the pod to run the command in

    Returns:
        float: The command output, interpreted as a number of bytes
    """
    pod = get_pod_by_name_prefix(
        dyn_client=admin_client,
        pod_prefix=pod_prefix,
        namespace=hco_namespace,
    )
    # The command prints a single number; strip the trailing newline before parsing.
    raw_bytes = pod.execute(command=RSS_MEMORY_COMMAND).strip()
    memory = bitmath.Byte(float(raw_bytes))
    return float(memory)
1251+
1252+
1253+
def get_highest_memory_usage_virt_api_pod_tuple(hco_namespace: str) -> tuple[str, int]:
    """
    Return the name and memory usage of the virt-api pod with the highest memory usage.

    The pod is taken from the first line of `oc adm top pod --sort-by memory --no-headers`
    restricted to pods labelled `kubevirt.io=virt-api`.

    Args:
        hco_namespace: Hco namespace

    Returns:
        tuple: Name of the virt-api pod with the highest memory usage and its memory usage in bytes.

    Raises:
        IndexError: If the command output does not contain the expected
            `<name> <cpu> <memory>` columns (for example, when it is empty).
    """
    top_pod_fields = (
        run_command(
            command=shlex.split(
                f"bash -c 'oc adm top pod -n {hco_namespace} --sort-by memory "
                f"--no-headers -l kubevirt.io=virt-api | head -n 1'"
            ),
        )[1]
        .strip()
        .split()
    )
    # Fail with context instead of a bare "list index out of range" when the
    # command printed nothing (or fewer columns than expected).
    if len(top_pod_fields) < 3:
        raise IndexError(
            f"Unexpected 'oc adm top pod' output for virt-api pods in namespace {hco_namespace}: {top_pod_fields}"
        )
    pod_name, memory_usage = top_pod_fields[0], top_pod_fields[2]
    return (
        pod_name,
        int(bitmath.parse_string_unsafe(memory_usage).Byte),
    )
1275+
1276+
1277+
def get_pod_requested_memory(hco_namespace: str, admin_client: DynamicClient, pod_prefix: str) -> float:
    """
    Return the memory request of a pod's first container.

    Args:
        hco_namespace: Namespace in which the pod is searched
        admin_client: Client used to look up the pod
        pod_prefix: Name prefix of the pod whose memory request is read

    Returns:
        float: Requested memory in bytes
    """
    pod = get_pod_by_name_prefix(
        dyn_client=admin_client,
        pod_prefix=pod_prefix,
        namespace=hco_namespace,
    )
    # Only the first container of the pod spec is considered.
    memory_request = pod.instance.spec.containers[0].resources.requests.memory
    requested_bytes = bitmath.parse_string_unsafe(memory_request).Byte
    return float(requested_bytes)
1300+
1301+
1302+
def expected_kubevirt_memory_delta_from_requested_bytes(
    hco_namespace: str, admin_client: DynamicClient, rss: bool
) -> int:
    """
    Compute the expected delta between used and requested memory of a virt-api pod.

    The pod inspected is the one returned by
    get_highest_memory_usage_virt_api_pod_tuple for the given namespace.

    Args:
        hco_namespace: The namespace where virt-api pods are running
        admin_client: The Kubernetes admin client
        rss: If True, the used memory is read from inside the pod with
            get_pod_memory_stats; otherwise the memory usage returned together
            with the pod name is used

    Returns:
        int: Used memory minus requested memory, in bytes
    """
    top_pod_name, top_pod_memory = get_highest_memory_usage_virt_api_pod_tuple(hco_namespace=hco_namespace)
    requested_memory = get_pod_requested_memory(
        hco_namespace=hco_namespace,
        admin_client=admin_client,
        pod_prefix=top_pod_name,
    )
    if not rss:
        return int(top_pod_memory - requested_memory)

    rss_memory = get_pod_memory_stats(
        admin_client=admin_client,
        hco_namespace=hco_namespace,
        pod_prefix=top_pod_name,
    )
    return int(rss_memory - requested_memory)
1330+
1331+
1332+
def validate_memory_delta_metrics_value_within_range(
    prometheus: Prometheus,
    metric_name: str,
    rss: bool,
    admin_client: DynamicClient,
    hco_namespace: str,
    timeout: int = TIMEOUT_4MIN,
) -> None:
    """
    Wait until a memory-delta metric is within 5% of the freshly computed expected value.

    The metric is polled with get_metrics_value every TIMEOUT_15SEC. The expected
    value is recomputed for every non-empty sample, so both sides of the
    comparison are taken at roughly the same time. Absolute values are compared.

    Args:
        prometheus: Prometheus object used to query the metric
        metric_name: Metric query to sample
        rss: Passed to expected_kubevirt_memory_delta_from_requested_bytes to
            select which memory figure the expected value is based on
        admin_client: The Kubernetes admin client
        hco_namespace: The namespace where virt-api pods are running
        timeout: How long to keep sampling before giving up

    Raises:
        TimeoutExpiredError: If no sample came within 5% of the expected value in time
    """
    sampler = TimeoutSampler(
        wait_timeout=timeout,
        sleep=TIMEOUT_15SEC,
        func=get_metrics_value,
        prometheus=prometheus,
        metrics_name=metric_name,
    )
    sample: int | float = 0
    expected_value = None
    try:
        for sample in sampler:
            # Empty sample: nothing to compare yet, wait for the next one.
            if not sample:
                continue
            sample = abs(float(sample))
            expected_delta = expected_kubevirt_memory_delta_from_requested_bytes(
                admin_client=admin_client, hco_namespace=hco_namespace, rss=rss
            )
            expected_value = abs(expected_delta)
            if math.isclose(sample, expected_value, rel_tol=0.05):
                return
    except TimeoutExpiredError:
        LOGGER.error(f"{sample} should be within 5% of {expected_value}")
        raise

utilities/monitoring.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -167,9 +167,9 @@ def get_all_firing_alerts(prometheus):
167167

168168

169169
def get_metrics_value(prometheus, metrics_name):
    """
    Query Prometheus and return the value of the first result of the query.

    Args:
        prometheus: Object exposing `query(query=...)` that returns the query response as a dict
        metrics_name: The query to run

    Returns:
        The second element of the first result's `value` entry, as returned by
        the query, or 0 when the response has no usable result.
    """
    # Tolerate responses with a missing/None "data" or "result" key instead of raising KeyError.
    metric_data = prometheus.query(query=metrics_name).get("data") or {}
    for metric_val in metric_data.get("result") or []:
        value = metric_val.get("value")
        # Skip result entries that carry no value pair instead of raising TypeError.
        if value and len(value) > 1:
            return value[1]
    LOGGER.warning(f"For Query {metrics_name}, empty results found.")
    return 0

0 commit comments

Comments
 (0)