From 9479f36986655b90edbf00f3e9e823d73b62724c Mon Sep 17 00:00:00 2001 From: Brice Dutheil Date: Fri, 6 Jun 2025 18:26:20 +0200 Subject: [PATCH 1/5] build(ci): Show cgroup memory after run --- .gitlab-ci.yml | 1 + .gitlab/cgroup-report.sh | 54 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 .gitlab/cgroup-report.sh diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 2a5a6492368..a1158912fd6 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -407,6 +407,7 @@ muzzle-dep-report: - if [ "$PROFILE_TESTS" == "true" ]; then .circleci/collect_profiles.sh; fi - .circleci/collect_results.sh - .circleci/upload_ciapp.sh $CACHE_TYPE $testJvm + - .gitlab/cgroup-report.sh - gitlab_section_end "collect-reports" - URL_ENCODED_JOB_NAME=$(jq -rn --arg x "$CI_JOB_NAME" '$x|@uri') - echo -e "${TEXT_BOLD}${TEXT_YELLOW}See test results in Datadog:${TEXT_CLEAR} https://app.datadoghq.com/ci/test/runs?query=test_level%3Atest%20%40test.service%3Add-trace-java%20%40ci.pipeline.id%3A${CI_PIPELINE_ID}%20%40ci.job.name%3A%22${URL_ENCODED_JOB_NAME}%22" diff --git a/.gitlab/cgroup-report.sh b/.gitlab/cgroup-report.sh new file mode 100644 index 00000000000..bd5fd6df1fa --- /dev/null +++ b/.gitlab/cgroup-report.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash + +print_metric() { + local label="$1" + local raw_value="$2" + local trimmed_value + + # Use read -rd '' to trim leading/trailing IFS whitespace (space, tab, newline) + read -rd '' trimmed_value <<< "$raw_value" || : + + # Check if trimmed_value contains a newline character for formatting + if [[ "$trimmed_value" == *$'\n'* ]]; then + local indent=" " + # Using a more robust way to handle potential leading/trailing newlines in raw_value for printf + printf "%-35s :\n" "$label" + printf "%s\n" "$indent${trimmed_value//$'\n'/$'\n'$indent}" # Indent and print the value on new lines + else + printf "%-35s : %s\n" "$label" "$trimmed_value" + fi +} + +cat_file() { + cat "$1" 2>/dev/null || echo 'not found' +} + 
+# Show cgroup memory usage +if [ -f /sys/fs/cgroup/cgroup.controllers ]; then + # cgroup v2 + print_metric "cgroup v2 memory.peak" "$(cat_file /sys/fs/cgroup/memory.peak)" + print_metric "cgroup v2 memory.max" "$(cat_file /sys/fs/cgroup/memory.max)" + print_metric "cgroup v2 memory.high" "$(cat_file /sys/fs/cgroup/memory.high)" + print_metric "cgroup v2 memory.current" "$(cat_file /sys/fs/cgroup/memory.current)" + if [ -f /sys/fs/cgroup/memory.pressure ]; then + print_metric "cgroup v2 memory.pressure" "$(cat_file /sys/fs/cgroup/memory.pressure)" + fi + if [ -f /sys/fs/cgroup/memory.events ]; then + print_metric "cgroup v2 memory.events oom" "$( (grep -E "^oom[[:space:]]+" /sys/fs/cgroup/memory.events | cut -d' ' -f2) 2>/dev/null || echo 'not found')" + print_metric "cgroup v2 memory.events oom_kill" "$( (grep -E "^oom_kill[[:space:]]+" /sys/fs/cgroup/memory.events | cut -d' ' -f2) 2>/dev/null || echo 'not found')" + print_metric "cgroup v2 memory.events high" "$( (grep -E "^high[[:space:]]+" /sys/fs/cgroup/memory.events | cut -d' ' -f2) 2>/dev/null || echo 'not found')" + fi +elif [ -d "/sys/fs/cgroup/memory" ]; then + # cgroup v1 + # Note: In cgroup v1, memory stats are typically found under /sys/fs/cgroup/memory/ + # The specific path might vary if inside a nested cgroup. + # This script assumes it's running in a context where /sys/fs/cgroup/memory/ points to the relevant cgroup. + print_metric "cgroup v1 memory.usage_in_bytes" "$(cat_file /sys/fs/cgroup/memory/memory.usage_in_bytes)" + print_metric "cgroup v1 memory.limit_in_bytes" "$(cat_file /sys/fs/cgroup/memory/memory.limit_in_bytes)" + print_metric "cgroup v1 memory.failcnt" "$(cat_file /sys/fs/cgroup/memory/memory.failcnt)" + print_metric "cgroup v1 memory.max_usage_in_bytes" "$(cat_file /sys/fs/cgroup/memory/memory.max_usage_in_bytes)" +else + printf "cgroup memory paths not found. 
Neither cgroup v2 controller file nor cgroup v1 memory directory detected.\n" +fi + +print_metric "ram memory" "$( (grep MemTotal /proc/meminfo | tr -s ' ' | cut -d ' ' -f 2) 2>/dev/null || echo 'not found')" From 5c5914d7386f23e580e796ca2193a933c1d2d007 Mon Sep 17 00:00:00 2001 From: Brice Dutheil Date: Tue, 10 Jun 2025 11:35:35 +0200 Subject: [PATCH 2/5] build(ci): Show cgroup cpu after run --- .gitlab/cgroup-report.sh | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/.gitlab/cgroup-report.sh b/.gitlab/cgroup-report.sh index bd5fd6df1fa..382fd6a0a3e 100644 --- a/.gitlab/cgroup-report.sh +++ b/.gitlab/cgroup-report.sh @@ -38,7 +38,17 @@ if [ -f /sys/fs/cgroup/cgroup.controllers ]; then print_metric "cgroup v2 memory.events oom_kill" "$( (grep -E "^oom_kill[[:space:]]+" /sys/fs/cgroup/memory.events | cut -d' ' -f2) 2>/dev/null || echo 'not found')" print_metric "cgroup v2 memory.events high" "$( (grep -E "^high[[:space:]]+" /sys/fs/cgroup/memory.events | cut -d' ' -f2) 2>/dev/null || echo 'not found')" fi -elif [ -d "/sys/fs/cgroup/memory" ]; then + + # CPU metrics + print_metric "cgroup v2 cpu.max" "$(cat_file /sys/fs/cgroup/cpu.max)" + print_metric "cgroup v2 cpu.nr_throttled" "$( (grep -E "^nr_throttled[[:space:]]+" /sys/fs/cgroup/cpu.stat | cut -d' ' -f2) 2>/dev/null || echo 'not found')" + print_metric "cgroup v2 cpu.throttled_usec" "$( (grep -E "^throttled_usec[[:space:]]+" /sys/fs/cgroup/cpu.stat | cut -d' ' -f2) 2>/dev/null || echo 'not found')" + print_metric "cgroup v2 cpu.usage_usec" "$( (grep -E "^usage_usec[[:space:]]+" /sys/fs/cgroup/cpu.stat | cut -d' ' -f2) 2>/dev/null || echo 'not found')" + if [ -f /sys/fs/cgroup/cpu.pressure ]; then # cpu.pressure might not exist on older kernels/setups + print_metric "cgroup v2 cpu.pressure" "$(cat_file /sys/fs/cgroup/cpu.pressure)" + fi + +elif [ -d "/sys/fs/cgroup/memory" ]; then # Assuming if memory cgroup v1 exists, cpu might too # cgroup v1 # Note: In cgroup v1, memory stats are 
typically found under /sys/fs/cgroup/memory/ # The specific path might vary if inside a nested cgroup. @@ -47,6 +57,24 @@ elif [ -d "/sys/fs/cgroup/memory" ]; then print_metric "cgroup v1 memory.limit_in_bytes" "$(cat_file /sys/fs/cgroup/memory/memory.limit_in_bytes)" print_metric "cgroup v1 memory.failcnt" "$(cat_file /sys/fs/cgroup/memory/memory.failcnt)" print_metric "cgroup v1 memory.max_usage_in_bytes" "$(cat_file /sys/fs/cgroup/memory/memory.max_usage_in_bytes)" + + # Throttling stats from /sys/fs/cgroup/cpu/cpu.stat + if [ -f /sys/fs/cgroup/cpu/cpu.stat ]; then + print_metric "cgroup v1 cpu.nr_throttled" "$( (grep -E "^nr_throttled[[:space:]]+" /sys/fs/cgroup/cpu/cpu.stat | cut -d' ' -f2) 2>/dev/null || echo 'not found')" + print_metric "cgroup v1 cpu.throttled_time_ns" "$( (grep -E "^throttled_time[[:space:]]+" /sys/fs/cgroup/cpu/cpu.stat | cut -d' ' -f2) 2>/dev/null || echo 'not found')" + else + # Print not found for these specific metrics if cpu.stat is missing, to avoid ambiguity + print_metric "cgroup v1 cpu.nr_throttled" "not found (cpu.stat)" + print_metric "cgroup v1 cpu.throttled_time_ns" "not found (cpu.stat)" + fi + # CPU Quota settings from /sys/fs/cgroup/cpu/ + print_metric "cgroup v1 cpu.cfs_period_us" "$(cat_file /sys/fs/cgroup/cpu/cpu.cfs_period_us)" + print_metric "cgroup v1 cpu.cfs_quota_us" "$(cat_file /sys/fs/cgroup/cpu/cpu.cfs_quota_us)" + # CPU usage from /sys/fs/cgroup/cpuacct/ (usually same hierarchy as cpu) + print_metric "cgroup v1 cpuacct.usage_ns" "$(cat_file /sys/fs/cgroup/cpuacct/cpuacct.usage)" + print_metric "cgroup v1 cpuacct.usage_user_ns" "$(cat_file /sys/fs/cgroup/cpuacct/cpuacct.usage_user)" + print_metric "cgroup v1 cpuacct.usage_sys_ns" "$(cat_file /sys/fs/cgroup/cpuacct/cpuacct.usage_sys)" + else printf "cgroup memory paths not found. 
Neither cgroup v2 controller file nor cgroup v1 memory directory detected.\n" fi From c1d6915ef5fe4940f1930a735b0158dba67ce842 Mon Sep 17 00:00:00 2001 From: Brice Dutheil Date: Tue, 10 Jun 2025 11:41:00 +0200 Subject: [PATCH 3/5] chore(ci): Make script executable --- .gitlab/cgroup-report.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 .gitlab/cgroup-report.sh diff --git a/.gitlab/cgroup-report.sh b/.gitlab/cgroup-report.sh old mode 100644 new mode 100755 From bcde6bba50fde4ea945020bf39ed49e0fad757ff Mon Sep 17 00:00:00 2001 From: Brice Dutheil Date: Tue, 10 Jun 2025 13:32:07 +0200 Subject: [PATCH 4/5] build(ci): Print RAM first --- .gitlab/cgroup-report.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitlab/cgroup-report.sh b/.gitlab/cgroup-report.sh index 382fd6a0a3e..2ddedac326b 100755 --- a/.gitlab/cgroup-report.sh +++ b/.gitlab/cgroup-report.sh @@ -24,6 +24,8 @@ cat_file() { } # Show cgroup memory usage +print_metric "RAM memory" "$( (grep MemTotal /proc/meminfo | tr -s ' ' | cut -d ' ' -f 2) 2>/dev/null || echo 'not found')" + if [ -f /sys/fs/cgroup/cgroup.controllers ]; then # cgroup v2 print_metric "cgroup v2 memory.peak" "$(cat_file /sys/fs/cgroup/memory.peak)" @@ -79,4 +81,3 @@ else printf "cgroup memory paths not found. 
Neither cgroup v2 controller file nor cgroup v1 memory directory detected.\n" fi -print_metric "ram memory" "$( (grep MemTotal /proc/meminfo | tr -s ' ' | cut -d ' ' -f 2) 2>/dev/null || echo 'not found')" From b3bc64ab37d0d0d78dc19b953431ea4179a926df Mon Sep 17 00:00:00 2001 From: Brice Dutheil Date: Tue, 10 Jun 2025 12:13:12 +0200 Subject: [PATCH 5/5] build(ci): Move cgroup info to .gradle_build --- .gitlab-ci.yml | 14 +++++++++++++- .gitlab/{cgroup-report.sh => cgroup-info.sh} | 0 2 files changed, 13 insertions(+), 1 deletion(-) rename .gitlab/{cgroup-report.sh => cgroup-info.sh} (100%) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a1158912fd6..75c6d12dc46 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -96,6 +96,12 @@ default: - ONE_INDEXED_NODE_INDEX=${CI_NODE_INDEX:-1}; export NORMALIZED_NODE_INDEX=$((ONE_INDEXED_NODE_INDEX - 1)) - echo "NORMALIZED_NODE_TOTAL=${NORMALIZED_NODE_TOTAL}, NORMALIZED_NODE_INDEX=$NORMALIZED_NODE_INDEX" +.cgroup_info: &cgroup_info + - source .gitlab/gitlab-utils.sh + - gitlab_section_start "cgroup-info" "cgroup info" + - .gitlab/cgroup-info.sh + - gitlab_section_end "cgroup-info" + .gradle_build: &gradle_build image: ghcr.io/datadog/dd-trace-java-docker-build:${BUILDER_IMAGE_VERSION_PREFIX}base stage: build @@ -141,6 +147,8 @@ default: - mv .gradle-copy .gradle - ls -la - gitlab_section_end "gradle-dance" + after_script: + - *cgroup_info build: extends: .gradle_build @@ -244,6 +252,7 @@ test_published_artifacts: - export GRADLE_OPTS="-Dorg.gradle.jvmargs='-Xmx1G -Xms1G -XX:ErrorFile=/tmp/hs_err_pid%p.log -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp'" - ./gradlew check --info $GRADLE_ARGS after_script: + - *cgroup_info - source .gitlab/gitlab-utils.sh - gitlab_section_start "collect-reports" "Collecting reports" - .circleci/collect_reports.sh @@ -262,6 +271,7 @@ test_published_artifacts: script: - ./gradlew $GRADLE_TARGET -PskipTests -PrunBuildSrcTests -PskipSpotless -PtaskPartitionCount=$NORMALIZED_NODE_TOTAL 
-PtaskPartition=$NORMALIZED_NODE_INDEX $GRADLE_ARGS after_script: + - *cgroup_info - source .gitlab/gitlab-utils.sh - gitlab_section_start "collect-reports" "Collecting reports" - .circleci/collect_reports.sh --destination ./check_reports --move @@ -322,6 +332,7 @@ muzzle: - split --number=l/$NORMALIZED_NODE_TOTAL --suffix-length=1 --numeric-suffixes sortedMuzzleTasks muzzleSplit - ./gradlew `cat muzzleSplit${NORMALIZED_NODE_INDEX} | xargs` $GRADLE_ARGS after_script: + - *cgroup_info - source .gitlab/gitlab-utils.sh - gitlab_section_start "collect-reports" "Collecting reports" - .circleci/collect_reports.sh @@ -342,6 +353,7 @@ muzzle-dep-report: - export SKIP_BUILDSCAN="true" - ./gradlew generateMuzzleReport muzzleInstrumentationReport $GRADLE_ARGS after_script: + - *cgroup_info - .circleci/collect_muzzle_deps.sh artifacts: when: always @@ -401,13 +413,13 @@ muzzle-dep-report: after_script: - *restore_pretest_env - *set_datadog_api_keys + - *cgroup_info - source .gitlab/gitlab-utils.sh - gitlab_section_start "collect-reports" "Collecting reports" - .circleci/collect_reports.sh - if [ "$PROFILE_TESTS" == "true" ]; then .circleci/collect_profiles.sh; fi - .circleci/collect_results.sh - .circleci/upload_ciapp.sh $CACHE_TYPE $testJvm - - .gitlab/cgroup-report.sh - gitlab_section_end "collect-reports" - URL_ENCODED_JOB_NAME=$(jq -rn --arg x "$CI_JOB_NAME" '$x|@uri') - echo -e "${TEXT_BOLD}${TEXT_YELLOW}See test results in Datadog:${TEXT_CLEAR} https://app.datadoghq.com/ci/test/runs?query=test_level%3Atest%20%40test.service%3Add-trace-java%20%40ci.pipeline.id%3A${CI_PIPELINE_ID}%20%40ci.job.name%3A%22${URL_ENCODED_JOB_NAME}%22" diff --git a/.gitlab/cgroup-report.sh b/.gitlab/cgroup-info.sh similarity index 100% rename from .gitlab/cgroup-report.sh rename to .gitlab/cgroup-info.sh