Skip to content

Show cgroup memory and cpu after run #8946

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,12 @@ default:
- ONE_INDEXED_NODE_INDEX=${CI_NODE_INDEX:-1}; export NORMALIZED_NODE_INDEX=$((ONE_INDEXED_NODE_INDEX - 1))
- echo "NORMALIZED_NODE_TOTAL=${NORMALIZED_NODE_TOTAL}, NORMALIZED_NODE_INDEX=$NORMALIZED_NODE_INDEX"

# Reusable list of script steps (YAML anchor `cgroup_info`), referenced from
# job after_script sections as `*cgroup_info`. It runs .gitlab/cgroup-info.sh
# between gitlab_section_start/gitlab_section_end calls for "cgroup-info".
# NOTE(review): the gitlab_section_* helpers are presumably defined in
# .gitlab/gitlab-utils.sh, which is sourced first — confirm.
.cgroup_info: &cgroup_info
- source .gitlab/gitlab-utils.sh
- gitlab_section_start "cgroup-info" "cgroup info"
- .gitlab/cgroup-info.sh
- gitlab_section_end "cgroup-info"

.gradle_build: &gradle_build
image: ghcr.io/datadog/dd-trace-java-docker-build:${BUILDER_IMAGE_VERSION_PREFIX}base
stage: build
Expand Down Expand Up @@ -141,6 +147,8 @@ default:
- mv .gradle-copy .gradle
- ls -la
- gitlab_section_end "gradle-dance"
after_script:
- *cgroup_info

build:
extends: .gradle_build
Expand Down Expand Up @@ -244,6 +252,7 @@ test_published_artifacts:
- export GRADLE_OPTS="-Dorg.gradle.jvmargs='-Xmx1G -Xms1G -XX:ErrorFile=/tmp/hs_err_pid%p.log -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp'"
- ./gradlew check --info $GRADLE_ARGS
after_script:
- *cgroup_info
- source .gitlab/gitlab-utils.sh
- gitlab_section_start "collect-reports" "Collecting reports"
- .circleci/collect_reports.sh
Expand All @@ -262,6 +271,7 @@ test_published_artifacts:
script:
- ./gradlew $GRADLE_TARGET -PskipTests -PrunBuildSrcTests -PskipSpotless -PtaskPartitionCount=$NORMALIZED_NODE_TOTAL -PtaskPartition=$NORMALIZED_NODE_INDEX $GRADLE_ARGS
after_script:
- *cgroup_info
- source .gitlab/gitlab-utils.sh
- gitlab_section_start "collect-reports" "Collecting reports"
- .circleci/collect_reports.sh --destination ./check_reports --move
Expand Down Expand Up @@ -322,6 +332,7 @@ muzzle:
- split --number=l/$NORMALIZED_NODE_TOTAL --suffix-length=1 --numeric-suffixes sortedMuzzleTasks muzzleSplit
- ./gradlew `cat muzzleSplit${NORMALIZED_NODE_INDEX} | xargs` $GRADLE_ARGS
after_script:
- *cgroup_info
- source .gitlab/gitlab-utils.sh
- gitlab_section_start "collect-reports" "Collecting reports"
- .circleci/collect_reports.sh
Expand All @@ -342,6 +353,7 @@ muzzle-dep-report:
- export SKIP_BUILDSCAN="true"
- ./gradlew generateMuzzleReport muzzleInstrumentationReport $GRADLE_ARGS
after_script:
- *cgroup_info
- .circleci/collect_muzzle_deps.sh
artifacts:
when: always
Expand Down Expand Up @@ -401,6 +413,7 @@ muzzle-dep-report:
after_script:
- *restore_pretest_env
- *set_datadog_api_keys
- *cgroup_info
- source .gitlab/gitlab-utils.sh
- gitlab_section_start "collect-reports" "Collecting reports"
- .circleci/collect_reports.sh
Expand Down
83 changes: 83 additions & 0 deletions .gitlab/cgroup-info.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#!/usr/bin/env bash

#######################################
# Print one "label : value" report line.
# Arguments: $1 - label, left-aligned and padded to 35 columns
#            $2 - raw metric text; surrounding whitespace is stripped
# Outputs:   a single "label : value" line for one-line values; for
#            multi-line values the label line followed by each value
#            line prefixed with the indent string
#######################################
print_metric() {
  local label=$1
  local raw_value=$2
  local value line
  local pad=" "

  # With an empty delimiter, read consumes the whole here-string and trims
  # leading/trailing IFS whitespace. It reports failure because the
  # delimiter is never seen, so that status is deliberately ignored.
  read -r -d '' value <<< "$raw_value" || true

  # Common case first: a single-line value fits next to its label.
  if [[ "$value" != *$'\n'* ]]; then
    printf '%-35s : %s\n' "$label" "$value"
    return
  fi

  # Multi-line value: label on its own line, then every value line indented.
  printf '%-35s :\n' "$label"
  while IFS= read -r line; do
    printf '%s%s\n' "$pad" "$line"
  done <<< "$value"
}

#######################################
# Print the contents of a file, or the text 'not found' when it cannot be
# read. Errors from cat are silenced.
# Arguments: $1 - path of the file to print
# Outputs:   file contents, or 'not found', on stdout
#######################################
cat_file() {
  local path=$1
  if ! cat "$path" 2>/dev/null; then
    echo 'not found'
  fi
}

# Report host RAM plus cgroup memory and CPU metrics (cgroup v2 if the
# unified hierarchy is mounted, otherwise cgroup v1).

#######################################
# Look up one entry in a "key value" style file (e.g. memory.events,
# cpu.stat, /proc/meminfo).
# Prints the second whitespace-separated field of the first line whose first
# field is exactly the requested key. Prints 'not found' when the file is
# unreadable or contains no such line, so a missing metric is never reported
# as an empty value.
# Arguments: $1 - key to match against the first field
#            $2 - path of the file to search
# Outputs:   the value, or 'not found', on stdout
#######################################
stat_field() {
  local key="$1" file="$2"
  local name value _rest
  if [[ -r "$file" ]]; then
    # The `|| [[ -n ... ]]` part keeps a final line without trailing newline.
    while read -r name value _rest || [[ -n "$name" ]]; do
      if [[ "$name" == "$key" ]]; then
        printf '%s\n' "$value"
        return 0
      fi
    done < "$file"
  fi
  echo 'not found'
}

# Total RAM: numeric field of the MemTotal line (the kernel reports it in kB).
print_metric "RAM memory" "$(stat_field 'MemTotal:' /proc/meminfo)"

if [[ -f /sys/fs/cgroup/cgroup.controllers ]]; then
  # cgroup v2 (unified hierarchy): cgroup.controllers only exists there.
  print_metric "cgroup v2 memory.peak" "$(cat_file /sys/fs/cgroup/memory.peak)"
  print_metric "cgroup v2 memory.max" "$(cat_file /sys/fs/cgroup/memory.max)"
  print_metric "cgroup v2 memory.high" "$(cat_file /sys/fs/cgroup/memory.high)"
  print_metric "cgroup v2 memory.current" "$(cat_file /sys/fs/cgroup/memory.current)"
  if [[ -f /sys/fs/cgroup/memory.pressure ]]; then
    print_metric "cgroup v2 memory.pressure" "$(cat_file /sys/fs/cgroup/memory.pressure)"
  fi
  if [[ -f /sys/fs/cgroup/memory.events ]]; then
    # Exact key match: "oom" must not also pick up the "oom_kill" line.
    print_metric "cgroup v2 memory.events oom" "$(stat_field oom /sys/fs/cgroup/memory.events)"
    print_metric "cgroup v2 memory.events oom_kill" "$(stat_field oom_kill /sys/fs/cgroup/memory.events)"
    print_metric "cgroup v2 memory.events high" "$(stat_field high /sys/fs/cgroup/memory.events)"
  fi

  # CPU metrics
  print_metric "cgroup v2 cpu.max" "$(cat_file /sys/fs/cgroup/cpu.max)"
  # cpu.stat is not checked for existence here; stat_field reports
  # 'not found' itself when the file or the key is missing.
  print_metric "cgroup v2 cpu.nr_throttled" "$(stat_field nr_throttled /sys/fs/cgroup/cpu.stat)"
  print_metric "cgroup v2 cpu.throttled_usec" "$(stat_field throttled_usec /sys/fs/cgroup/cpu.stat)"
  print_metric "cgroup v2 cpu.usage_usec" "$(stat_field usage_usec /sys/fs/cgroup/cpu.stat)"
  if [[ -f /sys/fs/cgroup/cpu.pressure ]]; then # cpu.pressure might not exist on older kernels/setups
    print_metric "cgroup v2 cpu.pressure" "$(cat_file /sys/fs/cgroup/cpu.pressure)"
  fi

elif [[ -d /sys/fs/cgroup/memory ]]; then # Assuming if memory cgroup v1 exists, cpu might too
  # cgroup v1
  # Note: in cgroup v1, memory stats are typically found under
  # /sys/fs/cgroup/memory/. The specific path might vary inside a nested
  # cgroup; this script assumes /sys/fs/cgroup/memory/ points to the
  # relevant cgroup.
  print_metric "cgroup v1 memory.usage_in_bytes" "$(cat_file /sys/fs/cgroup/memory/memory.usage_in_bytes)"
  print_metric "cgroup v1 memory.limit_in_bytes" "$(cat_file /sys/fs/cgroup/memory/memory.limit_in_bytes)"
  print_metric "cgroup v1 memory.failcnt" "$(cat_file /sys/fs/cgroup/memory/memory.failcnt)"
  print_metric "cgroup v1 memory.max_usage_in_bytes" "$(cat_file /sys/fs/cgroup/memory/memory.max_usage_in_bytes)"

  # Throttling stats from /sys/fs/cgroup/cpu/cpu.stat
  if [[ -f /sys/fs/cgroup/cpu/cpu.stat ]]; then
    print_metric "cgroup v1 cpu.nr_throttled" "$(stat_field nr_throttled /sys/fs/cgroup/cpu/cpu.stat)"
    print_metric "cgroup v1 cpu.throttled_time_ns" "$(stat_field throttled_time /sys/fs/cgroup/cpu/cpu.stat)"
  else
    # Say explicitly that the whole cpu.stat file is missing, to avoid ambiguity
    print_metric "cgroup v1 cpu.nr_throttled" "not found (cpu.stat)"
    print_metric "cgroup v1 cpu.throttled_time_ns" "not found (cpu.stat)"
  fi
  # CPU quota settings from /sys/fs/cgroup/cpu/
  print_metric "cgroup v1 cpu.cfs_period_us" "$(cat_file /sys/fs/cgroup/cpu/cpu.cfs_period_us)"
  print_metric "cgroup v1 cpu.cfs_quota_us" "$(cat_file /sys/fs/cgroup/cpu/cpu.cfs_quota_us)"
  # CPU usage from /sys/fs/cgroup/cpuacct/ (usually same hierarchy as cpu)
  print_metric "cgroup v1 cpuacct.usage_ns" "$(cat_file /sys/fs/cgroup/cpuacct/cpuacct.usage)"
  print_metric "cgroup v1 cpuacct.usage_user_ns" "$(cat_file /sys/fs/cgroup/cpuacct/cpuacct.usage_user)"
  print_metric "cgroup v1 cpuacct.usage_sys_ns" "$(cat_file /sys/fs/cgroup/cpuacct/cpuacct.usage_sys)"

else
  printf "cgroup memory paths not found. Neither cgroup v2 controller file nor cgroup v1 memory directory detected.\n"
fi