Skip to content

Commit a9e684f

Browse files
committed
also throttle on load
1 parent e278f2f commit a9e684f

File tree

1 file changed

+18
-5
lines changed

1 file changed

+18
-5
lines changed

test/common/run-tests

Lines changed: 18 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -499,6 +499,14 @@ def run(opts: CliOpts, image: str) -> int:
499499
# Safety check: ensure all tests can fit in available RAM
500500
available_ram_gb = get_available_memory_gb()
501501
usable_ram_gb = available_ram_gb - SYSTEM_RESERVE_GB
502+
cpu_count = os.cpu_count()
503+
if cpu_count is None:
504+
raise RuntimeError("Cannot determine CPU count (os.cpu_count() returned None)")
505+
max_load = cpu_count * 1.0
506+
current_load = os.getloadavg()[0]
507+
logging.info("System: %d CPUs, load %.2f, %.1f GB RAM total, %.1f GB reserved, %.1f GB usable, %d ND tests, %d destructive tests",
508+
cpu_count, current_load, available_ram_gb, SYSTEM_RESERVE_GB, usable_ram_gb,
509+
nondestructive_tests_len, destructive_tests_len)
502510
all_tests = nondestructive_tests + destructive_tests
503511
if all_tests:
504512
max_test_cost = max(t.cost for t in all_tests)
@@ -619,26 +627,31 @@ def run(opts: CliOpts, image: str) -> int:
619627
return sum(len(getattr(t.__class__, "provision", None) or {"machine": {}})
620628
for t in running_tests if not t.nondestructive)
621629

622-
# Start destructive tests if we have capacity based on available RAM
630+
# Start destructive tests if we have capacity based on available RAM and system load
623631
while destructive_tests:
624632
available_ram_gb = get_available_memory_gb()
633+
current_load = os.getloadavg()[0]
625634
next_test = destructive_tests[0]
626635
current_cost = running_cost()
627636
usable_ram = available_ram_gb - SYSTEM_RESERVE_GB
628637
free_ram = usable_ram - current_cost
629638
num_vms = count_running_vms()
630639

631-
logging.info("Scheduler: %.1f GB avail, %.1f GB reserved, %.1f GB usable, %.1f GB used by %d tests, "
632-
"%.1f GB free, %d VMs running, next test %s needs %.1f GB",
633-
available_ram_gb, SYSTEM_RESERVE_GB, usable_ram, current_cost,
640+
logging.info("Scheduler: load %.2f (max %.1f), %.1f GB avail, %.1f GB reserved, %.1f GB usable, "
641+
"%.1f GB used by %d tests, %.1f GB free, %d VMs running, next test %s needs %.1f GB",
642+
current_load, max_load, available_ram_gb, SYSTEM_RESERVE_GB, usable_ram, current_cost,
634643
len(running_tests), free_ram, num_vms, next_test, next_test.cost)
635644

645+
# Check system load first - high load means CPU starvation
646+
if current_load > max_load:
647+
logging.info("NOT starting %s: high load (%.2f > %.1f max)", next_test, current_load, max_load)
648+
break
636649
# Check available RAM
637650
if current_cost + next_test.cost > usable_ram:
638651
logging.info("NOT starting %s: insufficient RAM (would use %.1f GB, only %.1f GB usable)",
639652
next_test, current_cost + next_test.cost, usable_ram)
640653
break
641-
# RAM check passed, start the test
654+
# Load and RAM checks passed, start the test
642655
test = destructive_tests.pop(0)
643656
logging.info("STARTING %s (%.1f GB), will have %d tests using %.1f GB total",
644657
test, test.cost, len(running_tests) + 1, current_cost + test.cost)

0 commit comments

Comments
 (0)