Skip to content

Commit 22416d7

Browse files
committed
Move ci-kubernetes-e2e-gce-scale-resource-size to experiments
I remembered that we have experimental periodic jobs and an experimental dashboard, which might be a better place for this job.
1 parent fbbc472 commit 22416d7

File tree

2 files changed

+109
-110
lines changed

2 files changed

+109
-110
lines changed

config/jobs/kubernetes/sig-scalability/sig-scalability-experimental-periodic-jobs.yaml

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,3 +456,112 @@ periodics:
456456
limits:
457457
cpu: 2
458458
memory: "2Gi"
459+
# Experimental tests for larger resource size as proposed in https://github.com/kubernetes/kubernetes/issues/134375
460+
- cron: '1 17 2-31/2 * *' # Run on even days at 9:01PST (17:01 UTC)
461+
name: ci-kubernetes-e2e-gce-scale-resource-size
462+
tags:
463+
- "perfDashPrefix: gce-5000Nodes-ResourceSize"
464+
- "perfDashBuildsCount: 270"
465+
- "perfDashJobType: performance"
466+
cluster: k8s-infra-prow-build
467+
labels:
468+
preset-service-account: "true"
469+
preset-k8s-ssh: "true"
470+
preset-e2e-scalability-common: "true"
471+
preset-e2e-scalability-periodics: "true"
472+
preset-e2e-scalability-periodics-master: "true"
473+
decorate: true
474+
decoration_config:
475+
timeout: 450m
476+
extra_refs:
477+
- org: kubernetes
478+
repo: kubernetes
479+
base_ref: master
480+
path_alias: k8s.io/kubernetes
481+
- org: kubernetes
482+
repo: perf-tests
483+
base_ref: master
484+
path_alias: k8s.io/perf-tests
485+
annotations:
486+
testgrid-dashboards: sig-scalability-experiments
487+
testgrid-tab-name: gce-master-scale-resource-size
488+
description: "Experimental tests for larger resource size as proposed in https://github.com/kubernetes/kubernetes/issues/134375"
489+
spec:
490+
volumes:
491+
- name: cache-secret
492+
secret:
493+
secretName: scale-pull-cache-token
494+
containers:
495+
- image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20250925-95b5a2c7a5-master
496+
volumeMounts:
497+
- name: cache-secret
498+
readOnly: true
499+
mountPath: /etc/registry-auth
500+
env:
501+
- name: KUBERNETES_REGISTRY_PULL_THROUGH_HOST
502+
value: https://us-central1-docker.pkg.dev/v2/k8s-infra-e2e-scale-5k-project/k8s-5k-scale-cache/
503+
- name: KUBERNETES_REGISTRY_PULL_THROUGH_BASIC_AUTH_TOKEN_PATH
504+
value: /etc/registry-auth/token
505+
command:
506+
- runner.sh
507+
- /workspace/scenarios/kubernetes_e2e.py
508+
args:
509+
- --cluster=gce-scale-cluster
510+
- --env=HEAPSTER_MACHINE_TYPE=e2-standard-32
511+
# TODO(mborsz): Adjust or remove this change once we understand coredns
512+
# memory usage regression.
513+
- --env=KUBE_DNS_MEMORY_LIMIT=300Mi
514+
- --extract=ci/fast/latest-fast
515+
- --gcp-nodes=5000
516+
- --gcp-project-type=scalability-scale-project
517+
- --gcp-zone=us-east1-b
518+
- --provider=gce
519+
- --metadata-sources=cl2-metadata.json
520+
- --env=CL2_LOAD_TEST_THROUGHPUT=50
521+
- --env=CL2_DELETE_TEST_THROUGHPUT=50
522+
- --env=CL2_RATE_LIMIT_POD_CREATION=false
523+
- --env=KUBE_CONTROLLER_MANAGER_TEST_ARGS=--authorization-always-allow-paths=/healthz,/readyz,/livez,/metrics --endpointslice-updates-batch-period=500ms --endpoint-updates-batch-period=500ms
524+
# Overrides CONTROLLER_MANAGER_TEST_ARGS from preset-e2e-scalability-periodics.
525+
- --env=CONTROLLER_MANAGER_TEST_ARGS=--authorization-always-allow-paths=/healthz,/readyz,/livez,/metrics --profiling --contention-profiling --kube-api-qps=100 --kube-api-burst=100
526+
# Overrides SCHEDULER_TEST_ARGS from preset-e2e-scalability-periodics.
527+
# TODO(#1311): Clean this up after the experiment - it should allow us
527+
# to greatly decrease pod-startup-latency across the whole test.
529+
# Given that individual controllers have separate QPS limits, we allow
530+
# scheduler to keep up with the load from deployment, daemonset and job
531+
# performing pod creations at once.
532+
- --env=SCHEDULER_TEST_ARGS=--authorization-always-allow-paths=/healthz,/readyz,/livez,/metrics --profiling --contention-profiling --kube-api-qps=500 --kube-api-burst=500
533+
# With APF, only the sum of --max-requests-inflight and --max-mutating-requests-inflight matters, so --max-mutating-requests-inflight is set to 0.
534+
- --env=APISERVER_TEST_ARGS=--max-requests-inflight=640 --max-mutating-requests-inflight=0
535+
- --env=CL2_ENABLE_API_AVAILABILITY_MEASUREMENT=true
536+
- --env=CL2_API_AVAILABILITY_PERCENTAGE_THRESHOLD=99.5
537+
- --env=CL2_DAEMONSET_POD_PAYLOAD_SIZE=1024
538+
- --env=CL2_DEPLOYMENT_POD_PAYLOAD_SIZE=1024
539+
- --env=CL2_STATEFULSET_POD_PAYLOAD_SIZE=1024
540+
- --env=CL2_JOB_POD_PAYLOAD_SIZE=1024
541+
- --test=false
542+
- --test-cmd=$GOPATH/src/k8s.io/perf-tests/run-e2e.sh
543+
- --test-cmd-args=cluster-loader2
544+
- --test-cmd-args=--experimental-gcp-snapshot-prometheus-disk=true
545+
- --test-cmd-args=--experimental-prometheus-disk-snapshot-name=$(JOB_NAME)-$(BUILD_ID)
546+
- --test-cmd-args=--experimental-prometheus-snapshot-to-report-dir=true
547+
- --test-cmd-args=--nodes=5000
548+
- --test-cmd-args=--prometheus-scrape-node-exporter
549+
- --test-cmd-args=--provider=gce
550+
- --test-cmd-args=--report-dir=$(ARTIFACTS)
551+
- --test-cmd-args=--testconfig=testing/load/config.yaml
552+
- --test-cmd-args=--testconfig=testing/huge-service/config.yaml
553+
- --test-cmd-args=--testconfig=testing/access-tokens/config.yaml
554+
- --test-cmd-args=--testoverrides=./testing/experiments/enable_restart_count_check.yaml
555+
- --test-cmd-args=--testoverrides=./testing/experiments/ignore_known_gce_container_restarts.yaml
556+
- --test-cmd-args=--testoverrides=./testing/overrides/5000_nodes.yaml
557+
- --test-cmd-name=ClusterLoaderV2
558+
- --timeout=420m
559+
- --use-logexporter
560+
- --logexporter-gcs-path=gs://k8s-infra-scalability-tests-logs/$(JOB_NAME)/$(BUILD_ID)
561+
resources:
562+
requests:
563+
cpu: 6
564+
memory: "16Gi"
565+
limits:
566+
cpu: 6
567+
memory: "16Gi"

config/jobs/kubernetes/sig-scalability/sig-scalability-periodic-jobs.yaml

Lines changed: 0 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -1166,113 +1166,3 @@ periodics:
11661166
limits:
11671167
cpu: 3
11681168
memory: "8Gi"
1169-
1170-
# Exploratory tests for resource size limit as proposed in https://github.com/kubernetes/kubernetes/issues/134375
1171-
- cron: '1 17 2-31/2 * *' # Run on even days at 9:01PST (17:01 UTC)
1172-
name: ci-kubernetes-e2e-gce-scale-resource-size
1173-
tags:
1174-
- "perfDashPrefix: gce-5000Nodes-ResourceSize"
1175-
- "perfDashBuildsCount: 270"
1176-
- "perfDashJobType: performance"
1177-
cluster: k8s-infra-prow-build
1178-
labels:
1179-
preset-service-account: "true"
1180-
preset-k8s-ssh: "true"
1181-
preset-e2e-scalability-common: "true"
1182-
preset-e2e-scalability-periodics: "true"
1183-
preset-e2e-scalability-periodics-master: "true"
1184-
decorate: true
1185-
decoration_config:
1186-
timeout: 450m
1187-
extra_refs:
1188-
- org: kubernetes
1189-
repo: kubernetes
1190-
base_ref: master
1191-
path_alias: k8s.io/kubernetes
1192-
- org: kubernetes
1193-
repo: perf-tests
1194-
base_ref: master
1195-
path_alias: k8s.io/perf-tests
1196-
annotations:
1197-
testgrid-dashboards: sig-scalability-gce, google-gce
1198-
testgrid-tab-name: gce-master-scale-resource-size
1199-
description: "Exploratory tests for resource size limit as proposed in https://github.com/kubernetes/kubernetes/issues/134375"
1200-
spec:
1201-
volumes:
1202-
- name: cache-secret
1203-
secret:
1204-
secretName: scale-pull-cache-token
1205-
containers:
1206-
- image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20250925-95b5a2c7a5-master
1207-
volumeMounts:
1208-
- name: cache-secret
1209-
readOnly: true
1210-
mountPath: /etc/registry-auth
1211-
env:
1212-
- name: KUBERNETES_REGISTRY_PULL_THROUGH_HOST
1213-
value: https://us-central1-docker.pkg.dev/v2/k8s-infra-e2e-scale-5k-project/k8s-5k-scale-cache/
1214-
- name: KUBERNETES_REGISTRY_PULL_THROUGH_BASIC_AUTH_TOKEN_PATH
1215-
value: /etc/registry-auth/token
1216-
command:
1217-
- runner.sh
1218-
- /workspace/scenarios/kubernetes_e2e.py
1219-
args:
1220-
- --cluster=gce-scale-cluster
1221-
- --env=HEAPSTER_MACHINE_TYPE=e2-standard-32
1222-
# TODO(mborsz): Adjust or remove this change once we understand coredns
1223-
# memory usage regression.
1224-
- --env=KUBE_DNS_MEMORY_LIMIT=300Mi
1225-
- --extract=ci/fast/latest-fast
1226-
- --gcp-nodes=5000
1227-
- --gcp-project-type=scalability-scale-project
1228-
- --gcp-zone=us-east1-b
1229-
- --provider=gce
1230-
- --metadata-sources=cl2-metadata.json
1231-
- --env=CL2_LOAD_TEST_THROUGHPUT=50
1232-
- --env=CL2_DELETE_TEST_THROUGHPUT=50
1233-
- --env=CL2_RATE_LIMIT_POD_CREATION=false
1234-
- --env=KUBE_CONTROLLER_MANAGER_TEST_ARGS=--authorization-always-allow-paths=/healthz,/readyz,/livez,/metrics --endpointslice-updates-batch-period=500ms --endpoint-updates-batch-period=500ms
1235-
# Overrides CONTROLLER_MANAGER_TEST_ARGS from preset-e2e-scalability-periodics.
1236-
- --env=CONTROLLER_MANAGER_TEST_ARGS=--authorization-always-allow-paths=/healthz,/readyz,/livez,/metrics --profiling --contention-profiling --kube-api-qps=100 --kube-api-burst=100
1237-
# Overrides SCHEDULER_TEST_ARGS from preset-e2e-scalability-periodics.
1238-
# TODO(#1311): Clean this up after the experiment - it should allow
1239-
# to hugely decrease pod-startup-latency across the whole test.
1240-
# Given that individual controllers have separate QPS limits, we allow
1241-
# scheduler to keep up with the load from deployment, daemonset and job
1242-
# performing pod creations at once.
1243-
- --env=SCHEDULER_TEST_ARGS=--authorization-always-allow-paths=/healthz,/readyz,/livez,/metrics --profiling --contention-profiling --kube-api-qps=500 --kube-api-burst=500
1244-
# With APF only sum of --max-requests-inflight and --max-mutating-requests-inflight matters, so set --max-mutating-requests-inflight to 0.
1245-
- --env=APISERVER_TEST_ARGS=--max-requests-inflight=640 --max-mutating-requests-inflight=0
1246-
- --env=CL2_ENABLE_API_AVAILABILITY_MEASUREMENT=true
1247-
- --env=CL2_API_AVAILABILITY_PERCENTAGE_THRESHOLD=99.5
1248-
- --env=CL2_DAEMONSET_POD_PAYLOAD_SIZE=1024
1249-
- --env=CL2_DEPLOYMENT_POD_PAYLOAD_SIZE=1024
1250-
- --env=CL2_STATEFULSET_POD_PAYLOAD_SIZE=1024
1251-
- --env=CL2_JOB_POD_PAYLOAD_SIZE=1024
1252-
- --test=false
1253-
- --test-cmd=$GOPATH/src/k8s.io/perf-tests/run-e2e.sh
1254-
- --test-cmd-args=cluster-loader2
1255-
- --test-cmd-args=--experimental-gcp-snapshot-prometheus-disk=true
1256-
- --test-cmd-args=--experimental-prometheus-disk-snapshot-name=$(JOB_NAME)-$(BUILD_ID)
1257-
- --test-cmd-args=--experimental-prometheus-snapshot-to-report-dir=true
1258-
- --test-cmd-args=--nodes=5000
1259-
- --test-cmd-args=--prometheus-scrape-node-exporter
1260-
- --test-cmd-args=--provider=gce
1261-
- --test-cmd-args=--report-dir=$(ARTIFACTS)
1262-
- --test-cmd-args=--testconfig=testing/load/config.yaml
1263-
- --test-cmd-args=--testconfig=testing/huge-service/config.yaml
1264-
- --test-cmd-args=--testconfig=testing/access-tokens/config.yaml
1265-
- --test-cmd-args=--testoverrides=./testing/experiments/enable_restart_count_check.yaml
1266-
- --test-cmd-args=--testoverrides=./testing/experiments/ignore_known_gce_container_restarts.yaml
1267-
- --test-cmd-args=--testoverrides=./testing/overrides/5000_nodes.yaml
1268-
- --test-cmd-name=ClusterLoaderV2
1269-
- --timeout=420m
1270-
- --use-logexporter
1271-
- --logexporter-gcs-path=gs://k8s-infra-scalability-tests-logs/$(JOB_NAME)/$(BUILD_ID)
1272-
resources:
1273-
requests:
1274-
cpu: 6
1275-
memory: "16Gi"
1276-
limits:
1277-
cpu: 6
1278-
memory: "16Gi"

0 commit comments

Comments
 (0)