
Commit 86ee2b7

committed
Improve logging when the cluster reaches max nodes total.
- add an autoscaling status to reflect that
- change the log severity to warning, as this means the autoscaler will not be fully functional (in particular, scaling up will not work)
- fix the scale-up enforcer logic so it does not skip the max-nodes-reached logging point
1 parent 29b611d commit 86ee2b7

File tree

2 files changed: +13 −6 lines changed


cluster-autoscaler/core/static_autoscaler.go

Lines changed: 11 additions & 6 deletions
@@ -519,15 +519,17 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) caerrors.AutoscalerErr
 		return false, nil
 	}
 
-	forceScaleUp := a.processors.ScaleUpEnforcer.ShouldForceScaleUp(unschedulablePodsToHelp)
+	shouldScaleUp := true
 
 	if len(unschedulablePodsToHelp) == 0 {
 		scaleUpStatus.Result = status.ScaleUpNotNeeded
 		klog.V(1).Info("No unschedulable pods")
-	} else if a.MaxNodesTotal > 0 && len(readyNodes) >= a.MaxNodesTotal && !forceScaleUp {
-		scaleUpStatus.Result = status.ScaleUpNoOptionsAvailable
-		klog.V(1).Infof("Max total nodes in cluster reached: %v. Current number of ready nodes: %v", a.MaxNodesTotal, len(readyNodes))
-	} else if len(a.BypassedSchedulers) == 0 && !forceScaleUp && allPodsAreNew(unschedulablePodsToHelp, currentTime) {
+		shouldScaleUp = false
+	} else if a.MaxNodesTotal > 0 && len(readyNodes) >= a.MaxNodesTotal {
+		scaleUpStatus.Result = status.ScaleUpLimitedByMaxNodesTotal
+		klog.Warningf("Max total nodes in cluster reached: %v. Current number of ready nodes: %v", a.MaxNodesTotal, len(readyNodes))
+		shouldScaleUp = false
+	} else if len(a.BypassedSchedulers) == 0 && allPodsAreNew(unschedulablePodsToHelp, currentTime) {
 		// The assumption here is that these pods have been created very recently and probably there
 		// is more pods to come. In theory we could check the newest pod time but then if pod were created
 		// slowly but at the pace of 1 every 2 seconds then no scale up would be triggered for long time.
@@ -537,7 +539,10 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) caerrors.AutoscalerErr
 		a.processorCallbacks.DisableScaleDownForLoop()
 		scaleUpStatus.Result = status.ScaleUpInCooldown
 		klog.V(1).Info("Unschedulable pods are very new, waiting one iteration for more")
-	} else {
+		shouldScaleUp = false
+	}
+
+	if shouldScaleUp || a.processors.ScaleUpEnforcer.ShouldForceScaleUp(unschedulablePodsToHelp) {
 		scaleUpStart := preScaleUp()
 		scaleUpStatus, typedErr = a.scaleUpOrchestrator.ScaleUp(unschedulablePodsToHelp, readyNodes, daemonsets, nodeInfosForGroups, false)
 		if exit, err := postScaleUp(scaleUpStart); exit {
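
The behavioural point of this hunk is that ShouldForceScaleUp is no longer folded into each else-if condition (where a forced scale-up used to skip the max-nodes branch and its log line entirely); the branches now only set shouldScaleUp, and the enforcer is consulted exactly once when deciding whether to call the orchestrator. Below is a minimal, self-contained sketch of that decision flow, using hypothetical stand-ins (fakeEnforcer, plain ints for node counts, fmt instead of klog) rather than the real autoscaler types:

```go
package main

import "fmt"

// fakeEnforcer is a hypothetical stand-in for processors.ScaleUpEnforcer;
// only the single method used below is modelled.
type fakeEnforcer struct{ force bool }

func (e fakeEnforcer) ShouldForceScaleUp(pods []string) bool { return e.force }

// decideScaleUp mirrors the restructured control flow: each branch may veto
// the regular scale-up by clearing shouldScaleUp, and the enforcer is checked
// exactly once at the end, so the max-nodes warning is emitted even when a
// forced scale-up follows.
func decideScaleUp(pods []string, readyNodes, maxNodesTotal int, enforcer fakeEnforcer) bool {
	shouldScaleUp := true

	if len(pods) == 0 {
		fmt.Println("No unschedulable pods")
		shouldScaleUp = false
	} else if maxNodesTotal > 0 && readyNodes >= maxNodesTotal {
		fmt.Printf("WARNING: Max total nodes in cluster reached: %v. Current number of ready nodes: %v\n", maxNodesTotal, readyNodes)
		shouldScaleUp = false
	}

	return shouldScaleUp || enforcer.ShouldForceScaleUp(pods)
}

func main() {
	// Cluster already at its node limit, but a forced scale-up is requested:
	// the warning is still printed, and the function still returns true.
	fmt.Println(decideScaleUp([]string{"pod-a"}, 10, 10, fakeEnforcer{force: true}))
}
```

With the previous ordering, the same forced scale-up would have bypassed the max-nodes branch and the warning would never have been logged.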

cluster-autoscaler/processors/status/scale_up_status_processor.go

Lines changed: 2 additions & 0 deletions
@@ -66,6 +66,8 @@ const (
 	ScaleUpNotTried
 	// ScaleUpInCooldown - the scale up wasn't even attempted, because it's in a cooldown state (it's suspended for a scheduled period of time).
 	ScaleUpInCooldown
+	// ScaleUpLimitedByMaxNodesTotal - the scale up wasn't attempted, because the cluster reached max nodes total
+	ScaleUpLimitedByMaxNodesTotal
 )
 
 // WasSuccessful returns true if the scale-up was successful.
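
With the new enum value, consumers of ScaleUpStatus can tell the "cluster reached max nodes total" case apart from a generic ScaleUpNoOptionsAvailable. The sketch below is a hypothetical illustration of how a status consumer might surface the new result; it uses a local, simplified stand-in for the ScaleUpResult constants (the names match the real ones, but the type and ordering here are not copied from the package):

```go
package main

import "fmt"

// ScaleUpResult is a simplified local stand-in for status.ScaleUpResult;
// only the values referenced below are reproduced, and their ordering is
// illustrative only.
type ScaleUpResult int

const (
	ScaleUpNotNeeded ScaleUpResult = iota
	ScaleUpNoOptionsAvailable
	ScaleUpInCooldown
	ScaleUpLimitedByMaxNodesTotal
)

// describeScaleUpResult shows how a status consumer (e.g. something emitting
// events or metrics) could surface the new value now that "cluster reached
// max nodes total" is distinguishable from "no scale-up options available".
func describeScaleUpResult(r ScaleUpResult) string {
	switch r {
	case ScaleUpLimitedByMaxNodesTotal:
		return "scale-up blocked: cluster reached the configured max nodes total"
	case ScaleUpNoOptionsAvailable:
		return "scale-up not possible: no options available"
	case ScaleUpInCooldown:
		return "scale-up in cooldown"
	default:
		return "scale-up not needed or not tried"
	}
}

func main() {
	fmt.Println(describeScaleUpResult(ScaleUpLimitedByMaxNodesTotal))
}
```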
