Improve events when the cluster's max total nodes limit is reached.
- Log a cluster-wide event. The previous event would never fire because the
  estimators already cap the options they generate; in addition, it fired only
  once, while events are only kept for a limited time.
- Log a per-pod event explaining why the scale-up is not triggered (previously
  the pod would either get a no-scale-up event blaming the lack of a matching
  node group, or no event at all).

This required adding the list of unschedulable pods to the scale-up status
when the max total nodes limit is reached.
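For illustration, here is a minimal, self-contained sketch of the new per-pod message flow. It uses local stand-in types only; the real ScaleUpResult, NoScaleUpInfo, and ReasonsMessage live in the autoscaler's scale-up status processor (changed in the diff below) and carry more fields, such as skipped node groups.

// reasons_message_sketch.go — simplified stand-in, not the real cluster-autoscaler code.
package main

import (
	"fmt"
	"strings"
)

// ScaleUpResult mirrors only the values needed for this sketch.
type ScaleUpResult int

const (
	ScaleUpNoOptionsAvailable ScaleUpResult = iota
	ScaleUpLimitedByMaxNodesTotal
)

// NoScaleUpInfo is trimmed to the per-node-group rejection reasons used by the message.
type NoScaleUpInfo struct {
	RejectedNodeGroups map[string][]string
}

// reasonsMessage mimics the updated ReasonsMessage: the result check comes first.
func reasonsMessage(result ScaleUpResult, info NoScaleUpInfo) string {
	if result == ScaleUpLimitedByMaxNodesTotal {
		// Every pod gets the same explanation when the cluster-wide node limit blocked scale-up.
		return "max total nodes in cluster reached"
	}
	// Otherwise aggregate identical reasons across node groups, as before.
	counts := map[string]int{}
	for _, reasons := range info.RejectedNodeGroups {
		for _, r := range reasons {
			counts[r]++
		}
	}
	parts := []string{}
	for r, n := range counts {
		parts = append(parts, fmt.Sprintf("%d %s", n, r))
	}
	return strings.Join(parts, ", ")
}

func main() {
	info := NoScaleUpInfo{RejectedNodeGroups: map[string][]string{"ng1": {"not ready"}, "ng2": {"not ready"}}}
	fmt.Println(reasonsMessage(ScaleUpLimitedByMaxNodesTotal, info)) // max total nodes in cluster reached
	fmt.Println(reasonsMessage(ScaleUpNoOptionsAvailable, info))     // 2 not ready
}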
jbtk committed Feb 12, 2025
1 parent cf115af commit cbed430
Showing 3 changed files with 64 additions and 25 deletions.
55 changes: 33 additions & 22 deletions cluster-autoscaler/core/static_autoscaler.go
@@ -132,22 +132,22 @@ func (callbacks *staticAutoscalerProcessorCallbacks) reset() {

// NewStaticAutoscaler creates an instance of Autoscaler filled with provided parameters
func NewStaticAutoscaler(
-	opts config.AutoscalingOptions,
-	fwHandle *framework.Handle,
-	clusterSnapshot clustersnapshot.ClusterSnapshot,
-	autoscalingKubeClients *context.AutoscalingKubeClients,
-	processors *ca_processors.AutoscalingProcessors,
-	loopStartNotifier *loopstart.ObserversList,
-	cloudProvider cloudprovider.CloudProvider,
-	expanderStrategy expander.Strategy,
-	estimatorBuilder estimator.EstimatorBuilder,
-	backoff backoff.Backoff,
-	debuggingSnapshotter debuggingsnapshot.DebuggingSnapshotter,
-	remainingPdbTracker pdb.RemainingPdbTracker,
-	scaleUpOrchestrator scaleup.Orchestrator,
-	deleteOptions options.NodeDeleteOptions,
-	drainabilityRules rules.Rules,
-	draProvider *draprovider.Provider) *StaticAutoscaler {
+	opts config.AutoscalingOptions,
+	fwHandle *framework.Handle,
+	clusterSnapshot clustersnapshot.ClusterSnapshot,
+	autoscalingKubeClients *context.AutoscalingKubeClients,
+	processors *ca_processors.AutoscalingProcessors,
+	loopStartNotifier *loopstart.ObserversList,
+	cloudProvider cloudprovider.CloudProvider,
+	expanderStrategy expander.Strategy,
+	estimatorBuilder estimator.EstimatorBuilder,
+	backoff backoff.Backoff,
+	debuggingSnapshotter debuggingsnapshot.DebuggingSnapshotter,
+	remainingPdbTracker pdb.RemainingPdbTracker,
+	scaleUpOrchestrator scaleup.Orchestrator,
+	deleteOptions options.NodeDeleteOptions,
+	drainabilityRules rules.Rules,
+	draProvider *draprovider.Provider) *StaticAutoscaler {

klog.V(4).Infof("Creating new static autoscaler with opts: %v", opts)

@@ -528,7 +528,18 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) caerrors.AutoscalerErr
} else if a.MaxNodesTotal > 0 && len(readyNodes) >= a.MaxNodesTotal {
scaleUpStatus.Result = status.ScaleUpLimitedByMaxNodesTotal
klog.Warningf("Max total nodes in cluster reached: %v. Current number of ready nodes: %v", a.MaxNodesTotal, len(readyNodes))
+		autoscalingContext.LogRecorder.Eventf(apiv1.EventTypeWarning, "MaxNodesTotalReached",
+			"Max total nodes in cluster reached: %v", autoscalingContext.MaxNodesTotal)
		shouldScaleUp = false
+
+		noScaleUpInfoForPods := []status.NoScaleUpInfo{}
+		for _, pod := range unschedulablePodsToHelp {
+			noScaleUpInfo := status.NoScaleUpInfo{
+				Pod: pod,
+			}
+			noScaleUpInfoForPods = append(noScaleUpInfoForPods, noScaleUpInfo)
+		}
+		scaleUpStatus.PodsRemainUnschedulable = noScaleUpInfoForPods
} else if len(a.BypassedSchedulers) == 0 && allPodsAreNew(unschedulablePodsToHelp, currentTime) {
// The assumption here is that these pods have been created very recently and probably there
// is more pods to come. In theory we could check the newest pod time but then if pod were created
@@ -597,7 +608,7 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) caerrors.AutoscalerErr

scaleDownInCooldown := a.isScaleDownInCooldown(currentTime, scaleDownCandidates)
klog.V(4).Infof("Scale down status: lastScaleUpTime=%s lastScaleDownDeleteTime=%v "+
"lastScaleDownFailTime=%s scaleDownForbidden=%v scaleDownInCooldown=%v",
"lastScaleDownFailTime=%s scaleDownForbidden=%v scaleDownInCooldown=%v",
a.lastScaleUpTime, a.lastScaleDownDeleteTime, a.lastScaleDownFailTime,
a.processorCallbacks.disableScaleDownForLoop, scaleDownInCooldown)
metrics.UpdateScaleDownInCooldown(scaleDownInCooldown)
@@ -636,7 +647,7 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) caerrors.AutoscalerErr
}

if scaleDownStatus.Result == scaledownstatus.ScaleDownNoNodeDeleted &&
-		a.AutoscalingContext.AutoscalingOptions.MaxBulkSoftTaintCount != 0 {
+		a.AutoscalingContext.AutoscalingOptions.MaxBulkSoftTaintCount != 0 {
taintableNodes := a.scaleDownPlanner.UnneededNodes()

// Make sure we are only cleaning taints from selected node groups.
@@ -706,9 +717,9 @@ func (a *StaticAutoscaler) isScaleDownInCooldown(currentTime time.Time, scaleDow
return scaleDownInCooldown
}
return scaleDownInCooldown ||
-		a.lastScaleUpTime.Add(a.ScaleDownDelayAfterAdd).After(currentTime) ||
-		a.lastScaleDownFailTime.Add(a.ScaleDownDelayAfterFailure).After(currentTime) ||
-		a.lastScaleDownDeleteTime.Add(a.ScaleDownDelayAfterDelete).After(currentTime)
+		a.lastScaleUpTime.Add(a.ScaleDownDelayAfterAdd).After(currentTime) ||
+		a.lastScaleDownFailTime.Add(a.ScaleDownDelayAfterFailure).After(currentTime) ||
+		a.lastScaleDownDeleteTime.Add(a.ScaleDownDelayAfterDelete).After(currentTime)
}

// Sets the target size of node groups to the current number of nodes in them
@@ -745,7 +756,7 @@ func fixNodeGroupSize(context *context.AutoscalingContext, clusterStateRegistry
// removeOldUnregisteredNodes removes unregistered nodes if needed. Returns true
// if anything was removed and error if such occurred.
func (a *StaticAutoscaler) removeOldUnregisteredNodes(allUnregisteredNodes []clusterstate.UnregisteredNode,
-	csr *clusterstate.ClusterStateRegistry, currentTime time.Time, logRecorder *utils.LogEventRecorder) (bool, error) {
+	csr *clusterstate.ClusterStateRegistry, currentTime time.Time, logRecorder *utils.LogEventRecorder) (bool, error) {

unregisteredNodesToRemove, err := a.oldUnregisteredNodes(allUnregisteredNodes, csr, currentTime)
if err != nil {
@@ -41,7 +41,7 @@ func (p *EventingScaleUpStatusProcessor) Process(context *context.AutoscalingCon
for _, noScaleUpInfo := range status.PodsRemainUnschedulable {
context.Recorder.Event(noScaleUpInfo.Pod, apiv1.EventTypeNormal, "NotTriggerScaleUp",
fmt.Sprintf("pod didn't trigger scale-up: %s",
-				ReasonsMessage(noScaleUpInfo, consideredNodeGroupsMap)))
+				ReasonsMessage(status.Result, noScaleUpInfo, consideredNodeGroupsMap)))
}
} else {
klog.V(4).Infof("Skipping event processing for unschedulable pods since there is a" +
@@ -60,7 +60,11 @@ func (p *EventingScaleUpStatusProcessor) CleanUp() {
}

// ReasonsMessage aggregates reasons from NoScaleUpInfos.
-func ReasonsMessage(noScaleUpInfo NoScaleUpInfo, consideredNodeGroups map[string]cloudprovider.NodeGroup) string {
+func ReasonsMessage(scaleUpStatus ScaleUpResult, noScaleUpInfo NoScaleUpInfo, consideredNodeGroups map[string]cloudprovider.NodeGroup) string {
+	if scaleUpStatus == ScaleUpLimitedByMaxNodesTotal {
+		return "max total nodes in cluster reached"
+	}
+
messages := []string{}
aggregated := map[string]int{}
for nodeGroupId, reasons := range noScaleUpInfo.RejectedNodeGroups {
@@ -101,6 +101,21 @@ func TestEventingScaleUpStatusProcessor(t *testing.T) {
expectedTriggered: 0,
expectedNoTriggered: 0,
},
+		{
+			caseName: "No scale up; max total nodes in cluster reached",
+			state: &ScaleUpStatus{
+				Result:               ScaleUpLimitedByMaxNodesTotal,
+				ScaleUpInfos:         []nodegroupset.ScaleUpInfo{{}},
+				PodsTriggeredScaleUp: []*apiv1.Pod{},
+				PodsRemainUnschedulable: []NoScaleUpInfo{
+					{Pod: p1},
+					{Pod: p2},
+					{Pod: p3},
+				},
+			},
+			expectedTriggered:   0,
+			expectedNoTriggered: 3,
+		},
}

for _, tc := range testCases {
@@ -166,9 +181,9 @@ func TestReasonsMessage(t *testing.T) {
"2 max limit reached",
"1 not ready",
}
-	result := ReasonsMessage(NoScaleUpInfo{nil, rejected, skipped}, considered)
+	result := ReasonsMessage(ScaleUpNoOptionsAvailable, NoScaleUpInfo{nil, rejected, skipped}, considered)

for _, part := range expected {
assert.Contains(t, result, part)
}
}

+func TestReasonsMessageWhenScaleUpLimitedByMaxNodesTotal(t *testing.T) {
+	considered := map[string]cloudprovider.NodeGroup{}
+	noScaleUpInfo := NoScaleUpInfo{
+		Pod: nil,
+	}
+	result := ReasonsMessage(ScaleUpLimitedByMaxNodesTotal, noScaleUpInfo, considered)
+	assert.Contains(t, result, "max total nodes in cluster reached")
+}
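
Taken together with the processor change above, a pod left pending only because of the node limit should now receive an event whose reason and message come straight from the code in this diff; rendered by an event viewer it would look roughly like:

Type: Normal    Reason: NotTriggerScaleUp
Message: pod didn't trigger scale-up: max total nodes in cluster reached

while the cluster itself gets a Warning event with reason MaxNodesTotalReached on every loop in which the limit blocks scale-up.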
