Merge pull request #7496 from PBundyra/noretry-flag
Introduce noRetry Parameter for checkcapacity ProvisioningRequest
k8s-ci-robot authored Nov 25, 2024
2 parents 3080b95 + 14067b7 commit 60a35bb
Showing 3 changed files with 41 additions and 1 deletion.
@@ -44,6 +44,14 @@ import (
 	ca_processors "k8s.io/autoscaler/cluster-autoscaler/processors"
 )
 
+const (
+	// NoRetryParameterKey is a key for ProvReq's Parameters that describes
+	// whether the ProvisioningRequest should be retried if CA cannot provision it.
+	// Supported values are "true" and "false"; by default ProvisioningRequests are always retried.
+	// Currently supported only for the checkcapacity class.
+	NoRetryParameterKey = "noRetry"
+)
+
 type checkCapacityProvClass struct {
 	context *context.AutoscalingContext
 	client  *provreqclient.ProvisioningRequestClient
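For illustration, a checkcapacity ProvisioningRequest that opts out of retries would carry the parameter on its spec roughly as below. This is a minimal sketch against the autoscaling.x-k8s.io v1 types; the object name and namespace are made up, only the fields relevant to the parameter are shown, and "noRetry" matches NoRetryParameterKey above:

package example

import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	v1 "k8s.io/autoscaler/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1"
)

// newNoRetryCheckCapacityRequest sketches a checkcapacity ProvisioningRequest
// that asks CA not to retry when capacity is not found.
func newNoRetryCheckCapacityRequest() *v1.ProvisioningRequest {
	return &v1.ProvisioningRequest{
		ObjectMeta: metav1.ObjectMeta{Name: "example-req", Namespace: "default"},
		Spec: v1.ProvisioningRequestSpec{
			ProvisioningClassName: "check-capacity.autoscaling.x-k8s.io",
			Parameters: map[string]v1.Parameter{
				"noRetry": "true", // "false", or omitting the key, keeps the default retry behavior
			},
		},
	}
}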
@@ -139,7 +147,16 @@ func (o *checkCapacityProvClass) checkcapacity(unschedulablePods []*apiv1.Pod, p
 	err, cleanupErr := clustersnapshot.WithForkedSnapshot(o.context.ClusterSnapshot, func() (bool, error) {
 		st, _, err := o.schedulingSimulator.TrySchedulePods(o.context.ClusterSnapshot, unschedulablePods, scheduling.ScheduleAnywhere, true)
 		if len(st) < len(unschedulablePods) || err != nil {
-			conditions.AddOrUpdateCondition(provReq, v1.Provisioned, metav1.ConditionFalse, conditions.CapacityIsNotFoundReason, "Capacity is not found, CA will try to find it later.", metav1.Now())
+			if noRetry, ok := provReq.Spec.Parameters[NoRetryParameterKey]; ok && noRetry == "true" {
+				// A Failed=true condition triggers a retry in Kueue. Otherwise a ProvisioningRequest with a
+				// Provisioned=false condition blocks capacity in Kueue even while it is in its backoff waiting time.
+				conditions.AddOrUpdateCondition(provReq, v1.Failed, metav1.ConditionTrue, conditions.CapacityIsNotFoundReason, "CA could not find requested capacity", metav1.Now())
+			} else {
+				if noRetry, ok := provReq.Spec.Parameters[NoRetryParameterKey]; ok && noRetry != "false" {
+					klog.Errorf("Ignoring Parameter %v with invalid value: %v in ProvisioningRequest: %v. Supported values are: \"true\", \"false\"", NoRetryParameterKey, noRetry, provReq.Name)
+				}
+				conditions.AddOrUpdateCondition(provReq, v1.Provisioned, metav1.ConditionFalse, conditions.CapacityIsNotFoundReason, "Capacity is not found, CA will try to find it later.", metav1.Now())
+			}
 			capacityAvailable = false
 		} else {
 			conditions.AddOrUpdateCondition(provReq, v1.Provisioned, metav1.ConditionTrue, conditions.CapacityIsFoundReason, conditions.CapacityIsFoundMsg, metav1.Now())
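To spell out the branching above: "true" yields Failed=true and no further retries by CA; "false" or an absent key keeps the default behavior (Provisioned=false, CA tries again later); any other value is logged as invalid and falls back to the default. A self-contained distillation of that decision table (a hypothetical helper, not part of the CA code):

package main

import "fmt"

// retryDecision mirrors the branching in checkcapacity above: it reports whether
// CA should keep retrying, plus a warning for unrecognized parameter values.
func retryDecision(params map[string]string) (retry bool, warning string) {
	noRetry, ok := params["noRetry"]
	switch {
	case ok && noRetry == "true":
		return false, "" // mark Failed=true and let the caller (e.g. Kueue) decide
	case ok && noRetry != "false":
		return true, fmt.Sprintf("ignoring noRetry with invalid value %q; supported values are \"true\", \"false\"", noRetry)
	default:
		return true, "" // default: Provisioned=false, CA retries later
	}
}

func main() {
	for _, params := range []map[string]string{
		{"noRetry": "true"},
		{"noRetry": "false"},
		{"noRetry": "yes"},
		{},
	} {
		retry, warning := retryDecision(params)
		fmt.Printf("params=%v retry=%v warning=%q\n", params, retry, warning)
	}
}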
@@ -206,6 +206,7 @@ func TestScaleUp(t *testing.T) {
 		batchTimebox        time.Duration
 		numProvisionedTrue  int
 		numProvisionedFalse int
+		numFailedTrue       int
 	}{
 		{
 			name: "no ProvisioningRequests",
@@ -242,6 +243,15 @@
 			provReqToScaleUp: newCheckCapacityCpuProvReq,
 			scaleUpResult:    status.ScaleUpSuccessful,
 		},
+		{
+			name: "impossible check-capacity, with noRetry parameter",
+			provReqs: []*provreqwrapper.ProvisioningRequest{
+				impossibleCheckCapacityReq.CopyWithParameters(map[string]v1.Parameter{"noRetry": "true"}),
+			},
+			provReqToScaleUp: impossibleCheckCapacityReq,
+			scaleUpResult:    status.ScaleUpNoOptionsAvailable,
+			numFailedTrue:    1,
+		},
 		{
 			name:     "some capacity is pre-booked, atomic scale-up not needed",
 			provReqs: []*provreqwrapper.ProvisioningRequest{bookedCapacityProvReq, atomicScaleUpProvReq},
@@ -438,6 +448,7 @@ func TestScaleUp(t *testing.T) {
 			provReqsAfterScaleUp, err := client.ProvisioningRequestsNoCache()
 			assert.NoError(t, err)
 			assert.Equal(t, len(tc.provReqs), len(provReqsAfterScaleUp))
+			assert.Equal(t, tc.numFailedTrue, NumProvisioningRequestsWithCondition(provReqsAfterScaleUp, v1.Failed, metav1.ConditionTrue))
 
 			if tc.batchProcessing {
 				// Since batch processing returns an aggregated result, we need to check the number of requests that have the Provisioned condition.
cluster-autoscaler/provisioningrequest/provreqwrapper/testutils.go (12 additions, 0 deletions)
@@ -159,3 +159,15 @@ func BuildTestPods(namespace, name string, podCount int) []*apiv1.Pod {
 	}
 	return pods
 }
+
+// CopyWithParameters makes a deep copy of the embedded ProvReq and sets the provided Parameters on it.
+func (pr *ProvisioningRequest) CopyWithParameters(params map[string]v1.Parameter) *ProvisioningRequest {
+	prCopy := pr.DeepCopy()
+	if prCopy.Spec.Parameters == nil {
+		prCopy.Spec.Parameters = make(map[string]v1.Parameter, len(params))
+	}
+	for key, val := range params {
+		prCopy.Spec.Parameters[key] = val
+	}
+	return &ProvisioningRequest{prCopy, pr.PodTemplates}
+}
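The new test case uses this helper to derive a parameterized variant without mutating the shared fixture. A usage sketch, assuming the wrapper embeds *v1.ProvisioningRequest as the constructor on the last line suggests:

package provreqwrapper_test

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	v1 "k8s.io/autoscaler/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1"
	"k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/provreqwrapper"
)

func ExampleProvisioningRequest_CopyWithParameters() {
	base := &provreqwrapper.ProvisioningRequest{
		ProvisioningRequest: &v1.ProvisioningRequest{
			ObjectMeta: metav1.ObjectMeta{Name: "check-capacity-req"},
		},
	}
	noRetry := base.CopyWithParameters(map[string]v1.Parameter{"noRetry": "true"})
	// The copy carries the parameter; the base request is left untouched.
	fmt.Println(noRetry.Spec.Parameters["noRetry"], base.Spec.Parameters == nil)
	// Output: true true
}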
