Skip to content

Commit 85d8617

Browse files
Fix creation of aw/job with same name in different namespaces
1 parent bbb6871 commit 85d8617

35 files changed

+168
-142
lines changed

doc/usage/tutorial.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,7 @@ Delete the first `AppWrapper` job.
312312

313313
```bash
314314
$ kubectl delete -f aw-01.yaml
315-
appwrapper.workload.codeflare.dev "0001-aw-generic-deployment-1" deleted
315+
workload.codeflare.dev/appwrapper "0001-aw-generic-deployment-1" deleted
316316
```
317317

318318
Check the pods status of the `AppWrapper` jobs. The new pods from the second `AppWrapper` job: `0002-aw-generic-deployment-2` job should now be deployed and running.

go.mod

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@ require (
1818
k8s.io/client-go v0.26.2
1919
k8s.io/klog/v2 v2.90.1
2020
k8s.io/metrics v0.26.2
21+
k8s.io/utils v0.0.0-20230220204549-a5ecb0141aa5
2122
sigs.k8s.io/custom-metrics-apiserver v0.0.0
23+
sigs.k8s.io/structured-merge-diff/v4 v4.2.3
2224
)
2325

2426
replace sigs.k8s.io/custom-metrics-apiserver => sigs.k8s.io/custom-metrics-apiserver v1.25.1-0.20230306170449-63d8c93851f3
@@ -107,9 +109,7 @@ require (
107109
k8s.io/component-base v0.26.2 // indirect
108110
k8s.io/kms v0.26.2 // indirect
109111
k8s.io/kube-openapi v0.0.0-20230303024457-afdc3dddf62d // indirect
110-
k8s.io/utils v0.0.0-20230220204549-a5ecb0141aa5 // indirect
111112
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.0.35 // indirect
112113
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
113-
sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect
114114
sigs.k8s.io/yaml v1.3.0 // indirect
115115
)

pkg/apis/controller/v1beta1/appwrapper.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ const AppWrapperPlural string = "appwrappers"
2626

2727
// AppWrapperAnnotationKey is the annotation key of Pod to identify
2828
// which AppWrapper it belongs to.
29-
const AppWrapperAnnotationKey = "appwrapper.mcad.ibm.com/appwrapper-name"
29+
const AppWrapperAnnotationKey = "workload.codeflare.dev/appwrapper-name"
3030

3131
// +genclient
3232
// +kubebuilder:object:root=true

pkg/controller/queuejob/queuejob_controller_ex.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ func GetQueueJobKey(obj interface{}) (string, error) {
143143
// UpdateQueueJobStatus was part of pod informer, this is now a method of queuejob_controller file.
144144
// This change is done in an effort to simplify the controller and enable to move to controller runtime.
145145
func (qjm *XController) UpdateQueueJobStatus(queuejob *arbv1.AppWrapper) error {
146-
labelSelector := fmt.Sprintf("%s=%s", "appwrapper.mcad.ibm.com", queuejob.Name)
146+
labelSelector := fmt.Sprintf("%s=%s", "workload.codeflare.dev/appwrapper", queuejob.Name)
147147
pods, errt := qjm.clients.CoreV1().Pods("").List(context.TODO(), metav1.ListOptions{LabelSelector: labelSelector})
148148
if errt != nil {
149149
return errt
@@ -208,7 +208,7 @@ func (qjm *XController) allocatableCapacity() *clusterstateapi.Resource {
208208
klog.Errorf("[allocatableCapacity] Error listing pods %v", err)
209209
}
210210
for _, pod := range podList.Items {
211-
if _, ok := pod.GetLabels()["appwrappers.mcad.ibm.com"]; !ok && pod.Status.Phase != v1.PodFailed && pod.Status.Phase != v1.PodSucceeded {
211+
if _, ok := pod.GetLabels()["workload.codeflare.dev/appwrappers"]; !ok && pod.Status.Phase != v1.PodFailed && pod.Status.Phase != v1.PodSucceeded {
212212
for _, container := range pod.Spec.Containers {
213213
usedResource := clusterstateapi.NewResource(container.Resources.Requests)
214214
capacity.Sub(usedResource)

pkg/controller/queuejobresources/genericresource/genericresource.go

+36-19
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"encoding/json"
2222
"fmt"
2323
"math"
24+
"math/rand"
2425
"runtime/debug"
2526
"strings"
2627
"time"
@@ -43,8 +44,9 @@ import (
4344
"k8s.io/client-go/restmapper"
4445
)
4546

46-
var appwrapperJobName = "appwrapper.mcad.ibm.com"
47-
var resourceName = "resourceName"
47+
var appwrapperJobLabelName = "workload.codeflare.dev/appwrapper"
48+
var appwrapperJobLabelNamespace = "workload.codeflare.dev/appwrapper-namespace"
49+
var resourceName = "workload.codeflare.dev/resourceName"
4850
var appWrapperKind = arbv1.SchemeGroupVersion.WithKind("AppWrapper")
4951

5052
type GenericResources struct {
@@ -72,6 +74,18 @@ func join(strs ...string) string {
7274
return result
7375
}
7476

77+
78+
func GetRandomString(n int) string {
79+
var letters = []rune("abcdefghijklmnopqrstuvwxyz0123456789")
80+
81+
rand.Seed(time.Now().UnixNano())
82+
b := make([]rune, n)
83+
for i := range b {
84+
b[i] = letters[rand.Intn(len(letters))]
85+
}
86+
return string(b)
87+
}
88+
7589
func (gr *GenericResources) Cleanup(aw *arbv1.AppWrapper, awr *arbv1.AppWrapperGenericResource) (genericResourceName string, groupversionkind *schema.GroupVersionKind, erro error) {
7690
var err error
7791
err = nil
@@ -166,7 +180,7 @@ func (gr *GenericResources) Cleanup(aw *arbv1.AppWrapper, awr *arbv1.AppWrapperG
166180
}
167181

168182
// Get the resource to see if it exists in the AppWrapper namespace
169-
labelSelector := fmt.Sprintf("%s=%s, %s=%s", appwrapperJobName, aw.Name, resourceName, unstruct.GetName())
183+
labelSelector := fmt.Sprintf("%s=%s, %s=%s, %s=%s", appwrapperJobLabelName, aw.Name, appwrapperJobLabelNamespace, aw.Namespace, resourceName, unstruct.GetName())
170184
inEtcd, err := dclient.Resource(rsrc).Namespace(aw.Namespace).List(context.Background(), metav1.ListOptions{LabelSelector: labelSelector})
171185
if err != nil {
172186
return name, gvk, err
@@ -175,8 +189,9 @@ func (gr *GenericResources) Cleanup(aw *arbv1.AppWrapper, awr *arbv1.AppWrapperG
175189
// Check to see if object already exists in etcd, if not, create the object.
176190
if inEtcd != nil || len(inEtcd.Items) > 0 {
177191
newName := name
178-
if len(newName) > 63 {
179-
newName = newName[:63]
192+
if len(newName) > 60 {
193+
newName = newName[:60]
194+
newName += GetRandomString(3)
180195
}
181196

182197
err = deleteObject(namespaced, namespace, newName, rsrc, dclient)
@@ -187,7 +202,7 @@ func (gr *GenericResources) Cleanup(aw *arbv1.AppWrapper, awr *arbv1.AppWrapperG
187202
return name, gvk, err
188203
}
189204
} else {
190-
klog.Warningf("[Cleanup] %s/%s not found using label selector: %s.\n", name, namespace, labelSelector)
205+
klog.Warningf("[Cleanup] %s/%s not found using label selector: %s.\n", namespace, name, labelSelector)
191206
}
192207

193208
return name, gvk, err
@@ -297,18 +312,19 @@ func (gr *GenericResources) SyncQueueJob(aw *arbv1.AppWrapper, awr *arbv1.AppWra
297312
} else {
298313
labels = unstruct.GetLabels()
299314
}
300-
labels[appwrapperJobName] = aw.Name
315+
labels[appwrapperJobLabelName] = aw.Name
316+
labels[appwrapperJobLabelNamespace] = aw.Namespace
301317
labels[resourceName] = unstruct.GetName()
302318
unstruct.SetLabels(labels)
303319

304320
// Add labels to pod template if one exists.
305321
podTemplateFound := addLabelsToPodTemplateField(&unstruct, labels)
306322
if !podTemplateFound {
307-
klog.V(4).Infof("[SyncQueueJob] No pod template spec exists for resource: %s to add labels.", name)
323+
klog.V(4).Infof("[SyncQueueJob] No pod template spec exists for resource: %s/%s to add labels.", namespace, name)
308324
}
309325

310-
// Get the resource to see if it exists
311-
labelSelector := fmt.Sprintf("%s=%s, %s=%s", appwrapperJobName, aw.Name, resourceName, unstruct.GetName())
326+
// Get the resource to see if it exists
327+
labelSelector := fmt.Sprintf("%s=%s, %s=%s, %s=%s", appwrapperJobLabelName, aw.Name, appwrapperJobLabelNamespace, aw.Namespace, resourceName, unstruct.GetName())
312328
inEtcd, err := dclient.Resource(rsrc).List(context.Background(), metav1.ListOptions{LabelSelector: labelSelector})
313329
if err != nil {
314330
return []*v1.Pod{}, err
@@ -317,8 +333,9 @@ func (gr *GenericResources) SyncQueueJob(aw *arbv1.AppWrapper, awr *arbv1.AppWra
317333
// Check to see if object already exists in etcd, if not, create the object.
318334
if inEtcd == nil || len(inEtcd.Items) < 1 {
319335
newName := name
320-
if len(newName) > 63 {
321-
newName = newName[:63]
336+
if len(newName) > 60 {
337+
newName = newName[:60]
338+
newName += GetRandomString(3)
322339
}
323340
unstruct.SetName(newName)
324341
//Asumption object is always namespaced
@@ -329,7 +346,7 @@ func (gr *GenericResources) SyncQueueJob(aw *arbv1.AppWrapper, awr *arbv1.AppWra
329346
if errors.IsAlreadyExists(err) {
330347
klog.V(4).Infof("%v\n", err.Error())
331348
} else {
332-
klog.Errorf("Error creating the object `%v`, the error is `%v`", newName, errors.ReasonForError(err))
349+
klog.Errorf("Error creating the object `%s/%s`, the error is `%v`", namespace, newName, errors.ReasonForError(err))
333350
return []*v1.Pod{}, err
334351
}
335352
}
@@ -499,7 +516,7 @@ func deleteObject(namespaced bool, namespace string, name string, rsrc schema.Gr
499516
}
500517

501518
if err != nil && !errors.IsNotFound(err) {
502-
klog.Errorf("[deleteObject] Error deleting the object `%v`, the error is `%v`.", name, errors.ReasonForError(err))
519+
klog.Errorf("[deleteObject] Error deleting the object `%v`, in namespace %v, the error is `%v`.", name, namespace, errors.ReasonForError(err))
503520
return err
504521
} else {
505522
klog.V(4).Infof("[deleteObject] Resource `%v` deleted.\n", name)
@@ -531,7 +548,7 @@ func GetListOfPodResourcesFromOneGenericItem(awr *arbv1.AppWrapperGenericResourc
531548
klog.V(8).Infof("[GetListOfPodResourcesFromOneGenericItem] Requested total allocation resource from 1 pod `%v`.\n", podTotalresource)
532549
}
533550

534-
// Addd individual pods to results
551+
// Add individual pods to results
535552
var replicaCount int = int(replicas)
536553
for i := 0; i < replicaCount; i++ {
537554
podResourcesList = append(podResourcesList, podTotalresource)
@@ -623,7 +640,7 @@ func getContainerResources(container v1.Container, replicas float64) *clustersta
623640
}
624641

625642
// returns status of an item present in etcd
626-
func (gr *GenericResources) IsItemCompleted(awgr *arbv1.AppWrapperGenericResource, namespace string, appwrapperName string, genericItemName string) (completed bool) {
643+
func (gr *GenericResources) IsItemCompleted(awgr *arbv1.AppWrapperGenericResource, appwrapperNamespace string, appwrapperName string, genericItemName string) (completed bool) {
627644
dd := gr.clients.Discovery()
628645
apigroups, err := restmapper.GetAPIGroupResources(dd)
629646
if err != nil {
@@ -654,8 +671,8 @@ func (gr *GenericResources) IsItemCompleted(awgr *arbv1.AppWrapperGenericResourc
654671
return false
655672
}
656673

657-
labelSelector := fmt.Sprintf("%s=%s", appwrapperJobName, appwrapperName)
658-
inEtcd, err := dclient.Resource(rsrc).Namespace(namespace).List(context.Background(), metav1.ListOptions{LabelSelector: labelSelector})
674+
labelSelector := fmt.Sprintf("%s=%s, %s=%s", appwrapperJobLabelName, appwrapperName, appwrapperJobLabelNamespace, appwrapperNamespace)
675+
inEtcd, err := dclient.Resource(rsrc).Namespace(appwrapperNamespace).List(context.Background(), metav1.ListOptions{LabelSelector: labelSelector})
659676
if err != nil {
660677
klog.Errorf("[IsItemCompleted] Error listing object: %v", err)
661678
return false
@@ -675,7 +692,7 @@ func (gr *GenericResources) IsItemCompleted(awgr *arbv1.AppWrapperGenericResourc
675692
}
676693
}
677694
if !validAwOwnerRef {
678-
klog.Warningf("[IsItemCompleted] Item owner name %v does match appwrappper name %v in namespace %v", unstructuredObjectName, appwrapperName, namespace)
695+
klog.Warningf("[IsItemCompleted] Item owner name %v does match appwrappper name %v in namespace %v", unstructuredObjectName, appwrapperName, appwrapperNamespace)
679696
continue
680697
}
681698

test/e2e-kuttl-borrowing/steps/03-install.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ spec:
3131
name: my-job-1
3232
namespace: test
3333
labels:
34-
appwrapper.mcad.ibm.com: my-job-1
34+
workload.codeflare.dev/appwrapper: my-job-1
3535
spec:
3636
parallelism: 1
3737
completions: 1
@@ -40,7 +40,7 @@ spec:
4040
name: my-job-1
4141
namespace: test
4242
labels:
43-
appwrapper.mcad.ibm.com: my-job-1
43+
workload.codeflare.dev/appwrapper: my-job-1
4444
spec:
4545
terminationGracePeriodSeconds: 1
4646
restartPolicy: Never

test/e2e-kuttl-borrowing/steps/04-install.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ spec:
3131
name: my-job-2
3232
namespace: test
3333
labels:
34-
appwrapper.mcad.ibm.com: my-job-2
34+
workload.codeflare.dev/appwrapper: my-job-2
3535
spec:
3636
parallelism: 1
3737
completions: 1
@@ -40,7 +40,7 @@ spec:
4040
name: my-job-2
4141
namespace: test
4242
labels:
43-
appwrapper.mcad.ibm.com: my-job-2
43+
workload.codeflare.dev/appwrapper: my-job-2
4444
spec:
4545
terminationGracePeriodSeconds: 1
4646
restartPolicy: Never

test/e2e-kuttl-deployment-01/steps/01-assert.yaml

+3-2
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,9 @@ metadata:
1414
namespace: start-up
1515
labels:
1616
app: no-quota-deployment-01
17-
appwrapper.mcad.ibm.com: no-quota-deployment-01
18-
resourceName: no-quota-deployment-01
17+
workload.codeflare.dev/appwrapper: no-quota-deployment-01
18+
workload.codeflare.dev/appwrapper-namespace: start-up
19+
workload.codeflare.dev/resourceName: no-quota-deployment-01
1920
status:
2021
availableReplicas: 1
2122
observedGeneration: 1

test/e2e-kuttl-deployment-01/steps/02-install.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ spec:
2727
name: no-quota-job-02
2828
namespace: start-up
2929
labels:
30-
appwrapper.mcad.ibm.com: no-quota-job-02
30+
workload.codeflare.dev/appwrapper: no-quota-job-02
3131
spec:
3232
parallelism: 1
3333
completions: 1
@@ -36,7 +36,7 @@ spec:
3636
name: no-quota-job-1
3737
namespace: start-up
3838
labels:
39-
appwrapper.mcad.ibm.com: no-quota-job-02
39+
workload.codeflare.dev/appwrapper: no-quota-job-02
4040
spec:
4141
terminationGracePeriodSeconds: 1
4242
restartPolicy: Never

test/e2e-kuttl-deployment-01/steps/03-assert.yaml

+19-16
Original file line numberDiff line numberDiff line change
@@ -12,28 +12,31 @@ metadata:
1212
name: hold-completion-job-03-01
1313
namespace: start-up
1414
labels:
15-
appwrapper.mcad.ibm.com: hold-completion-job-03
16-
resourceName: hold-completion-job-03-01
15+
workload.codeflare.dev/appwrapper: hold-completion-job-03
16+
workload.codeflare.dev/appwrapper-namespace: start-up
17+
workload.codeflare.dev/resourceName: hold-completion-job-03-01
1718
status:
1819
conditions:
1920
- status: "True"
2021
type: Complete
2122
succeeded: 1
2223
---
23-
apiVersion: v1
24-
kind: Pod
25-
metadata:
24+
apiVersion: v1
25+
kind: Pod
26+
metadata:
2627
namespace: start-up
27-
labels:
28-
appwrapper.mcad.ibm.com: hold-completion-job-03
29-
job-name: hold-completion-job-03-01
30-
resourceName: hold-completion-job-03-01
28+
labels:
29+
workload.codeflare.dev/appwrapper: hold-completion-job-03
30+
workload.codeflare.dev/appwrapper-namespace: start-up
31+
job-name: hold-completion-job-03-01
32+
workload.codeflare.dev/resourceName: hold-completion-job-03-01
3133
---
32-
apiVersion: v1
33-
kind: Pod
34-
metadata:
34+
apiVersion: v1
35+
kind: Pod
36+
metadata:
3537
namespace: start-up
36-
labels:
37-
appwrapper.mcad.ibm.com: hold-completion-job-03
38-
job-name: hold-completion-job-03-02
39-
resourceName: hold-completion-job-03-02
38+
labels:
39+
workload.codeflare.dev/appwrapper: hold-completion-job-03
40+
workload.codeflare.dev/appwrapper-namespace: start-up
41+
job-name: hold-completion-job-03-02
42+
workload.codeflare.dev/resourceName: hold-completion-job-03-02

test/e2e-kuttl-deployment-01/steps/03-install.yaml

+4-4
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ spec:
3131
name: hold-completion-job-03-01
3232
namespace: start-up
3333
labels:
34-
appwrapper.mcad.ibm.com: hold-completion-job-03
34+
workload.codeflare.dev/appwrapper: hold-completion-job-03
3535
spec:
3636
parallelism: 1
3737
completions: 1
@@ -40,7 +40,7 @@ spec:
4040
name: hold-completion-job-03-01
4141
namespace: start-up
4242
labels:
43-
appwrapper.mcad.ibm.com: hold-completion-job-03
43+
workload.codeflare.dev/appwrapper: hold-completion-job-03
4444
spec:
4545
terminationGracePeriodSeconds: 1
4646
restartPolicy: Never
@@ -80,7 +80,7 @@ spec:
8080
name: hold-completion-job-03-02
8181
namespace: start-up
8282
labels:
83-
appwrapper.mcad.ibm.com: hold-completion-job-03
83+
workload.codeflare.dev/appwrapper: hold-completion-job-03
8484
spec:
8585
parallelism: 1
8686
completions: 1
@@ -89,7 +89,7 @@ spec:
8989
name: hold-completion-job-03-02
9090
namespace: start-up
9191
labels:
92-
appwrapper.mcad.ibm.com: hold-completion-job-03
92+
workload.codeflare.dev/appwrapper: hold-completion-job-03
9393
spec:
9494
terminationGracePeriodSeconds: 1
9595
restartPolicy: Never

test/e2e-kuttl-deployment-01/steps/06-install.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ spec:
3131
name: no-quota-job-06
3232
namespace: start-up
3333
labels:
34-
appwrapper.mcad.ibm.com: no-quota-job-06
34+
workload.codeflare.dev/appwrapper: no-quota-job-06
3535
spec:
3636
parallelism: 1
3737
completions: 1
@@ -40,7 +40,7 @@ spec:
4040
name: no-quota-job-06
4141
namespace: start-up
4242
labels:
43-
appwrapper.mcad.ibm.com: no-quota-job-06
43+
workload.codeflare.dev/appwrapper: no-quota-job-06
4444
spec:
4545
terminationGracePeriodSeconds: 1
4646
restartPolicy: Never

0 commit comments

Comments
 (0)