Skip to content

Commit 7ff112c

Browse files
Prevent no-op updates of ConstraintTemplates
The policy added in the test, with both a ConstraintTemplate and Constraint, was found to oscillate between "created" and "updated" events. This filled up the policy history, caused repeated reconciles in multiple controllers, and unnecessary API requests. The Semantic.DeepEqual has been replaced with comparing the marshalled JSON for the desired and actual object specs - this is likely slower, but more accurate. It is not clear what in the Policy precisely caused the issue. As a second protection against this behavior, when the template-sync updates an object, it now compares the new resourceVersion against the old one, to skip work if the object did not actually change. Refs: - https://issues.redhat.com/browse/ACM-29231 Signed-off-by: Justin Kulikauskas <[email protected]>
1 parent 9c551d9 commit 7ff112c

File tree

3 files changed

+230
-28
lines changed

3 files changed

+230
-28
lines changed

controllers/templatesync/template_sync.go

Lines changed: 33 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
package templatesync
55

66
import (
7+
"bytes"
78
"context"
89
"encoding/json"
910
"errors"
@@ -811,12 +812,12 @@ func (r *PolicyReconciler) Reconcile(ctx context.Context, request reconcile.Requ
811812
eObjectUnstructured["spec"] = tObjectUnstructured.Object["spec"]
812813

813814
eObject.SetAnnotations(tObjectUnstructured.GetAnnotations())
814-
815815
eObject.SetLabels(tObjectUnstructured.GetLabels())
816-
817816
eObject.SetOwnerReferences(tObjectUnstructured.GetOwnerReferences())
818817

819-
_, err = res.Update(ctx, eObject, metav1.UpdateOptions{
818+
previousRV := eObject.GetResourceVersion()
819+
820+
updatedObj, err := res.Update(ctx, eObject, metav1.UpdateOptions{
820821
FieldValidation: metav1.FieldValidationStrict,
821822
})
822823
if err != nil {
@@ -845,38 +846,40 @@ func (r *PolicyReconciler) Reconcile(ctx context.Context, request reconcile.Requ
845846
continue
846847
}
847848

848-
successMsg := fmt.Sprintf("Policy template %s was updated successfully", tName)
849+
if updatedObj.GetResourceVersion() != previousRV {
850+
successMsg := fmt.Sprintf("Policy template %s was updated successfully", tName)
849851

850-
// Handle cluster scoped objects
851-
if isClusterScoped {
852-
addFinalizer = true
852+
// Handle cluster scoped objects
853+
if isClusterScoped {
854+
addFinalizer = true
853855

854-
reqLogger.V(2).Info("Finalizer required for " + gvk.Kind)
856+
reqLogger.V(2).Info("Finalizer required for " + gvk.Kind)
855857

856-
if isGkObj {
857-
r.setCreatedGkConstraint(true)
858-
}
859-
// The ConstraintTemplate does not generate status, so we need to generate an event for it
860-
if isGkConstraintTemplate {
861-
tLogger.Info("Emitting status event for " + gvk.Kind)
862-
msg := fmt.Sprintf("%s %s was updated successfully", gvk.Kind, tName)
858+
if isGkObj {
859+
r.setCreatedGkConstraint(true)
860+
}
861+
// The ConstraintTemplate does not generate status, so we need to generate an event for it
862+
if isGkConstraintTemplate {
863+
tLogger.Info("Emitting status event for " + gvk.Kind)
864+
msg := fmt.Sprintf("%s %s was updated successfully", gvk.Kind, tName)
863865

864-
emitErr := r.emitTemplateSuccess(ctx, instance, tIndex, tName, isClusterScoped, msg)
865-
if emitErr != nil {
866-
resultError = emitErr
866+
emitErr := r.emitTemplateSuccess(ctx, instance, tIndex, tName, isClusterScoped, msg)
867+
if emitErr != nil {
868+
resultError = emitErr
869+
}
867870
}
868871
}
869-
}
870872

871-
err = r.handleSyncSuccess(ctx, instance, tIndex, tName, successMsg, res, gvk.GroupVersion(), eObject)
872-
if err != nil {
873-
resultError = err
874-
tLogger.Error(resultError, "Error after updating template (will requeue)")
873+
err = r.handleSyncSuccess(ctx, instance, tIndex, tName, successMsg, res, gvk.GroupVersion(), eObject)
874+
if err != nil {
875+
resultError = err
876+
tLogger.Error(resultError, "Error after updating template (will requeue)")
875877

876-
policySystemErrorsCounter.WithLabelValues(instance.Name, tName, "patch-error").Inc()
877-
}
878+
policySystemErrorsCounter.WithLabelValues(instance.Name, tName, "patch-error").Inc()
879+
}
878880

879-
tLogger.Info("Existing object has been updated")
881+
tLogger.Info("Existing object has been updated")
882+
}
880883
} else {
881884
err = r.handleSyncSuccess(ctx, instance, tIndex, tName, "", res, gvk.GroupVersion(), eObject)
882885
if err != nil {
@@ -1044,7 +1047,10 @@ func equivalentTemplates(eObject *unstructured.Unstructured, tObject *unstructur
10441047
}
10451048
}
10461049

1047-
if !equality.Semantic.DeepEqual(eObject.UnstructuredContent()["spec"], tObject.Object["spec"]) {
1050+
eJSON, e1 := json.Marshal(eObject.UnstructuredContent()["spec"])
1051+
tJSON, e2 := json.Marshal(tObject.Object["spec"])
1052+
1053+
if !bytes.Equal(eJSON, tJSON) || e1 != nil || e2 != nil {
10481054
return false
10491055
}
10501056

test/e2e/case17_gatekeeper_sync_test.go

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ var _ = Describe("Test Gatekeeper ConstraintTemplate and constraint sync", Order
3636
policyYamlExtra string = yamlBasePath + policyName + "-extra.yaml"
3737
policyName2 string = policyName + "-2"
3838
policyYaml2 string = yamlBasePath + policyName2 + ".yaml"
39+
maxIAMPolicyName string = "gatekeeper-limitclusteradmin"
40+
maxIAMPolicyYaml string = yamlBasePath + "limitclusteradmin-policy.yaml"
3941
gkAuditFrequency time.Duration = time.Minute
4042
gkConstraintTemplateName string = caseNumber + "constrainttemplate"
4143
gkConstraintTemplateYaml string = yamlBasePath + gkConstraintTemplateName + ".yaml"
@@ -98,7 +100,7 @@ var _ = Describe("Test Gatekeeper ConstraintTemplate and constraint sync", Order
98100
Expect(err).ToNot(HaveOccurred())
99101
}
100102

101-
for _, pName := range []string{policyName, policyName2} {
103+
for _, pName := range []string{policyName, policyName2, maxIAMPolicyName} {
102104
By("Deleting policy " + pName + " on the hub in ns:" + clusterNamespaceOnHub)
103105
err := clientHubDynamic.Resource(gvrPolicy).Namespace(clusterNamespaceOnHub).Delete(
104106
context.TODO(), pName, metav1.DeleteOptions{},
@@ -496,6 +498,66 @@ var _ = Describe("Test Gatekeeper ConstraintTemplate and constraint sync", Order
496498
)
497499
})
498500

501+
It("should not repeatedly reconcile the 'maxiamclusterbindings' ConstraintTemplate", func() {
502+
By("Applying the " + maxIAMPolicyName + " policy")
503+
_, err := kubectlHub("apply", "-f", maxIAMPolicyYaml, "-n", clusterNamespaceOnHub)
504+
Expect(err).NotTo(HaveOccurred())
505+
506+
By("Waiting for policy status to be populated")
507+
Eventually(func(g Gomega) {
508+
plc := propagatorutils.GetWithTimeout(clientManagedDynamic, gvrPolicy,
509+
maxIAMPolicyName, clusterNamespace, true, defaultTimeoutSeconds)
510+
511+
dpt, found, err := unstructured.NestedSlice(plc.Object, "status", "details")
512+
g.Expect(err).NotTo(HaveOccurred())
513+
g.Expect(found).To(BeTrue())
514+
g.Expect(dpt).To(HaveLen(2))
515+
516+
dpt0, ok := dpt[0].(map[string]any)
517+
g.Expect(ok).To(BeTrue())
518+
519+
history0, found, err := unstructured.NestedSlice(dpt0, "history")
520+
g.Expect(err).NotTo(HaveOccurred())
521+
g.Expect(found).To(BeTrue())
522+
g.Expect(history0).NotTo(BeEmpty())
523+
524+
dpt1, ok := dpt[1].(map[string]any)
525+
g.Expect(ok).To(BeTrue())
526+
527+
history1, found, err := unstructured.NestedSlice(dpt1, "history")
528+
g.Expect(err).NotTo(HaveOccurred())
529+
g.Expect(found).To(BeTrue())
530+
g.Expect(history1).NotTo(BeEmpty())
531+
}, defaultTimeoutSeconds, 1).Should(Succeed())
532+
533+
By("Verifying the policy history does not fill up")
534+
Consistently(func(g Gomega) {
535+
plc := propagatorutils.GetWithTimeout(clientManagedDynamic, gvrPolicy,
536+
maxIAMPolicyName, clusterNamespace, true, defaultTimeoutSeconds)
537+
538+
dpt, found, err := unstructured.NestedSlice(plc.Object, "status", "details")
539+
g.Expect(err).NotTo(HaveOccurred())
540+
g.Expect(found).To(BeTrue())
541+
g.Expect(dpt).To(HaveLen(2))
542+
543+
dpt0, ok := dpt[0].(map[string]any)
544+
g.Expect(ok).To(BeTrue())
545+
546+
history0, found, err := unstructured.NestedSlice(dpt0, "history")
547+
g.Expect(err).NotTo(HaveOccurred())
548+
g.Expect(found).To(BeTrue())
549+
g.Expect(len(history0)).To(BeNumerically("<", 5))
550+
551+
dpt1, ok := dpt[1].(map[string]any)
552+
g.Expect(ok).To(BeTrue())
553+
554+
history1, found, err := unstructured.NestedSlice(dpt1, "history")
555+
g.Expect(err).NotTo(HaveOccurred())
556+
g.Expect(found).To(BeTrue())
557+
g.Expect(len(history1)).To(BeNumerically("<", 5))
558+
}, "20s", 1).Should(Succeed())
559+
})
560+
499561
Describe("Test policy ordering with gatekeeper objects", func() {
500562
const (
501563
waitForTemplateName = "case17-gk-dep-on-tmpl"
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
# From https://github.com/open-cluster-management-io/policy-collection/blob/b60b1d855185fb790d6d173b0daddc3e843013db/community/AC-Access-Control/policy-gatekeeper-limitclusteradmin.yaml
2+
apiVersion: policy.open-cluster-management.io/v1
3+
kind: Policy
4+
metadata:
5+
name: gatekeeper-limitclusteradmin
6+
spec:
7+
disabled: false
8+
policy-templates:
9+
- objectDefinition:
10+
apiVersion: templates.gatekeeper.sh/v1
11+
kind: ConstraintTemplate
12+
metadata:
13+
name: maxiamclusterbindings
14+
spec:
15+
crd:
16+
spec:
17+
names:
18+
kind: MaxIAMClusterBindings
19+
validation:
20+
openAPIV3Schema:
21+
type: object
22+
properties:
23+
maxClusterRoleBindingUsers:
24+
type: number
25+
description: Maximum number of users and users within groups allowed to be bound to the specified role
26+
example: 5
27+
clusterRole:
28+
type: string
29+
description: Name of the ClusterRole to validate
30+
example: "cluster-admin"
31+
ignoreClusterRoleBindings:
32+
type: array
33+
items:
34+
type: string
35+
description: List of ClusterRoleBindings to exclude from the validation count
36+
example: '["myclusteradmins-crb", "system-admins"]'
37+
38+
targets:
39+
- target: admission.k8s.gatekeeper.sh
40+
libs:
41+
- |
42+
package lib.helpers
43+
44+
# Types of subjects to count
45+
count_kinds := {"User", "Group"}
46+
47+
validate_subject(kind, roleRef) {
48+
kind == count_kinds[_]
49+
is_clusterrole_ref(roleRef)
50+
}
51+
52+
is_clusterrole_ref(role) {
53+
role.kind == "ClusterRole"
54+
role.name == input.parameters.clusterRole
55+
}
56+
57+
get_user_names("User", s_name) := names {
58+
names := {nm | nm := s_name}
59+
}
60+
61+
get_user_names("Group", s_name) := names {
62+
names := {nm | nm := data.inventory.cluster["user.openshift.io/v1"].Group[s_name].users[_]}
63+
}
64+
65+
is_excluded(exclusion_list, crd_name) {
66+
exclusions := {e | e := exclusion_list[_]}
67+
crd_name == exclusions[_]
68+
}
69+
70+
rego: |
71+
package maxiamclusterbindings
72+
73+
import data.lib.helpers.validate_subject
74+
import data.lib.helpers.is_clusterrole_ref
75+
import data.lib.helpers.get_user_names
76+
import data.lib.helpers.is_excluded
77+
78+
max_admins := input.parameters.maxClusterRoleBindingUsers
79+
violation[{"msg": msg}] {
80+
is_number(max_admins)
81+
max_admins < 1
82+
msg = sprintf("maxClusterRoleBindingUsers parameter must be greater than 0(zero) maxClusterRoleBindingUsers: %v", [max_admins] )
83+
}
84+
85+
# Make a list of all ClusterRoleBindings that should be excluded from the count
86+
# Exclude the binding under review so if the number of subjects decreases we don't have an incorrect count.
87+
# The updated subjects will be added back in to the count below.
88+
bindings_to_exclude[crd_name] {
89+
crd_name := input.review.object.metadata.name
90+
}
91+
bindings_to_exclude[crd_name] {
92+
crd_name := input.parameters.ignoreClusterRoleBindings[_]
93+
}
94+
95+
violation[{"msg": msg}] {
96+
# check if the requested role references clusterRole parameter
97+
is_clusterrole_ref(input.review.object.roleRef)
98+
99+
# check if the requested binding should be excluded
100+
not is_excluded(input.parameters.ignoreClusterRoleBindings, input.review.object.metadata.name)
101+
102+
# Get all ClusterRoleBinding that are not excluded
103+
crb_list := {crb | not is_excluded(bindings_to_exclude, data.inventory.cluster["rbac.authorization.k8s.io/v1"].ClusterRoleBinding[i].metadata.name);
104+
crb := data.inventory.cluster["rbac.authorization.k8s.io/v1"].ClusterRoleBinding[i]}
105+
106+
# Get the list of all users in the list
107+
current_subjects := {sub | validate_subject(crb_list[i].subjects[s].kind, crb_list[i].roleRef);
108+
sub := get_user_names(crb_list[i].subjects[s].kind, crb_list[i].subjects[s].name)[_]}
109+
110+
new_subjects := {sub | validate_subject(input.review.object.subjects[s].kind, input.review.object.roleRef);
111+
sub := get_user_names(input.review.object.subjects[s].kind, input.review.object.subjects[s].name)[_]}
112+
total_admins := count(current_subjects | new_subjects)
113+
total_admins > max_admins
114+
115+
msg := sprintf("Total number of bindings to role '%v' exceeded. max-admins allowed: %v - current total: %v", [input.parameters.clusterRole, max_admins, total_admins] )
116+
}
117+
118+
- objectDefinition:
119+
apiVersion: constraints.gatekeeper.sh/v1beta1
120+
kind: MaxIAMClusterBindings
121+
metadata:
122+
name: max-cluster-admins
123+
spec:
124+
match:
125+
kinds:
126+
- apiGroups:
127+
- rbac.authorization.k8s.io
128+
kinds:
129+
- ClusterRoleBinding
130+
parameters:
131+
clusterRole: cluster-admin
132+
ignoreClusterRoleBindings:
133+
- iam-max-groups
134+
maxClusterRoleBindingUsers: 5

0 commit comments

Comments (0)