Skip to content

Commit 81a5903

Browse files
authored
add new addonHealthCheck func to check all fields and support wildcard (#289)
Signed-off-by: Zhiwei Yin <[email protected]>
1 parent 94bcb3c commit 81a5903

File tree

14 files changed

+616
-44
lines changed

14 files changed

+616
-44
lines changed

cmd/example/helloworld_helm/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ func runController(ctx context.Context, kubeConfig *rest.Config) error {
132132
utils.AgentInstallNamespaceFromDeploymentConfigFunc(
133133
utils.NewAddOnDeploymentConfigGetter(addonClient),
134134
),
135-
).
135+
).WithAgentHealthProber(helloworld_helm.AgentHealthProber()).
136136
BuildHelmAgentAddon()
137137
if err != nil {
138138
klog.Errorf("failed to build agent %v", err)

examples/helloworld_helm/helloworld_helm.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@ import (
99
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1010
"k8s.io/client-go/kubernetes"
1111
"open-cluster-management.io/addon-framework/pkg/addonfactory"
12+
"open-cluster-management.io/addon-framework/pkg/agent"
1213
addonapiv1alpha1 "open-cluster-management.io/api/addon/v1alpha1"
1314
clusterv1 "open-cluster-management.io/api/cluster/v1"
15+
workapiv1 "open-cluster-management.io/api/work/v1"
1416
)
1517

1618
const (
@@ -112,3 +114,46 @@ func GetImageValues(kubeClient kubernetes.Interface) addonfactory.GetValuesFunc
112114
return overrideValues, nil
113115
}
114116
}
117+
118+
func AgentHealthProber() *agent.HealthProber {
119+
return &agent.HealthProber{
120+
Type: agent.HealthProberTypeWork,
121+
WorkProber: &agent.WorkHealthProber{
122+
ProbeFields: []agent.ProbeField{
123+
{
124+
ResourceIdentifier: workapiv1.ResourceIdentifier{
125+
Group: "apps",
126+
Resource: "deployments",
127+
Name: "*",
128+
Namespace: "*",
129+
},
130+
ProbeRules: []workapiv1.FeedbackRule{
131+
{
132+
Type: workapiv1.WellKnownStatusType,
133+
},
134+
},
135+
},
136+
},
137+
HealthChecker: func(fields []agent.FieldResult, cluster *clusterv1.ManagedCluster,
138+
addon *addonapiv1alpha1.ManagedClusterAddOn) error {
139+
if len(fields) == 0 {
140+
return fmt.Errorf("no fields found in health checker")
141+
}
142+
for _, field := range fields {
143+
if len(field.FeedbackResult.Values) == 0 {
144+
continue
145+
}
146+
switch field.ResourceIdentifier.Name {
147+
case "helloworldhelm-agent":
148+
for _, value := range field.FeedbackResult.Values {
149+
if value.Name == "AvailableReplicas" && *value.Value.Integer == 1 {
150+
return nil
151+
}
152+
}
153+
}
154+
}
155+
return fmt.Errorf("helloworldhelmhm agent is not ready")
156+
},
157+
},
158+
}
159+
}

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ require (
2222
k8s.io/component-base v0.30.2
2323
k8s.io/klog/v2 v2.120.1
2424
k8s.io/utils v0.0.0-20240310230437-4693a0247e57
25-
open-cluster-management.io/api v0.15.0
25+
open-cluster-management.io/api v0.15.1-0.20241120090202-cb7ce98ab874
2626
open-cluster-management.io/sdk-go v0.15.0
2727
sigs.k8s.io/controller-runtime v0.18.4
2828
)

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -474,8 +474,8 @@ k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7F
474474
k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98=
475475
k8s.io/utils v0.0.0-20240310230437-4693a0247e57 h1:gbqbevonBh57eILzModw6mrkbwM0gQBEuevE/AaBsHY=
476476
k8s.io/utils v0.0.0-20240310230437-4693a0247e57/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
477-
open-cluster-management.io/api v0.15.0 h1:lRee1KOlGHZb2scTA7ff9E9Fxt2hJc7jpkHnaCbvkOU=
478-
open-cluster-management.io/api v0.15.0/go.mod h1:9erZEWEn4bEqh0nIX2wA7f/s3KCuFycQdBrPrRzi0QM=
477+
open-cluster-management.io/api v0.15.1-0.20241120090202-cb7ce98ab874 h1:WgkuYXTbJV7EK+qtiMq3soa21faGUKeTG5w0C8Mn1Ok=
478+
open-cluster-management.io/api v0.15.1-0.20241120090202-cb7ce98ab874/go.mod h1:9erZEWEn4bEqh0nIX2wA7f/s3KCuFycQdBrPrRzi0QM=
479479
open-cluster-management.io/sdk-go v0.15.0 h1:2IAJnPfUoY6rPC5w7LhqAnvIlgekPoVW03LdZO1unIM=
480480
open-cluster-management.io/sdk-go v0.15.0/go.mod h1:fi5WBsbC5K3txKb8eRLuP0Sim/Oqz/PHX18skAEyjiA=
481481
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.29.0 h1:/U5vjBbQn3RChhv7P11uhYvCSm5G2GaIi5AIGBS6r4c=

pkg/addonmanager/controllers/agentdeploy/healthcheck_sync.go

Lines changed: 93 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package agentdeploy
33
import (
44
"context"
55
"fmt"
6+
"regexp"
67
"strings"
78

89
appsv1 "k8s.io/api/apps/v1"
@@ -167,17 +168,39 @@ func (s *healthCheckSyncer) probeAddonStatusByWorks(
167168
manifestConditions = append(manifestConditions, work.Status.ResourceStatus.Manifests...)
168169
}
169170

170-
probeFields, healthChecker, err := s.analyzeWorkProber(s.agentAddon, cluster, addon)
171+
// TODO: remove healthCheck since healthCheck has been deprecated
172+
probeFields, healthCheck, healthChecker, err := s.analyzeWorkProber(s.agentAddon, cluster, addon)
171173
if err != nil {
172174
// should not happen, return
173175
return err
174176
}
175177

178+
var FieldResults []agent.FieldResult
179+
176180
for _, field := range probeFields {
177-
result := findResultByIdentifier(field.ResourceIdentifier, manifestConditions)
181+
results := findResultsByIdentifier(field.ResourceIdentifier, manifestConditions)
182+
183+
// healthCheck will be ignored if healthChecker is set
184+
if healthChecker != nil {
185+
if len(results) != 0 {
186+
FieldResults = append(FieldResults, results...)
187+
}
188+
continue
189+
}
190+
191+
if healthCheck == nil {
192+
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
193+
Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable,
194+
Status: metav1.ConditionFalse,
195+
Reason: addonapiv1alpha1.AddonAvailableReasonProbeUnavailable,
196+
Message: fmt.Sprintf("health checker function is not set %v", err),
197+
})
198+
return nil
199+
}
200+
178201
// if no results are returned. it is possible that work agent has not returned the feedback value.
179202
// mark condition to unknown
180-
if result == nil {
203+
if len(results) == 0 {
181204
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
182205
Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable,
183206
Status: metav1.ConditionUnknown,
@@ -189,16 +212,29 @@ func (s *healthCheckSyncer) probeAddonStatusByWorks(
189212
return nil
190213
}
191214

192-
err := healthChecker(field.ResourceIdentifier, *result)
193-
if err != nil {
194-
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
195-
Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable,
196-
Status: metav1.ConditionFalse,
197-
Reason: addonapiv1alpha1.AddonAvailableReasonProbeUnavailable,
198-
Message: fmt.Sprintf("Probe addon unavailable with err %v", err),
199-
})
200-
return nil
215+
for _, result := range results {
216+
err := healthCheck(result.ResourceIdentifier, result.FeedbackResult)
217+
if err != nil {
218+
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
219+
Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable,
220+
Status: metav1.ConditionFalse,
221+
Reason: addonapiv1alpha1.AddonAvailableReasonProbeUnavailable,
222+
Message: fmt.Sprintf("Probe addon unavailable with err %v", err),
223+
})
224+
return nil
225+
}
201226
}
227+
228+
}
229+
230+
if healthChecker != nil && healthChecker(FieldResults, cluster, addon) != nil {
231+
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
232+
Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable,
233+
Status: metav1.ConditionFalse,
234+
Reason: addonapiv1alpha1.AddonAvailableReasonProbeUnavailable,
235+
Message: fmt.Sprintf("Probe addon unavailable with err %v", err),
236+
})
237+
return nil
202238
}
203239

204240
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
@@ -210,25 +246,28 @@ func (s *healthCheckSyncer) probeAddonStatusByWorks(
210246
return nil
211247
}
212248

249+
// TODO: use wildcard to refactor analyzeDeploymentWorkProber and analyzeWorkloadsWorkProber
213250
func (s *healthCheckSyncer) analyzeWorkProber(
214251
agentAddon agent.AgentAddon,
215252
cluster *clusterv1.ManagedCluster,
216253
addon *addonapiv1alpha1.ManagedClusterAddOn,
217-
) ([]agent.ProbeField, agent.AddonHealthCheckFunc, error) {
254+
) ([]agent.ProbeField, agent.AddonHealthCheckFunc, agent.AddonHealthCheckerFunc, error) {
218255

219256
switch agentAddon.GetAgentAddonOptions().HealthProber.Type {
220257
case agent.HealthProberTypeWork:
221258
workProber := agentAddon.GetAgentAddonOptions().HealthProber.WorkProber
222259
if workProber != nil {
223-
return workProber.ProbeFields, workProber.HealthCheck, nil
260+
return workProber.ProbeFields, workProber.HealthCheck, workProber.HealthChecker, nil
224261
}
225-
return nil, nil, fmt.Errorf("work prober is not configured")
262+
return nil, nil, nil, fmt.Errorf("work prober is not configured")
226263
case agent.HealthProberTypeDeploymentAvailability:
227-
return s.analyzeDeploymentWorkProber(agentAddon, cluster, addon)
264+
probeFields, heathCheck, err := s.analyzeDeploymentWorkProber(agentAddon, cluster, addon)
265+
return probeFields, heathCheck, nil, err
228266
case agent.HealthProberTypeWorkloadAvailability:
229-
return s.analyzeWorkloadsWorkProber(agentAddon, cluster, addon)
267+
probeFields, heathCheck, err := s.analyzeWorkloadsWorkProber(agentAddon, cluster, addon)
268+
return probeFields, heathCheck, nil, err
230269
default:
231-
return nil, nil, fmt.Errorf("unsupported health prober type %s", agentAddon.GetAgentAddonOptions().HealthProber.Type)
270+
return nil, nil, nil, fmt.Errorf("unsupported health prober type %s", agentAddon.GetAgentAddonOptions().HealthProber.Type)
232271
}
233272
}
234273

@@ -294,27 +333,46 @@ func (s *healthCheckSyncer) analyzeWorkloadsWorkProber(
294333
return probeFields, utils.WorkloadAvailabilityHealthCheck, nil
295334
}
296335

297-
func findResultByIdentifier(identifier workapiv1.ResourceIdentifier, manifestConditions []workapiv1.ManifestCondition) *workapiv1.StatusFeedbackResult {
336+
func findResultsByIdentifier(identifier workapiv1.ResourceIdentifier,
337+
manifestConditions []workapiv1.ManifestCondition) []agent.FieldResult {
338+
var results []agent.FieldResult
298339
for _, status := range manifestConditions {
299-
if identifier.Group != status.ResourceMeta.Group {
300-
continue
301-
}
302-
if identifier.Resource != status.ResourceMeta.Resource {
303-
continue
304-
}
305-
if identifier.Name != status.ResourceMeta.Name {
306-
continue
307-
}
308-
if identifier.Namespace != status.ResourceMeta.Namespace {
309-
continue
340+
if resourceMatch(status.ResourceMeta, identifier) && len(status.StatusFeedbacks.Values) != 0 {
341+
results = append(results, agent.FieldResult{
342+
ResourceIdentifier: workapiv1.ResourceIdentifier{
343+
Group: status.ResourceMeta.Group,
344+
Resource: status.ResourceMeta.Resource,
345+
Name: status.ResourceMeta.Name,
346+
Namespace: status.ResourceMeta.Namespace,
347+
},
348+
FeedbackResult: status.StatusFeedbacks,
349+
})
310350
}
351+
}
311352

312-
if len(status.StatusFeedbacks.Values) == 0 {
313-
return nil
314-
}
353+
return results
354+
}
315355

316-
return &status.StatusFeedbacks
356+
// wildcardMatch reports whether resource matches target. Every "*" in target
// matches any run of characters (including none); all other characters in
// target are compared literally. The whole of resource must match.
func wildcardMatch(resource, target string) bool {
	// Fast paths: a bare wildcard or an exact match needs no regexp.
	switch target {
	case "*", resource:
		return true
	}

	// Quote each literal segment between the wildcards, then stitch the
	// segments back together with ".*" standing in for every "*".
	segments := strings.Split(target, "*")
	for i, segment := range segments {
		segments[i] = regexp.QuoteMeta(segment)
	}

	matcher, err := regexp.Compile("^" + strings.Join(segments, ".*") + "$")
	if err != nil {
		return false
	}
	return matcher.MatchString(resource)
}
372+
373+
func resourceMatch(resourceMeta workapiv1.ManifestResourceMeta, resource workapiv1.ResourceIdentifier) bool {
374+
return resourceMeta.Group == resource.Group &&
375+
resourceMeta.Resource == resource.Resource &&
376+
wildcardMatch(resourceMeta.Namespace, resource.Namespace) &&
377+
wildcardMatch(resourceMeta.Name, resource.Name)
320378
}

0 commit comments

Comments
 (0)