Skip to content

Commit a84ff50

Browse files
authored
fix: Fix cluster health score query (#464)
1 parent ffabe7a commit a84ff50

File tree

2 files changed

+161
-8
lines changed

2 files changed

+161
-8
lines changed

pkg/cache/db/cache_test.go

Lines changed: 142 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -332,37 +332,172 @@ func TestComponentCache_HealthScore(t *testing.T) {
332332
defer db.GetComponentCache().Close()
333333

334334
uid := testUID
335-
component := createComponent(uid, nil, WithState(client.ComponentStateRunning))
335+
component := createComponent(uid, nil, WithState(client.ComponentStateRunning), WithKind("Pod"))
336336
err := db.GetComponentCache().SetComponent(component)
337337
require.NoError(t, err)
338338

339-
child1 := createComponent("child1", &uid, WithState(client.ComponentStateRunning))
339+
child1 := createComponent("child1", &uid, WithState(client.ComponentStateRunning), WithKind("Pod"))
340340
err = db.GetComponentCache().SetComponent(child1)
341341
require.NoError(t, err)
342342

343-
child2 := createComponent("child2", &uid, WithState(client.ComponentStateFailed))
343+
child2 := createComponent("child2", &uid, WithState(client.ComponentStateRunning), WithKind("Pod"))
344344
err = db.GetComponentCache().SetComponent(child2)
345345
require.NoError(t, err)
346346

347347
score, err := db.GetComponentCache().HealthScore()
348348
require.NoError(t, err)
349-
assert.Equal(t, int64(66), score)
349+
assert.Equal(t, int64(100), score)
350350

351-
child3 := createComponent("child3", &uid, WithState(client.ComponentStateFailed))
351+
child3 := createComponent("child3", &uid, WithState(client.ComponentStateFailed), WithKind("Pod"))
352352
err = db.GetComponentCache().SetComponent(child3)
353353
require.NoError(t, err)
354354

355355
score, err = db.GetComponentCache().HealthScore()
356356
require.NoError(t, err)
357-
assert.Equal(t, int64(50), score)
357+
assert.Equal(t, int64(75), score)
358358

359-
child4 := createComponent("child4", &uid, WithState(client.ComponentStateFailed))
359+
child4 := createComponent("child4", &uid, WithState(client.ComponentStateFailed), WithKind("Deployment"))
360360
err = db.GetComponentCache().SetComponent(child4)
361361
require.NoError(t, err)
362362

363363
score, err = db.GetComponentCache().HealthScore()
364364
require.NoError(t, err)
365+
assert.Equal(t, int64(60), score)
366+
367+
// Invalid certificate should deduct an additional 10 points.
368+
child5 := createComponent("child5", &uid, WithState(client.ComponentStateFailed), WithKind("Certificate"))
369+
err = db.GetComponentCache().SetComponent(child5)
370+
require.NoError(t, err)
371+
372+
score, err = db.GetComponentCache().HealthScore()
373+
require.NoError(t, err)
374+
assert.Equal(t, int64(40), score)
375+
376+
// Failing resources in kube-system namespace should deduct an additional 20 points.
377+
child6 := createComponent("child6", &uid, WithState(client.ComponentStateFailed), WithKind("Pod"), WithNamespace("kube-system"))
378+
err = db.GetComponentCache().SetComponent(child6)
379+
require.NoError(t, err)
380+
381+
score, err = db.GetComponentCache().HealthScore()
382+
require.NoError(t, err)
383+
assert.Equal(t, int64(12), score)
384+
385+
// Failing persistent volume should deduct an additional 10 points.
386+
// The score should not go below 0.
387+
child7 := createComponent("child7", &uid, WithState(client.ComponentStateFailed), WithKind("PersistentVolume"))
388+
err = db.GetComponentCache().SetComponent(child7)
389+
require.NoError(t, err)
390+
391+
score, err = db.GetComponentCache().HealthScore()
392+
require.NoError(t, err)
393+
assert.Equal(t, int64(0), score)
394+
})
395+
396+
t.Run("cache should calculate correct health score for components with no children", func(t *testing.T) {
397+
db.Init()
398+
defer db.GetComponentCache().Close()
399+
400+
uid := testUID
401+
component := createComponent(uid, nil, WithState(client.ComponentStateRunning))
402+
err := db.GetComponentCache().SetComponent(component)
403+
require.NoError(t, err)
404+
405+
score, err := db.GetComponentCache().HealthScore()
406+
require.NoError(t, err)
407+
assert.Equal(t, int64(100), score)
408+
})
409+
410+
t.Run("cache should calculate health score with critical system component failures", func(t *testing.T) {
411+
db.Init()
412+
defer db.GetComponentCache().Close()
413+
414+
baseComponent := createComponent(testUID, nil, WithState(client.ComponentStateRunning))
415+
err := db.GetComponentCache().SetComponent(baseComponent)
416+
require.NoError(t, err)
417+
418+
runningPod := createComponent("running-pod", nil, WithState(client.ComponentStateRunning), WithKind("Pod"), WithName("running-pod"))
419+
err = db.GetComponentCache().SetComponent(runningPod)
420+
require.NoError(t, err)
421+
422+
runningDeployment := createComponent("running-deployment", nil, WithState(client.ComponentStateRunning), WithKind("Deployment"), WithName("running-deployment"))
423+
err = db.GetComponentCache().SetComponent(runningDeployment)
424+
require.NoError(t, err)
425+
426+
runningService := createComponent("running-service", nil, WithState(client.ComponentStateRunning), WithKind("Service"), WithName("running-service"))
427+
err = db.GetComponentCache().SetComponent(runningService)
428+
require.NoError(t, err)
429+
430+
// Test CoreDNS failure (50 point deduction)
431+
coredns := createComponent("coredns", nil, WithState(client.ComponentStateFailed), WithName("coredns"))
432+
err = db.GetComponentCache().SetComponent(coredns)
433+
require.NoError(t, err)
434+
435+
score, err := db.GetComponentCache().HealthScore()
436+
require.NoError(t, err)
437+
assert.Equal(t, int64(30), score)
438+
439+
// Test AWS CNI failure (additional 50 point deduction)
440+
awscni := createComponent("aws-cni", nil, WithState(client.ComponentStateFailed), WithName("aws-cni"))
441+
err = db.GetComponentCache().SetComponent(awscni)
442+
require.NoError(t, err)
443+
444+
score, err = db.GetComponentCache().HealthScore()
445+
require.NoError(t, err)
446+
assert.Equal(t, int64(0), score)
447+
448+
// Test ingress-nginx service failure (would deduct 50 but already at 0)
449+
ingress := createComponent("ingress", nil, WithState(client.ComponentStateFailed), WithKind("Service"), WithName("ingress-nginx-controller"), WithNamespace("ingress-nginx"))
450+
err = db.GetComponentCache().SetComponent(ingress)
451+
require.NoError(t, err)
452+
453+
score, err = db.GetComponentCache().HealthScore()
454+
require.NoError(t, err)
455+
assert.Equal(t, int64(0), score)
456+
})
457+
458+
t.Run("cache should calculate health score with combined resource failures", func(t *testing.T) {
459+
db.Init()
460+
defer db.GetComponentCache().Close()
461+
462+
baseComponent := createComponent(testUID, nil, WithState(client.ComponentStateRunning))
463+
err := db.GetComponentCache().SetComponent(baseComponent)
464+
require.NoError(t, err)
465+
466+
// Failed Certificate (10 point deduction)
467+
cert := createComponent("cert", nil, WithState(client.ComponentStateFailed), WithKind("Certificate"))
468+
err = db.GetComponentCache().SetComponent(cert)
469+
require.NoError(t, err)
470+
471+
score, err := db.GetComponentCache().HealthScore()
472+
require.NoError(t, err)
365473
assert.Equal(t, int64(40), score)
474+
475+
// Failed kube-system resource (20 point deduction)
476+
kubeSystem := createComponent("kube-system-res", nil, WithState(client.ComponentStateFailed), WithNamespace("kube-system"))
477+
err = db.GetComponentCache().SetComponent(kubeSystem)
478+
require.NoError(t, err)
479+
480+
score, err = db.GetComponentCache().HealthScore()
481+
require.NoError(t, err)
482+
assert.Equal(t, int64(3), score)
483+
484+
// Failed PersistentVolume (10 point deduction)
485+
pv := createComponent("pv", nil, WithState(client.ComponentStateFailed), WithKind("PersistentVolume"))
486+
err = db.GetComponentCache().SetComponent(pv)
487+
require.NoError(t, err)
488+
489+
score, err = db.GetComponentCache().HealthScore()
490+
require.NoError(t, err)
491+
assert.Equal(t, int64(0), score)
492+
493+
// Failed istio-system resource (50 point deduction)
494+
istio := createComponent("istio-res", nil, WithState(client.ComponentStateFailed), WithNamespace("istio-system"))
495+
err = db.GetComponentCache().SetComponent(istio)
496+
require.NoError(t, err)
497+
498+
score, err = db.GetComponentCache().HealthScore()
499+
require.NoError(t, err)
500+
assert.Equal(t, int64(0), score)
366501
})
367502
}
368503

pkg/cache/db/queries.go

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,25 @@ const (
7272
FROM descendants
7373
`
7474

75-
clusterHealthScore = `SELECT CAST(AVG(health = 0) * 100 as INTEGER) as score FROM component`
75+
// clusterHealthScore is the query that produces a single integer column
// named score for the whole component table. It works in three steps:
//
//  1. base_score: the average over every row of component, counting 100 for
//     rows with health = 0 and 0 for all other rows, cast to INTEGER.
//  2. deductions: a sum of fixed per-row penalties. Only rows with
//     health = 2 are penalized (presumably the failed state; confirm
//     against the health enum). The CASE takes the first matching WHEN,
//     so each row contributes at most one penalty.
//  3. The final SELECT subtracts the deductions from the base score
//     (COALESCE turns a NULL sum into 0) and clamps the result at 0.
//
// NOTE(review): because the CASE is first-match, the branch order matters.
// A failing row in the kube-system namespace hits the 20-point branch before
// the name-based 50-point branch, so a failing coredns or aws-cni component
// located in kube-system is penalized 20, not 50. Likewise a failing
// Certificate in any of the listed namespaces is penalized only 10. Confirm
// this precedence is intended.
//
// NOTE(review): with an empty component table AVG yields NULL; if the
// backing store is SQLite (the two-argument MAX suggests so), MAX(0, NULL)
// is NULL rather than 0. Verify how the caller scans a NULL score.
clusterHealthScore = `
76+
WITH base_score AS (
77+
SELECT CAST(AVG(CASE WHEN health = 0 THEN 100 ELSE 0 END) as INTEGER) as score
78+
FROM component
79+
),
80+
deductions AS (
81+
SELECT
82+
SUM(CASE
83+
WHEN kind = 'Certificate' AND health = 2 THEN 10
84+
WHEN namespace = 'kube-system' AND health = 2 THEN 20
85+
WHEN kind = 'PersistentVolume' AND health = 2 THEN 10
86+
WHEN (namespace = 'istio-system' OR name LIKE '%coredns%' OR name LIKE '%aws-cni%') AND health = 2 THEN 50
87+
WHEN (namespace LIKE '%ingress%' OR namespace LIKE '%traefik%') AND kind = 'Service' AND health = 2 THEN 50
88+
ELSE 0
89+
END) as total_deductions
90+
FROM component
91+
)
92+
SELECT MAX(0, (SELECT score FROM base_score) - (SELECT COALESCE(total_deductions, 0) FROM deductions)) as score
93+
`
7694

7795
nodeStatistics = `
7896
SELECT node, COUNT(*)

0 commit comments

Comments
 (0)