Skip to content

Commit 5f85df6

Browse files
committed
Support prometheus metrics
The following metrics are added: certmanager_csi_certificate_request_expiration_timestamp_seconds, certmanager_csi_certificate_request_ready_status, certmanager_csi_certificate_request_renewal_timestamp_seconds, certmanager_csi_driver_issue_call_count, certmanager_csi_driver_issue_error_count, certmanager_csi_managed_certificate_count, certmanager_csi_managed_volume_count. Fixes: #60. Signed-off-by: Jing Liu <[email protected]>
1 parent c552f3a commit 5f85df6

9 files changed

+951
-18
lines changed

go.mod

+2-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ require (
77
github.com/container-storage-interface/spec v1.10.0
88
github.com/go-logr/logr v1.4.2
99
github.com/kubernetes-csi/csi-lib-utils v0.19.0
10+
github.com/prometheus/client_golang v1.20.4
1011
github.com/stretchr/testify v1.9.0
1112
google.golang.org/grpc v1.66.2
1213
k8s.io/apimachinery v0.31.1
@@ -42,6 +43,7 @@ require (
4243
github.com/josharian/intern v1.0.0 // indirect
4344
github.com/json-iterator/go v1.1.12 // indirect
4445
github.com/klauspost/compress v1.17.9 // indirect
46+
github.com/kylelemons/godebug v1.1.0 // indirect
4547
github.com/mailru/easyjson v0.7.7 // indirect
4648
github.com/moby/sys/mountinfo v0.7.2 // indirect
4749
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
@@ -51,7 +53,6 @@ require (
5153
github.com/opencontainers/runtime-spec v1.2.0 // indirect
5254
github.com/pkg/errors v0.9.1 // indirect
5355
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
54-
github.com/prometheus/client_golang v1.20.4 // indirect
5556
github.com/prometheus/client_model v0.6.1 // indirect
5657
github.com/prometheus/common v0.59.1 // indirect
5758
github.com/prometheus/procfs v0.15.1 // indirect

manager/manager.go

+46-7
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ import (
4747
internalapi "github.com/cert-manager/csi-lib/internal/api"
4848
internalapiutil "github.com/cert-manager/csi-lib/internal/api/util"
4949
"github.com/cert-manager/csi-lib/metadata"
50+
"github.com/cert-manager/csi-lib/metrics"
5051
"github.com/cert-manager/csi-lib/storage"
5152
)
5253

@@ -89,6 +90,9 @@ type Options struct {
8990

9091
// RenewalBackoffConfig configures the exponential backoff applied to certificate renewal failures.
9192
RenewalBackoffConfig *wait.Backoff
93+
94+
// Metrics is used for exposing Prometheus metrics
95+
Metrics *metrics.Metrics
9296
}
9397

9498
// NewManager constructs a new manager used to manage volumes containing
@@ -126,6 +130,9 @@ func NewManager(opts Options) (*Manager, error) {
126130
if opts.Log == nil {
127131
return nil, errors.New("log must be set")
128132
}
133+
if opts.Metrics == nil {
134+
opts.Metrics = metrics.New(opts.Log)
135+
}
129136
if opts.MetadataReader == nil {
130137
return nil, errors.New("MetadataReader must be set")
131138
}
@@ -241,6 +248,7 @@ func NewManager(opts Options) (*Manager, error) {
241248
metadataReader: opts.MetadataReader,
242249
clock: opts.Clock,
243250
log: *opts.Log,
251+
metrics: opts.Metrics,
244252

245253
generatePrivateKey: opts.GeneratePrivateKey,
246254
generateRequest: opts.GenerateRequest,
@@ -375,6 +383,9 @@ type Manager struct {
375383
// No thread safety is added around this field, and it MUST NOT be used for any implementation logic.
376384
// It should not be used full-stop :).
377385
doNotUse_CallOnEachIssue func()
386+
387+
// metrics is used to expose Prometheus metrics.
388+
metrics *metrics.Metrics
378389
}
379390

380391
// issue will step through the entire issuance flow for a volume.
@@ -387,6 +398,9 @@ func (m *Manager) issue(ctx context.Context, volumeID string) error {
387398
log := m.log.WithValues("volume_id", volumeID)
388399
log.Info("Processing issuance")
389400

401+
// Increase issue count
402+
m.metrics.IncrementIssueCallCount(m.nodeNameHash, volumeID)
403+
390404
if err := m.cleanupStaleRequests(ctx, log, volumeID); err != nil {
391405
return fmt.Errorf("cleaning up stale requests: %w", err)
392406
}
@@ -594,7 +608,7 @@ func (m *Manager) handleRequest(ctx context.Context, volumeID string, meta metad
594608
// Calculate the default next issuance time.
595609
// The implementation's writeKeypair function may override this value before
596610
// writing to the storage layer.
597-
renewalPoint, err := calculateNextIssuanceTime(req.Status.Certificate)
611+
expiryPoint, renewalPoint, err := getExpiryAndDefaultNextIssuanceTime(req.Status.Certificate)
598612
if err != nil {
599613
return fmt.Errorf("calculating next issuance time: %w", err)
600614
}
@@ -606,6 +620,10 @@ func (m *Manager) handleRequest(ctx context.Context, volumeID string, meta metad
606620
}
607621
log.V(2).Info("Wrote new keypair to storage")
608622

623+
// Update the request metrics.
624+
// Using meta.NextIssuanceTime instead of renewalPoint here, in case writeKeypair overrides the value.
625+
m.metrics.UpdateCertificateRequest(req, expiryPoint, *meta.NextIssuanceTime)
626+
609627
// We must explicitly delete the private key from the pending requests map so that the existing Completed
610628
// request will not be re-used upon renewal.
611629
// Without this, the renewal would pick up the existing issued certificate and re-issue, rather than requesting
@@ -657,6 +675,9 @@ func (m *Manager) cleanupStaleRequests(ctx context.Context, log logr.Logger, vol
657675
}
658676
}
659677

678+
// Remove the CertificateRequest from the metrics.
679+
m.metrics.RemoveCertificateRequest(toDelete.Name, toDelete.Namespace)
680+
660681
log.Info("Deleted CertificateRequest resource", "name", toDelete.Name, "namespace", toDelete.Namespace)
661682
}
662683

@@ -756,6 +777,8 @@ func (m *Manager) ManageVolumeImmediate(ctx context.Context, volumeID string) (m
756777
// If issuance fails, immediately return without retrying so the caller can decide
757778
// how to proceed depending on the context this method was called within.
758779
if err := m.issue(ctx, volumeID); err != nil {
780+
// Increase issue error count
781+
m.metrics.IncrementIssueErrorCount(m.nodeNameHash, volumeID)
759782
return true, err
760783
}
761784
}
@@ -783,6 +806,8 @@ func (m *Manager) manageVolumeIfNotManaged(volumeID string) (managed bool) {
783806
// construct a new channel used to stop management of the volume
784807
stopCh := make(chan struct{})
785808
m.managedVolumes[volumeID] = stopCh
809+
// Increase managed volume count for this driver
810+
m.metrics.IncrementManagedVolumeCount(m.nodeNameHash)
786811

787812
return true
788813
}
@@ -800,6 +825,10 @@ func (m *Manager) startRenewalRoutine(volumeID string) (started bool) {
800825
return false
801826
}
802827

828+
// Increase managed certificate count for this driver.
829+
// We assume each volume will have one certificate to be managed.
830+
m.metrics.IncrementManagedCertificateCount(m.nodeNameHash)
831+
803832
// Create a context that will be cancelled when the stopCh is closed
804833
ctx, cancel := context.WithCancel(context.Background())
805834
go func() {
@@ -835,6 +864,8 @@ func (m *Manager) startRenewalRoutine(volumeID string) (started bool) {
835864
defer issueCancel()
836865
if err := m.issue(issueCtx, volumeID); err != nil {
837866
log.Error(err, "Failed to issue certificate, retrying after applying exponential backoff")
867+
// Increase issue error count
868+
m.metrics.IncrementIssueErrorCount(m.nodeNameHash, volumeID)
838869
return false, nil
839870
}
840871
return true, nil
@@ -874,6 +905,14 @@ func (m *Manager) UnmanageVolume(volumeID string) {
874905
if stopCh, ok := m.managedVolumes[volumeID]; ok {
875906
close(stopCh)
876907
delete(m.managedVolumes, volumeID)
908+
if reqs, err := m.listAllRequestsForVolume(volumeID); err == nil {
909+
// Best effort: remove the volume's CertificateRequests from the metrics.
910+
for _, req := range reqs {
911+
if req != nil {
912+
m.metrics.RemoveCertificateRequest(req.Name, req.Namespace)
913+
}
914+
}
915+
}
877916
}
878917
}
879918

@@ -919,19 +958,19 @@ func (m *Manager) Stop() {
919958
}
920959
}
921960

922-
// calculateNextIssuanceTime will return the default time at which the certificate
923-
// should be renewed by the driver- 2/3rds through its lifetime (NotAfter -
924-
// NotBefore).
925-
func calculateNextIssuanceTime(chain []byte) (time.Time, error) {
961+
// getExpiryAndDefaultNextIssuanceTime returns the expiry time (NotAfter) of the
// first PEM-encoded certificate in chain, together with the default time at
// which the certificate should be renewed by the driver: 2/3rds through its
// lifetime (NotAfter - NotBefore).
//
// The first return value is the expiry time and the second is the default
// renewal time. If chain contains no PEM block, or the block cannot be parsed
// as an X.509 certificate, both times are zero and a non-nil error is returned.
func getExpiryAndDefaultNextIssuanceTime(chain []byte) (time.Time, time.Time, error) {
	block, _ := pem.Decode(chain)
	if block == nil {
		// pem.Decode yields a nil block when no PEM data is found; guard
		// against it so malformed input is reported instead of panicking.
		return time.Time{}, time.Time{}, errors.New("parsing issued certificate: no PEM data found")
	}

	crt, err := x509.ParseCertificate(block.Bytes)
	if err != nil {
		return time.Time{}, time.Time{}, fmt.Errorf("parsing issued certificate: %w", err)
	}

	actualDuration := crt.NotAfter.Sub(crt.NotBefore)

	// Renew once two thirds of the lifetime has elapsed, i.e. one third of
	// the lifetime before NotAfter.
	renewBeforeNotAfter := actualDuration / 3

	return crt.NotAfter, crt.NotAfter.Add(-renewBeforeNotAfter), nil
}

manager/manager_test.go

+10-7
Original file line numberDiff line numberDiff line change
@@ -454,7 +454,7 @@ func TestManager_cleanupStaleRequests(t *testing.T) {
454454
}
455455
}
456456

457-
func Test_calculateNextIssuanceTime(t *testing.T) {
457+
func Test_getExpiryAndDefaultNextIssuanceTime(t *testing.T) {
458458
notBefore := time.Date(1970, time.January, 1, 0, 0, 0, 0, time.UTC)
459459
notAfter := time.Date(1970, time.January, 4, 0, 0, 0, 0, time.UTC)
460460
pk, err := rsa.GenerateKey(rand.Reader, 2048)
@@ -474,20 +474,23 @@ func Test_calculateNextIssuanceTime(t *testing.T) {
474474
certPEM := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: derBytes})
475475

476476
tests := map[string]struct {
477-
expTime time.Time
478-
expErr bool
477+
expTime time.Time
478+
renewTime time.Time
479+
expErr bool
479480
}{
480481
"if no attributes given, return 2/3rd certificate lifetime": {
481-
expTime: notBefore.AddDate(0, 0, 2),
482-
expErr: false,
482+
expTime: notAfter,
483+
renewTime: notBefore.AddDate(0, 0, 2),
484+
expErr: false,
483485
},
484486
}
485487

486488
for name, test := range tests {
487489
t.Run(name, func(t *testing.T) {
488-
renewTime, err := calculateNextIssuanceTime(certPEM)
490+
expTime, renewTime, err := getExpiryAndDefaultNextIssuanceTime(certPEM)
489491
assert.Equal(t, test.expErr, err != nil)
490-
assert.Equal(t, test.expTime, renewTime)
492+
assert.Equal(t, test.expTime, expTime)
493+
assert.Equal(t, test.renewTime, renewTime)
491494
})
492495
}
493496
}

metrics/certificaterequest.go

+102
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/*
2+
Copyright 2024 The cert-manager Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package metrics
18+
19+
import (
20+
"time"
21+
22+
"github.com/prometheus/client_golang/prometheus"
23+
24+
cmapi "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1"
25+
cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1"
26+
)
27+
28+
var readyConditionStatuses = [...]cmmeta.ConditionStatus{
29+
cmmeta.ConditionTrue,
30+
cmmeta.ConditionFalse,
31+
cmmeta.ConditionUnknown,
32+
}
33+
34+
// UpdateCertificateRequest will update the given CertificateRequest's metrics for its expiry, renewal, and status condition.
35+
func (m *Metrics) UpdateCertificateRequest(cr *cmapi.CertificateRequest, exp, renewal time.Time) {
36+
m.updateCertificateRequestExpiryAndRenewalTime(cr, exp, renewal)
37+
m.updateCertificateRequestStatus(cr)
38+
}
39+
40+
// updateCertificateRequestExpiryAndRenewalTime updates the expiry and renewal time of a certificate request
41+
func (m *Metrics) updateCertificateRequestExpiryAndRenewalTime(cr *cmapi.CertificateRequest, exp, renewal time.Time) {
42+
expiryTime := 0.0
43+
if !exp.IsZero() {
44+
expiryTime = float64(exp.Unix())
45+
}
46+
m.certificateRequestExpiryTimeSeconds.With(prometheus.Labels{
47+
"name": cr.Name,
48+
"namespace": cr.Namespace,
49+
"issuer_name": cr.Spec.IssuerRef.Name,
50+
"issuer_kind": cr.Spec.IssuerRef.Kind,
51+
"issuer_group": cr.Spec.IssuerRef.Group}).Set(expiryTime)
52+
53+
renewalTime := 0.0
54+
if !renewal.IsZero() {
55+
renewalTime = float64(renewal.Unix())
56+
}
57+
m.certificateRequestRenewalTimeSeconds.With(prometheus.Labels{
58+
"name": cr.Name,
59+
"namespace": cr.Namespace,
60+
"issuer_name": cr.Spec.IssuerRef.Name,
61+
"issuer_kind": cr.Spec.IssuerRef.Kind,
62+
"issuer_group": cr.Spec.IssuerRef.Group}).Set(renewalTime)
63+
}
64+
65+
// updateCertificateRequestStatus will update the metric for that Certificate Request
66+
func (m *Metrics) updateCertificateRequestStatus(cr *cmapi.CertificateRequest) {
67+
for _, c := range cr.Status.Conditions {
68+
if c.Type == cmapi.CertificateRequestConditionReady {
69+
m.updateCertificateRequestReadyStatus(cr, c.Status)
70+
return
71+
}
72+
}
73+
74+
// If no status condition set yet, set to Unknown
75+
m.updateCertificateRequestReadyStatus(cr, cmmeta.ConditionUnknown)
76+
}
77+
78+
func (m *Metrics) updateCertificateRequestReadyStatus(cr *cmapi.CertificateRequest, current cmmeta.ConditionStatus) {
79+
for _, condition := range readyConditionStatuses {
80+
value := 0.0
81+
82+
if current == condition {
83+
value = 1.0
84+
}
85+
86+
m.certificateRequestReadyStatus.With(prometheus.Labels{
87+
"name": cr.Name,
88+
"namespace": cr.Namespace,
89+
"condition": string(condition),
90+
"issuer_name": cr.Spec.IssuerRef.Name,
91+
"issuer_kind": cr.Spec.IssuerRef.Kind,
92+
"issuer_group": cr.Spec.IssuerRef.Group,
93+
}).Set(value)
94+
}
95+
}
96+
97+
// RemoveCertificateRequest will delete the CertificateRequest metrics from continuing to be exposed.
98+
func (m *Metrics) RemoveCertificateRequest(name, namespace string) {
99+
m.certificateRequestExpiryTimeSeconds.DeletePartialMatch(prometheus.Labels{"name": name, "namespace": namespace})
100+
m.certificateRequestRenewalTimeSeconds.DeletePartialMatch(prometheus.Labels{"name": name, "namespace": namespace})
101+
m.certificateRequestReadyStatus.DeletePartialMatch(prometheus.Labels{"name": name, "namespace": namespace})
102+
}

0 commit comments

Comments
 (0)