@@ -47,6 +47,7 @@ import (
47
47
internalapi "github.com/cert-manager/csi-lib/internal/api"
48
48
internalapiutil "github.com/cert-manager/csi-lib/internal/api/util"
49
49
"github.com/cert-manager/csi-lib/metadata"
50
+ "github.com/cert-manager/csi-lib/metrics"
50
51
"github.com/cert-manager/csi-lib/storage"
51
52
)
52
53
@@ -89,6 +90,9 @@ type Options struct {
89
90
90
91
// RenewalBackoffConfig configures the exponential backoff applied to certificate renewal failures.
91
92
RenewalBackoffConfig * wait.Backoff
93
+
94
+ // Metrics is used for exposing Prometheus metrics
95
+ Metrics * metrics.Metrics
92
96
}
93
97
94
98
// NewManager constructs a new manager used to manage volumes containing
@@ -126,6 +130,9 @@ func NewManager(opts Options) (*Manager, error) {
126
130
if opts .Log == nil {
127
131
return nil , errors .New ("log must be set" )
128
132
}
133
+ if opts .Metrics == nil {
134
+ opts .Metrics = metrics .New (opts .Log )
135
+ }
129
136
if opts .MetadataReader == nil {
130
137
return nil , errors .New ("MetadataReader must be set" )
131
138
}
@@ -241,6 +248,7 @@ func NewManager(opts Options) (*Manager, error) {
241
248
metadataReader : opts .MetadataReader ,
242
249
clock : opts .Clock ,
243
250
log : * opts .Log ,
251
+ metrics : opts .Metrics ,
244
252
245
253
generatePrivateKey : opts .GeneratePrivateKey ,
246
254
generateRequest : opts .GenerateRequest ,
@@ -375,6 +383,9 @@ type Manager struct {
375
383
// No thread safety is added around this field, and it MUST NOT be used for any implementation logic.
376
384
// It should not be used full-stop :).
377
385
doNotUse_CallOnEachIssue func ()
386
+
387
+ // metrics is used to expose Prometheus metrics.
388
+ metrics * metrics.Metrics
378
389
}
379
390
380
391
// issue will step through the entire issuance flow for a volume.
@@ -387,6 +398,9 @@ func (m *Manager) issue(ctx context.Context, volumeID string) error {
387
398
log := m .log .WithValues ("volume_id" , volumeID )
388
399
log .Info ("Processing issuance" )
389
400
401
+ // Increase issue count
402
+ m .metrics .IncrementIssueCallCount (m .nodeNameHash , volumeID )
403
+
390
404
if err := m .cleanupStaleRequests (ctx , log , volumeID ); err != nil {
391
405
return fmt .Errorf ("cleaning up stale requests: %w" , err )
392
406
}
@@ -594,7 +608,7 @@ func (m *Manager) handleRequest(ctx context.Context, volumeID string, meta metad
594
608
// Calculate the default next issuance time.
595
609
// The implementation's writeKeypair function may override this value before
596
610
// writing to the storage layer.
597
- renewalPoint , err := calculateNextIssuanceTime (req .Status .Certificate )
611
+ expiryPoint , renewalPoint , err := getExpiryAndDefaultNextIssuanceTime (req .Status .Certificate )
598
612
if err != nil {
599
613
return fmt .Errorf ("calculating next issuance time: %w" , err )
600
614
}
@@ -606,6 +620,10 @@ func (m *Manager) handleRequest(ctx context.Context, volumeID string, meta metad
606
620
}
607
621
log .V (2 ).Info ("Wrote new keypair to storage" )
608
622
623
+ // Update the request metrics.
624
+ // Using meta.NextIssuanceTime instead of renewalPoint here, in case writeKeypair overrides the value.
625
+ m .metrics .UpdateCertificateRequest (req , expiryPoint , * meta .NextIssuanceTime )
626
+
609
627
// We must explicitly delete the private key from the pending requests map so that the existing Completed
610
628
// request will not be re-used upon renewal.
611
629
// Without this, the renewal would pick up the existing issued certificate and re-issue, rather than requesting
@@ -657,6 +675,9 @@ func (m *Manager) cleanupStaleRequests(ctx context.Context, log logr.Logger, vol
657
675
}
658
676
}
659
677
678
+ // Remove the CertificateRequest from the metrics.
679
+ m .metrics .RemoveCertificateRequest (toDelete .Name , toDelete .Namespace )
680
+
660
681
log .Info ("Deleted CertificateRequest resource" , "name" , toDelete .Name , "namespace" , toDelete .Namespace )
661
682
}
662
683
@@ -756,6 +777,8 @@ func (m *Manager) ManageVolumeImmediate(ctx context.Context, volumeID string) (m
756
777
// If issuance fails, immediately return without retrying so the caller can decide
757
778
// how to proceed depending on the context this method was called within.
758
779
if err := m .issue (ctx , volumeID ); err != nil {
780
+ // Increase issue error count
781
+ m .metrics .IncrementIssueErrorCount (m .nodeNameHash , volumeID )
759
782
return true , err
760
783
}
761
784
}
@@ -783,6 +806,8 @@ func (m *Manager) manageVolumeIfNotManaged(volumeID string) (managed bool) {
783
806
// construct a new channel used to stop management of the volume
784
807
stopCh := make (chan struct {})
785
808
m .managedVolumes [volumeID ] = stopCh
809
+ // Increase managed volume count for this driver
810
+ m .metrics .IncrementManagedVolumeCount (m .nodeNameHash )
786
811
787
812
return true
788
813
}
@@ -800,6 +825,10 @@ func (m *Manager) startRenewalRoutine(volumeID string) (started bool) {
800
825
return false
801
826
}
802
827
828
+ // Increase managed certificate count for this driver.
829
+ // We assume each volume will have one certificate to be managed.
830
+ m .metrics .IncrementManagedCertificateCount (m .nodeNameHash )
831
+
803
832
// Create a context that will be cancelled when the stopCh is closed
804
833
ctx , cancel := context .WithCancel (context .Background ())
805
834
go func () {
@@ -835,6 +864,8 @@ func (m *Manager) startRenewalRoutine(volumeID string) (started bool) {
835
864
defer issueCancel ()
836
865
if err := m .issue (issueCtx , volumeID ); err != nil {
837
866
log .Error (err , "Failed to issue certificate, retrying after applying exponential backoff" )
867
+ // Increase issue error count
868
+ m .metrics .IncrementIssueErrorCount (m .nodeNameHash , volumeID )
838
869
return false , nil
839
870
}
840
871
return true , nil
@@ -874,6 +905,14 @@ func (m *Manager) UnmanageVolume(volumeID string) {
874
905
if stopCh , ok := m .managedVolumes [volumeID ]; ok {
875
906
close (stopCh )
876
907
delete (m .managedVolumes , volumeID )
908
+ if reqs , err := m .listAllRequestsForVolume (volumeID ); err == nil {
909
+ // Best-effort removal of the volume's CertificateRequests from the metrics; a failure to list them is ignored.
910
+ for _ , req := range reqs {
911
+ if req != nil {
912
+ m .metrics .RemoveCertificateRequest (req .Name , req .Namespace )
913
+ }
914
+ }
915
+ }
877
916
}
878
917
}
879
918
@@ -919,19 +958,19 @@ func (m *Manager) Stop() {
919
958
}
920
959
}
921
960
922
// getExpiryAndDefaultNextIssuanceTime parses the first PEM block found in chain
// as an X.509 certificate and returns, in order:
//
//   - the certificate's expiry time (NotAfter), and
//   - the default time at which the certificate should be renewed by the
//     driver: 2/3rds of the way through its lifetime (NotAfter - NotBefore).
//
// A non-nil error is returned, together with two zero times, when chain
// contains no PEM data or when the first PEM block cannot be parsed as a
// certificate.
func getExpiryAndDefaultNextIssuanceTime(chain []byte) (time.Time, time.Time, error) {
	block, _ := pem.Decode(chain)
	if block == nil {
		// pem.Decode returns a nil block when no PEM data is found. Report
		// that as an error rather than dereferencing the nil block below.
		return time.Time{}, time.Time{}, fmt.Errorf("parsing issued certificate: no PEM data found")
	}

	crt, err := x509.ParseCertificate(block.Bytes)
	if err != nil {
		return time.Time{}, time.Time{}, fmt.Errorf("parsing issued certificate: %w", err)
	}

	// Renew once two thirds of the certificate's lifetime has elapsed, i.e.
	// one third of the lifetime before NotAfter.
	actualDuration := crt.NotAfter.Sub(crt.NotBefore)
	renewBeforeNotAfter := actualDuration / 3

	return crt.NotAfter, crt.NotAfter.Add(-renewBeforeNotAfter), nil
}
0 commit comments