@@ -47,6 +47,7 @@ import (
4747 internalapi "github.com/cert-manager/csi-lib/internal/api"
4848 internalapiutil "github.com/cert-manager/csi-lib/internal/api/util"
4949 "github.com/cert-manager/csi-lib/metadata"
50+ "github.com/cert-manager/csi-lib/metrics"
5051 "github.com/cert-manager/csi-lib/storage"
5152)
5253
@@ -89,6 +90,9 @@ type Options struct {
8990
9091 // RenewalBackoffConfig configures the exponential backoff applied to certificate renewal failures.
9192 RenewalBackoffConfig * wait.Backoff
93+
94+ // Metrics is used for exposing Prometheus metrics
95+ Metrics * metrics.Metrics
9296}
9397
9498// NewManager constructs a new manager used to manage volumes containing
@@ -126,6 +130,9 @@ func NewManager(opts Options) (*Manager, error) {
126130 if opts .Log == nil {
127131 return nil , errors .New ("log must be set" )
128132 }
133+ if opts .Metrics == nil {
134+ opts .Metrics = metrics .New (opts .Log )
135+ }
129136 if opts .MetadataReader == nil {
130137 return nil , errors .New ("MetadataReader must be set" )
131138 }
@@ -241,6 +248,7 @@ func NewManager(opts Options) (*Manager, error) {
241248 metadataReader : opts .MetadataReader ,
242249 clock : opts .Clock ,
243250 log : * opts .Log ,
251+ metrics : opts .Metrics ,
244252
245253 generatePrivateKey : opts .GeneratePrivateKey ,
246254 generateRequest : opts .GenerateRequest ,
@@ -375,6 +383,9 @@ type Manager struct {
375383 // No thread safety is added around this field, and it MUST NOT be used for any implementation logic.
376384 // It should not be used full-stop :).
377385 doNotUse_CallOnEachIssue func ()
386+
387+ // metrics is used to expose Prometheus metrics.
388+ metrics * metrics.Metrics
378389}
379390
380391// issue will step through the entire issuance flow for a volume.
@@ -387,6 +398,9 @@ func (m *Manager) issue(ctx context.Context, volumeID string) error {
387398 log := m .log .WithValues ("volume_id" , volumeID )
388399 log .Info ("Processing issuance" )
389400
401+ // Increase issue count
402+ m .metrics .IncrementIssueCallCount (m .nodeNameHash , volumeID )
403+
390404 if err := m .cleanupStaleRequests (ctx , log , volumeID ); err != nil {
391405 return fmt .Errorf ("cleaning up stale requests: %w" , err )
392406 }
@@ -594,7 +608,7 @@ func (m *Manager) handleRequest(ctx context.Context, volumeID string, meta metad
594608 // Calculate the default next issuance time.
595609 // The implementation's writeKeypair function may override this value before
596610 // writing to the storage layer.
597- renewalPoint , err := calculateNextIssuanceTime (req .Status .Certificate )
611+ expiryPoint , renewalPoint , err := getExpiryAndDefaultNextIssuanceTime (req .Status .Certificate )
598612 if err != nil {
599613 return fmt .Errorf ("calculating next issuance time: %w" , err )
600614 }
@@ -606,6 +620,10 @@ func (m *Manager) handleRequest(ctx context.Context, volumeID string, meta metad
606620 }
607621 log .V (2 ).Info ("Wrote new keypair to storage" )
608622
623+ // Update the request metrics.
624+ // Using meta.NextIssuanceTime instead of renewalPoint here, in case writeKeypair overrides the value.
625+ m .metrics .UpdateCertificateRequest (req , expiryPoint , * meta .NextIssuanceTime )
626+
609627 // We must explicitly delete the private key from the pending requests map so that the existing Completed
610628 // request will not be re-used upon renewal.
611629 // Without this, the renewal would pick up the existing issued certificate and re-issue, rather than requesting
@@ -657,6 +675,9 @@ func (m *Manager) cleanupStaleRequests(ctx context.Context, log logr.Logger, vol
657675 }
658676 }
659677
678+ // Remove the CertificateRequest from the metrics.
679+ m .metrics .RemoveCertificateRequest (toDelete .Name , toDelete .Namespace )
680+
660681 log .Info ("Deleted CertificateRequest resource" , "name" , toDelete .Name , "namespace" , toDelete .Namespace )
661682 }
662683
@@ -756,6 +777,8 @@ func (m *Manager) ManageVolumeImmediate(ctx context.Context, volumeID string) (m
756777 // If issuance fails, immediately return without retrying so the caller can decide
757778 // how to proceed depending on the context this method was called within.
758779 if err := m .issue (ctx , volumeID ); err != nil {
780+ // Increase issue error count
781+ m .metrics .IncrementIssueErrorCount (m .nodeNameHash , volumeID )
759782 return true , err
760783 }
761784 }
@@ -783,6 +806,8 @@ func (m *Manager) manageVolumeIfNotManaged(volumeID string) (managed bool) {
783806 // construct a new channel used to stop management of the volume
784807 stopCh := make (chan struct {})
785808 m .managedVolumes [volumeID ] = stopCh
809+ // Increase managed volume count for this driver
810+ m .metrics .IncrementManagedVolumeCount (m .nodeNameHash )
786811
787812 return true
788813}
@@ -800,6 +825,10 @@ func (m *Manager) startRenewalRoutine(volumeID string) (started bool) {
800825 return false
801826 }
802827
828+ // Increase managed certificate count for this driver.
829+ // We assume each volume will have one certificate to be managed.
830+ m .metrics .IncrementManagedCertificateCount (m .nodeNameHash )
831+
803832 // Create a context that will be cancelled when the stopCh is closed
804833 ctx , cancel := context .WithCancel (context .Background ())
805834 go func () {
@@ -835,6 +864,8 @@ func (m *Manager) startRenewalRoutine(volumeID string) (started bool) {
835864 defer issueCancel ()
836865 if err := m .issue (issueCtx , volumeID ); err != nil {
837866 log .Error (err , "Failed to issue certificate, retrying after applying exponential backoff" )
867+ // Increase issue error count
868+ m .metrics .IncrementIssueErrorCount (m .nodeNameHash , volumeID )
838869 return false , nil
839870 }
840871 return true , nil
@@ -874,6 +905,14 @@ func (m *Manager) UnmanageVolume(volumeID string) {
874905 if stopCh , ok := m .managedVolumes [volumeID ]; ok {
875906 close (stopCh )
876907 delete (m .managedVolumes , volumeID )
908+ if reqs , err := m .listAllRequestsForVolume (volumeID ); err == nil {
909+ // Remove the CertificateRequests from the metrics on a best-effort basis.
910+ for _ , req := range reqs {
911+ if req != nil {
912+ m .metrics .RemoveCertificateRequest (req .Name , req .Namespace )
913+ }
914+ }
915+ }
877916 }
878917}
879918
@@ -919,19 +958,19 @@ func (m *Manager) Stop() {
919958 }
920959}
921960
// getExpiryAndDefaultNextIssuanceTime returns the expiry time (NotAfter) of the
// certificate held in the first PEM block of chain, together with the default
// time at which the driver should renew it: 2/3rds of the way through its
// lifetime (NotAfter - NotBefore).
//
// Only the first PEM block in chain is parsed. An error is returned, with both
// times set to their zero value, if chain contains no PEM block or if that
// block cannot be parsed as an X.509 certificate.
func getExpiryAndDefaultNextIssuanceTime(chain []byte) (time.Time, time.Time, error) {
	block, _ := pem.Decode(chain)
	if block == nil {
		// pem.Decode returns a nil block when no PEM data is found; reading
		// block.Bytes in that case would panic with a nil pointer dereference.
		return time.Time{}, time.Time{}, errors.New("parsing issued certificate: no PEM data found")
	}

	crt, err := x509.ParseCertificate(block.Bytes)
	if err != nil {
		return time.Time{}, time.Time{}, fmt.Errorf("parsing issued certificate: %w", err)
	}

	// Renew once two thirds of the lifetime have elapsed, i.e. one third of
	// the total duration before NotAfter.
	actualDuration := crt.NotAfter.Sub(crt.NotBefore)
	renewBeforeNotAfter := actualDuration / 3

	return crt.NotAfter, crt.NotAfter.Add(-renewBeforeNotAfter), nil
}
0 commit comments