Skip to content
45 changes: 45 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2541,3 +2541,48 @@ go install github.com/TwiN/gatus/v5@latest

### High level design overview
![Gatus diagram](.github/assets/gatus-diagram.jpg)

### Certificate Monitoring

Gatus can monitor SSL/TLS certificates for expiration. For endpoints using TLS (including HTTPS and STARTTLS),
Gatus checks the expiration of every certificate in the certificate chain, including:
- The leaf (end-entity) certificate
- Any intermediate certificates
- The root certificate, when it is part of the chain

You can use the `[CERTIFICATE_EXPIRATION]` placeholder in your conditions to check the expiration time of the leaf certificate:

```yaml
metrics: true

endpoints:
  - name: google-cert-chain
    url: https://google.com
    interval: 1m
    conditions:
      - "[CERTIFICATE_EXPIRATION] > 48h"
      - "[CONNECTED] == true"

  - name: gmail-starttls-chain
    url: "starttls://smtp.gmail.com:587"
    interval: 1m
    conditions:
      - "[CONNECTED] == true"
      - "[CERTIFICATE_EXPIRATION] > 48h"

  - name: cloudflare-tls-chain
    url: "tls://1.1.1.1:853"
    interval: 1m
    conditions:
      - "[CONNECTED] == true"
      - "[CERTIFICATE_EXPIRATION] > 48h"
```

The certificate chain information is also exposed via Prometheus metrics:
- `gatus_results_certificate_expiration_seconds`: Number of seconds until the leaf certificate expires
- `gatus_results_certificate_chain_expiration_seconds`: Number of seconds until each certificate in the chain expires, labeled with the certificate's `subject` and `issuer`

Example PromQL query to alert on any certificate in the chain expiring soon:
```promql
min(gatus_results_certificate_chain_expiration_seconds) by (key, group, name) < 172800 # 48 hours
```
46 changes: 30 additions & 16 deletions client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,53 +122,67 @@ func CanCreateSCTPConnection(address string, config *Config) bool {
}
}

// CertificateChainInfo contains information about a TLS certificate chain.
//
// It is the single return value of CanPerformStartTLS and CanPerformTLS, so a
// failure is reported through the Error field instead of a separate error
// return value.
type CertificateChainInfo struct {
// Connected is true once the TLS connection state has been obtained.
// It is left false whenever Error is set.
Connected bool
// Chain holds the certificates obtained from the connection: the first
// verified chain when one is available, the peer certificates otherwise.
Chain []*x509.Certificate
// Error is the reason the check failed, or nil on success.
Error error
}

// CanPerformStartTLS checks whether a connection can be established to an address using the STARTTLS protocol
func CanPerformStartTLS(address string, config *Config) (connected bool, certificate *x509.Certificate, err error) {
func CanPerformStartTLS(address string, config *Config) CertificateChainInfo {
hostAndPort := strings.Split(address, ":")
if len(hostAndPort) != 2 {
return false, nil, errors.New("invalid address for starttls, format must be host:port")
return CertificateChainInfo{Error: errors.New("invalid address for starttls, format must be host:port")}
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know how I feel about not passing the error as a separate return value

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree. Looks weird.

}
connection, err := net.DialTimeout("tcp", address, config.Timeout)
if err != nil {
return
return CertificateChainInfo{Error: err}
}
smtpClient, err := smtp.NewClient(connection, hostAndPort[0])
if err != nil {
return
return CertificateChainInfo{Error: err}
}
err = smtpClient.StartTLS(&tls.Config{
InsecureSkipVerify: config.Insecure,
ServerName: hostAndPort[0],
})
if err != nil {
return
return CertificateChainInfo{Error: err}
}
if state, ok := smtpClient.TLSConnectionState(); ok {
certificate = state.PeerCertificates[0]
} else {
return false, nil, errors.New("could not get TLS connection state")
return CertificateChainInfo{
Connected: true,
Chain: state.PeerCertificates,
}
}
return true, certificate, nil
return CertificateChainInfo{Error: errors.New("could not get TLS connection state")}
}

// CanPerformTLS checks whether a connection can be established to an address using the TLS protocol
func CanPerformTLS(address string, config *Config) (connected bool, certificate *x509.Certificate, err error) {
func CanPerformTLS(address string, config *Config) CertificateChainInfo {
connection, err := tls.DialWithDialer(&net.Dialer{Timeout: config.Timeout}, "tcp", address, &tls.Config{
InsecureSkipVerify: config.Insecure,
})
if err != nil {
return
return CertificateChainInfo{Error: err}
}
defer connection.Close()
verifiedChains := connection.ConnectionState().VerifiedChains

state := connection.ConnectionState()
// If config.Insecure is set to true, verifiedChains will be an empty list []
// We should get the parsed certificates from PeerCertificates, it can't be empty on the client side
// Reference: https://pkg.go.dev/crypto/tls#PeerCertificates
if len(verifiedChains) == 0 || len(verifiedChains[0]) == 0 {
peerCertificates := connection.ConnectionState().PeerCertificates
return true, peerCertificates[0], nil
if len(state.VerifiedChains) == 0 || len(state.VerifiedChains[0]) == 0 {
return CertificateChainInfo{
Connected: true,
Chain: state.PeerCertificates,
}
}
return CertificateChainInfo{
Connected: true,
Chain: state.VerifiedChains[0],
}
return true, verifiedChains[0][0], nil
}

// CanCreateSSHConnection checks whether a connection can be established and a command can be executed to an address
Expand Down
20 changes: 10 additions & 10 deletions client/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,13 +154,13 @@ func TestCanPerformStartTLS(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
connected, _, err := CanPerformStartTLS(tt.args.address, &Config{Insecure: tt.args.insecure, Timeout: 5 * time.Second})
if (err != nil) != tt.wantErr {
t.Errorf("CanPerformStartTLS() err=%v, wantErr=%v", err, tt.wantErr)
info := CanPerformStartTLS(tt.args.address, &Config{Insecure: tt.args.insecure, Timeout: 5 * time.Second})
if (info.Error != nil) != tt.wantErr {
t.Errorf("CanPerformStartTLS() err=%v, wantErr=%v", info.Error, tt.wantErr)
return
}
if connected != tt.wantConnected {
t.Errorf("CanPerformStartTLS() connected=%v, wantConnected=%v", connected, tt.wantConnected)
if info.Connected != tt.wantConnected {
t.Errorf("CanPerformStartTLS() connected=%v, wantConnected=%v", info.Connected, tt.wantConnected)
}
})
}
Expand Down Expand Up @@ -223,13 +223,13 @@ func TestCanPerformTLS(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
connected, _, err := CanPerformTLS(tt.args.address, &Config{Insecure: tt.args.insecure, Timeout: 5 * time.Second})
if (err != nil) != tt.wantErr {
t.Errorf("CanPerformTLS() err=%v, wantErr=%v", err, tt.wantErr)
info := CanPerformTLS(tt.args.address, &Config{Insecure: tt.args.insecure, Timeout: 5 * time.Second})
if (info.Error != nil) != tt.wantErr {
t.Errorf("CanPerformTLS() err=%v, wantErr=%v", info.Error, tt.wantErr)
return
}
if connected != tt.wantConnected {
t.Errorf("CanPerformTLS() connected=%v, wantConnected=%v", connected, tt.wantConnected)
if info.Connected != tt.wantConnected {
t.Errorf("CanPerformTLS() connected=%v, wantConnected=%v", info.Connected, tt.wantConnected)
}
})
}
Expand Down
51 changes: 48 additions & 3 deletions config/endpoint/endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -354,16 +354,15 @@ func (e *Endpoint) call(result *Result) {
result.Duration = time.Since(startTime)
} else if endpointType == TypeSTARTTLS || endpointType == TypeTLS {
if endpointType == TypeSTARTTLS {
result.Connected, certificate, err = client.CanPerformStartTLS(strings.TrimPrefix(e.URL, "starttls://"), e.ClientConfig)
err = e.evaluateSTARTTLS(result)
} else {
result.Connected, certificate, err = client.CanPerformTLS(strings.TrimPrefix(e.URL, "tls://"), e.ClientConfig)
err = e.evaluateTLS(result)
}
if err != nil {
result.AddError(err.Error())
return
}
result.Duration = time.Since(startTime)
result.CertificateExpiration = time.Until(certificate.NotAfter)
} else if endpointType == TypeTCP {
result.Connected = client.CanCreateTCPConnection(strings.TrimPrefix(e.URL, "tcp://"), e.ClientConfig)
result.Duration = time.Since(startTime)
Expand Down Expand Up @@ -481,3 +480,49 @@ func (e *Endpoint) needsToRetrieveIP() bool {
}
return false
}

// evaluateSTARTTLS checks a starttls:// endpoint through
// client.CanPerformStartTLS and records the outcome in result.
//
// It returns an error, leaving result untouched, when the endpoint has a body
// configured or when the client reports a failure.
func (e *Endpoint) evaluateSTARTTLS(result *Result) error {
	if len(e.Body) > 0 {
		return errors.New("STARTTLS endpoints do not support body")
	}
	address := strings.TrimPrefix(e.URL, "starttls://")
	return recordCertificateChain(result, client.CanPerformStartTLS(address, e.ClientConfig))
}

// evaluateTLS checks a tls:// endpoint through client.CanPerformTLS and
// records the outcome in result.
//
// It returns an error, leaving result untouched, when the endpoint has a body
// configured or when the client reports a failure.
func (e *Endpoint) evaluateTLS(result *Result) error {
	if len(e.Body) > 0 {
		return errors.New("TLS endpoints do not support body")
	}
	address := strings.TrimPrefix(e.URL, "tls://")
	return recordCertificateChain(result, client.CanPerformTLS(address, e.ClientConfig))
}

// recordCertificateChain copies the outcome of a TLS or STARTTLS check into
// result.
//
// If info carries an error, that error is returned and result is not
// modified. Otherwise result.Connected is set, the first certificate of the
// chain determines result.CertificateExpiration, and one CertificateInfo per
// certificate is appended to result.CertificateChain. An empty chain only
// updates result.Connected.
func recordCertificateChain(result *Result, info client.CertificateChainInfo) error {
	if info.Error != nil {
		return info.Error
	}
	result.Connected = info.Connected
	for i, cert := range info.Chain {
		// Compute the remaining lifetime once per certificate so that the
		// leaf's ExpiresIn and result.CertificateExpiration are identical.
		expiresIn := time.Until(cert.NotAfter)
		if i == 0 {
			result.CertificateExpiration = expiresIn
		}
		result.CertificateChain = append(result.CertificateChain, CertificateInfo{
			Subject:   cert.Subject.String(),
			Issuer:    cert.Issuer.String(),
			NotAfter:  cert.NotAfter,
			ExpiresIn: expiresIn,
		})
	}
	return nil
}
8 changes: 8 additions & 0 deletions config/endpoint/result.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@ import (
"time"
)

// CertificateInfo stores information about a certificate in the chain.
type CertificateInfo struct {
// Subject is the string form of the certificate's subject.
Subject string `json:"subject"`
// Issuer is the string form of the certificate's issuer.
Issuer string `json:"issuer"`
// NotAfter is the time after which the certificate is no longer valid.
NotAfter time.Time `json:"not_after"`
// ExpiresIn is the time remaining until NotAfter, measured when the
// endpoint was evaluated. It is excluded from the JSON output.
ExpiresIn time.Duration `json:"-"`
}

// Result of the evaluation of a Endpoint
type Result struct {
// HTTPStatus is the HTTP response status code
Expand Down
31 changes: 25 additions & 6 deletions metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,13 @@ const namespace = "gatus" // The prefix of the metrics
var (
initializedMetrics bool // Whether the metrics have been initialized

resultTotal *prometheus.CounterVec
resultDurationSeconds *prometheus.GaugeVec
resultConnectedTotal *prometheus.CounterVec
resultCodeTotal *prometheus.CounterVec
resultCertificateExpirationSeconds *prometheus.GaugeVec
resultEndpointSuccess *prometheus.GaugeVec
resultTotal *prometheus.CounterVec
resultDurationSeconds *prometheus.GaugeVec
resultConnectedTotal *prometheus.CounterVec
resultCodeTotal *prometheus.CounterVec
resultCertificateExpirationSeconds *prometheus.GaugeVec
resultEndpointSuccess *prometheus.GaugeVec
resultCertificateChainExpirationSeconds *prometheus.GaugeVec
)

func initializePrometheusMetrics() {
Expand Down Expand Up @@ -52,6 +53,11 @@ func initializePrometheusMetrics() {
Name: "results_endpoint_success",
Help: "Displays whether or not the endpoint was a success",
}, []string{"key", "group", "name", "type"})
resultCertificateChainExpirationSeconds = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Name: "results_certificate_chain_expiration_seconds",
Help: "Number of seconds until each certificate in the chain expires",
}, []string{"key", "group", "name", "type", "subject", "issuer"})
}

// PublishMetricsForEndpoint publishes metrics for the given endpoint and its result.
Expand Down Expand Up @@ -81,4 +87,17 @@ func PublishMetricsForEndpoint(ep *endpoint.Endpoint, result *endpoint.Result) {
} else {
resultEndpointSuccess.WithLabelValues(ep.Key(), ep.Group, ep.Name, string(endpointType)).Set(0)
}

if len(result.CertificateChain) > 0 {
for _, cert := range result.CertificateChain {
resultCertificateChainExpirationSeconds.WithLabelValues(
ep.Key(),
ep.Group,
ep.Name,
string(endpointType),
cert.Subject,
cert.Issuer,
).Set(cert.ExpiresIn.Seconds())
}
}
}