Skip to content

Commit fe82465

Browse files
committed
Prevent multiple services from being evaluated at the same time
1 parent ab73c46 commit fe82465

File tree

7 files changed

+36 -32 lines changed

7 files changed

+36 -32 lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ You can specify a custom path by setting the `GATUS_CONFIG_FILE` environment var
1818
metrics: true # Whether to expose metrics at /metrics
1919
services:
2020
- name: twinnation # Name of your service, can be anything
21-
url: https://twinnation.org/actuator/health
21+
url: https://twinnation.org/health
2222
interval: 15s # Duration to wait between every status check (opt. default: 10s)
2323
conditions:
2424
- "[STATUS] == 200"

config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
metrics: true
22
services:
33
- name: Twinnation
4-
url: https://twinnation.org/actuator/health
4+
url: https://twinnation.org/health
55
interval: 30s
66
conditions:
77
- "[STATUS] == 200"

core/types.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ func (service *Service) EvaluateConditions() *Result {
7676
result.Success = false
7777
}
7878
for _, condition := range service.Conditions {
79-
success := condition.Evaluate(result)
79+
success := condition.evaluate(result)
8080
if !success {
8181
result.Success = false
8282
}
@@ -93,7 +93,7 @@ type ConditionResult struct {
9393

9494
type Condition string
9595

96-
func (c *Condition) Evaluate(result *Result) bool {
96+
func (c *Condition) evaluate(result *Result) bool {
9797
condition := string(*c)
9898
if strings.Contains(condition, "==") {
9999
parts := sanitizeAndResolve(strings.Split(condition, "=="), result)

core/types_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import (
77
func TestEvaluateWithIp(t *testing.T) {
88
condition := Condition("[IP] == 127.0.0.1")
99
result := &Result{Ip: "127.0.0.1"}
10-
condition.Evaluate(result)
10+
condition.evaluate(result)
1111
if !result.ConditionResults[0].Success {
1212
t.Errorf("Condition '%s' should have been a success", condition)
1313
}
@@ -16,7 +16,7 @@ func TestEvaluateWithIp(t *testing.T) {
1616
func TestEvaluateWithStatus(t *testing.T) {
1717
condition := Condition("[STATUS] == 201")
1818
result := &Result{HttpStatus: 201}
19-
condition.Evaluate(result)
19+
condition.evaluate(result)
2020
if !result.ConditionResults[0].Success {
2121
t.Errorf("Condition '%s' should have been a success", condition)
2222
}
@@ -25,7 +25,7 @@ func TestEvaluateWithStatus(t *testing.T) {
2525
func TestEvaluateWithFailure(t *testing.T) {
2626
condition := Condition("[STATUS] == 200")
2727
result := &Result{HttpStatus: 500}
28-
condition.Evaluate(result)
28+
condition.evaluate(result)
2929
if result.ConditionResults[0].Success {
3030
t.Errorf("Condition '%s' should have been a failure", condition)
3131
}

example/docker-compose-grafana-prometheus/config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
metrics: true
22
services:
33
- name: TwiNNatioN
4-
url: https://twinnation.org/actuator/health
4+
url: https://twinnation.org/health
55
interval: 10s
66
conditions:
77
- "[STATUS] == 200"

example/kubernetes/gatus.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ data:
44
metrics: true
55
services:
66
- name: TwiNNatioN
7-
url: https://twinnation.org/actuator/health
7+
url: https://twinnation.org/health
88
interval: 1m
99
conditions:
1010
- "[STATUS] == 200"

watchdog/watchdog.go

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -20,28 +20,32 @@ func GetServiceResults() *map[string][]*core.Result {
2020

2121
func Monitor(cfg *config.Config) {
2222
for _, service := range cfg.Services {
23-
go func(service *core.Service) {
24-
for {
25-
log.Printf("[watchdog][Monitor] Monitoring serviceName=%s", service.Name)
26-
result := service.EvaluateConditions()
27-
metric.PublishMetricsForService(service, result)
28-
rwLock.Lock()
29-
serviceResults[service.Name] = append(serviceResults[service.Name], result)
30-
if len(serviceResults[service.Name]) > 20 {
31-
serviceResults[service.Name] = serviceResults[service.Name][1:]
32-
}
33-
rwLock.Unlock()
34-
log.Printf(
35-
"[watchdog][Monitor] Finished monitoring serviceName=%s; errors=%d; requestDuration=%s",
36-
service.Name,
37-
len(result.Errors),
38-
result.Duration.Round(time.Millisecond),
39-
)
40-
log.Printf("[watchdog][Monitor] Waiting interval=%s before monitoring serviceName=%s", service.Interval, service.Name)
41-
time.Sleep(service.Interval)
42-
}
43-
}(service)
44-
// To prevent multiple requests from running exactly at the same time
45-
time.Sleep(100 * time.Millisecond)
23+
go monitor(service)
24+
// To prevent multiple requests from running at the same time
25+
time.Sleep(500 * time.Millisecond)
26+
}
27+
}
28+
29+
func monitor(service *core.Service) {
30+
for {
31+
// By placing the lock here, we prevent multiple services from being monitored at the exact same time, which
32+
// could cause performance issues and return inaccurate results
33+
rwLock.Lock()
34+
log.Printf("[watchdog][Monitor] Monitoring serviceName=%s", service.Name)
35+
result := service.EvaluateConditions()
36+
metric.PublishMetricsForService(service, result)
37+
serviceResults[service.Name] = append(serviceResults[service.Name], result)
38+
if len(serviceResults[service.Name]) > 20 {
39+
serviceResults[service.Name] = serviceResults[service.Name][1:]
40+
}
41+
rwLock.Unlock()
42+
log.Printf(
43+
"[watchdog][Monitor] Finished monitoring serviceName=%s; errors=%d; requestDuration=%s",
44+
service.Name,
45+
len(result.Errors),
46+
result.Duration.Round(time.Millisecond),
47+
)
48+
log.Printf("[watchdog][Monitor] Waiting interval=%s before monitoring serviceName=%s", service.Interval, service.Name)
49+
time.Sleep(service.Interval)
4650
}
4751
}

0 commit comments

Comments
 (0)