Skip to content

Commit ce8a360

Browse files
feat(metrics): add metrics (#79)
* include and expose default metrics * update dependencies * add metrics for scheduled lifetime tasks and for the docker registry client * add more metrics * add metrics to deploymentmanager * add metrics to deploymentmanager * add more metrics to the k8s parts * add websocket metrics * fix metric names * release name is human-readable Co-authored-by: Tom Schoener <[email protected]>
1 parent bfd295a commit ce8a360

26 files changed

+805
-362
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ development versions of your software into Kubernetes via Helm to allow everybod
2929
* You need Helm charts for each project you want to deploy. The charts need to be hosted in a chart registry.
3030
* Currently we support standard Helm chart registries and Helm GCS
3131
* The Docker image tag and the image pull policy need to be configurable
32-
* O-Neko works with kubernetes versions 1.10.0 - 1.22.1 (these versions are officially supported by the Kubernetes client library we use)
32+
* O-Neko works with kubernetes versions 1.10.0 - 1.23.3 (these versions are *officially* supported by the Kubernetes client library we use)
3333

3434
## How does it work?
3535

pom.xml

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,11 @@
2323
<properties>
2424
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
2525
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
26-
<spring.boot.version>2.4.1</spring.boot.version>
27-
<feign.version>10.11</feign.version>
28-
<org.mapstruct.version>1.4.1.Final</org.mapstruct.version>
29-
<org.projectlombok.lombok.version>1.18.16</org.projectlombok.lombok.version>
30-
<springdoc.version>1.5.10</springdoc.version>
26+
<spring.boot.version>2.7.0</spring.boot.version>
27+
<feign.version>11.8</feign.version>
28+
<org.mapstruct.version>1.5.2.Final</org.mapstruct.version>
29+
<org.projectlombok.lombok.version>1.18.24</org.projectlombok.lombok.version>
30+
<springdoc.version>1.6.9</springdoc.version>
3131
<branchName></branchName>
3232
<buildNumber></buildNumber>
3333
</properties>
@@ -69,6 +69,11 @@
6969
<groupId>org.springframework.boot</groupId>
7070
<artifactId>spring-boot-starter-security</artifactId>
7171
</dependency>
72+
<dependency>
73+
<groupId>io.micrometer</groupId>
74+
<artifactId>micrometer-registry-prometheus</artifactId>
75+
<scope>runtime</scope>
76+
</dependency>
7277
<dependency>
7378
<groupId>io.github.openfeign</groupId>
7479
<artifactId>feign-core</artifactId>
@@ -115,7 +120,7 @@
115120
<dependency>
116121
<groupId>com.github.ben-manes.caffeine</groupId>
117122
<artifactId>caffeine</artifactId>
118-
<version>2.8.8</version>
123+
<version>3.1.1</version>
119124
</dependency>
120125
<dependency>
121126
<groupId>org.projectlombok</groupId>
@@ -127,7 +132,7 @@
127132
<dependency>
128133
<groupId>org.apache.commons</groupId>
129134
<artifactId>commons-lang3</artifactId>
130-
<version>3.11</version>
135+
<version>3.12.0</version>
131136
</dependency>
132137
<dependency>
133138
<groupId>org.apache.commons</groupId>
@@ -142,34 +147,34 @@
142147
<dependency>
143148
<groupId>commons-io</groupId>
144149
<artifactId>commons-io</artifactId>
145-
<version>2.8.0</version>
150+
<version>2.11.0</version>
146151
</dependency>
147152
<dependency>
148153
<groupId>com.google.code.gson</groupId>
149154
<artifactId>gson</artifactId>
150-
<version>2.8.9</version>
155+
<version>2.9.0</version>
151156
<scope>compile</scope>
152157
</dependency>
153158
<dependency>
154159
<groupId>org.yaml</groupId>
155160
<artifactId>snakeyaml</artifactId>
156-
<version>1.27</version>
161+
<version>1.30</version>
157162
</dependency>
158163
<dependency>
159164
<groupId>de.flapdoodle.embed</groupId>
160165
<artifactId>de.flapdoodle.embed.mongo</artifactId>
161-
<version>3.0.0</version>
166+
<version>3.4.6</version>
162167
<scope>test</scope>
163168
</dependency>
164169
<dependency>
165170
<groupId>com.google.guava</groupId>
166171
<artifactId>guava</artifactId>
167-
<version>30.1.1-jre</version>
172+
<version>31.1-jre</version>
168173
</dependency>
169174
<dependency>
170175
<groupId>io.fabric8</groupId>
171176
<artifactId>kubernetes-client</artifactId>
172-
<version>5.8.0</version>
177+
<version>5.12.2</version>
173178
</dependency>
174179
<dependency>
175180
<groupId>org.mapstruct</groupId>
@@ -179,7 +184,7 @@
179184
<dependency>
180185
<groupId>org.assertj</groupId>
181186
<artifactId>assertj-core</artifactId>
182-
<version>3.18.1</version>
187+
<version>3.23.1</version>
183188
<scope>test</scope>
184189
</dependency>
185190
<dependency>
@@ -190,7 +195,7 @@
190195
<dependency>
191196
<groupId>net.logstash.logback</groupId>
192197
<artifactId>logstash-logback-encoder</artifactId>
193-
<version>6.6</version>
198+
<version>7.2</version>
194199
</dependency>
195200
<dependency>
196201
<groupId>org.springdoc</groupId>

src/main/java/io/oneko/automations/ScheduledLifetimeController.java

Lines changed: 54 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
package io.oneko.automations;
22

3-
import static io.oneko.util.MoreStructuredArguments.projectKv;
4-
import static io.oneko.util.MoreStructuredArguments.versionKv;
5-
import static net.logstash.logback.argument.StructuredArguments.kv;
3+
import static io.oneko.util.MoreStructuredArguments.*;
4+
import static net.logstash.logback.argument.StructuredArguments.*;
65

76
import java.util.List;
87
import java.util.Optional;
@@ -15,52 +14,86 @@
1514
import org.springframework.scheduling.annotation.Scheduled;
1615
import org.springframework.stereotype.Component;
1716

17+
import io.micrometer.core.instrument.MeterRegistry;
18+
import io.micrometer.core.instrument.Timer;
1819
import io.oneko.kubernetes.DeploymentManager;
1920
import io.oneko.kubernetes.deployments.DeployableStatus;
2021
import io.oneko.kubernetes.deployments.Deployment;
2122
import io.oneko.kubernetes.deployments.DeploymentRepository;
23+
import io.oneko.metrics.MetricNameBuilder;
2224
import io.oneko.project.ProjectRepository;
2325
import io.oneko.project.ProjectVersion;
2426
import io.oneko.project.ReadableProject;
2527
import io.oneko.project.WritableProjectVersion;
26-
import lombok.AllArgsConstructor;
2728
import lombok.extern.slf4j.Slf4j;
2829

2930
@Component
3031
@Slf4j
31-
@AllArgsConstructor
3232
public class ScheduledLifetimeController {
3333

3434
private final LifetimeBehaviourService lifetimeBehaviourService;
3535
private final ProjectRepository projectRepository;
3636
private final DeploymentRepository deploymentRepository;
3737
private final DeploymentManager deploymentManager;
3838

39+
private final Timer scheduledProjectCheckTimer;
40+
private final Timer expiredDeploymentStopTimer;
41+
private final Timer retrieveExpiredDeploymentsTimer;
42+
43+
public ScheduledLifetimeController(LifetimeBehaviourService lifetimeBehaviourService,
44+
ProjectRepository projectRepository,
45+
DeploymentRepository deploymentRepository,
46+
DeploymentManager deploymentManager,
47+
MeterRegistry meterRegistry) {
48+
this.lifetimeBehaviourService = lifetimeBehaviourService;
49+
this.projectRepository = projectRepository;
50+
this.deploymentRepository = deploymentRepository;
51+
this.deploymentManager = deploymentManager;
52+
53+
this.scheduledProjectCheckTimer = Timer.builder(new MetricNameBuilder().durationOf("lifetime.scheduled.checkProjects").build())
54+
.description("the time it takes O-Neko to check all projects for versions which have a lifetime configuration which needs to be checked")
55+
.publishPercentileHistogram()
56+
.register(meterRegistry);
57+
this.retrieveExpiredDeploymentsTimer = Timer.builder(new MetricNameBuilder().durationOf("lifetime.scheduled.deployments.retrieveExpired").build())
58+
.description("the time it takes O-Neko to filter and retrieve expired deployments")
59+
.publishPercentileHistogram()
60+
.register(meterRegistry);
61+
this.expiredDeploymentStopTimer = Timer.builder(new MetricNameBuilder().durationOf("lifetime.scheduled.deployments.stopExpired").build())
62+
.description("the time it takes O-Neko to stop an individual expired deployment")
63+
.publishPercentileHistogram()
64+
.register(meterRegistry);
65+
}
66+
3967
@Scheduled(fixedRate = 5 * 60000)
4068
public void checkProjects() {
41-
final List<ProjectVersion<?,?>> versions = projectRepository.getAll().stream()
69+
final var sample = Timer.start();
70+
final List<ProjectVersion<?, ?>> versions = projectRepository.getAll().stream()
4271
.map(ReadableProject::writable)
4372
.flatMap(project -> project.getVersions().stream())
4473
.filter(this::shouldConsiderVersion)
4574
.collect(Collectors.toList());
46-
75+
sample.stop(scheduledProjectCheckTimer);
4776
stopExpiredDeployments(versions,
4877
projectVersion -> log.info("deployment expired ({}, {})", versionKv(projectVersion), projectKv(projectVersion.getProject())));
4978
}
5079

51-
private void stopExpiredDeployments(List<ProjectVersion<?,?>> deployables, Consumer<ProjectVersion<?,?>> beforeStopDeployment) {
80+
private void stopExpiredDeployments(List<ProjectVersion<?, ?>> deployables, Consumer<ProjectVersion<?, ?>> beforeStopDeployment) {
81+
final Timer.Sample retrieveDeploymentsStart = Timer.start();
5282
final var deployments = getRelevantDeploymentsFor(deployables);
5383
final var expiredPairsOfDeployableAndDeployment = getExpiredPairsOfDeployableAndDeployment(deployables, deployments);
54-
55-
expiredPairsOfDeployableAndDeployment.forEach(expiredVersionDeploymentPair -> {
56-
final var projectVersion = expiredVersionDeploymentPair.getLeft();
57-
beforeStopDeployment.accept(projectVersion);
58-
if (projectVersion instanceof WritableProjectVersion) {
59-
deploymentManager.stopDeployment((WritableProjectVersion) projectVersion);
60-
} else {
61-
log.error("stopping is not supported ({})", kv("class_name", projectVersion.getClass()));
62-
}
63-
});
84+
retrieveDeploymentsStart.stop(retrieveExpiredDeploymentsTimer);
85+
86+
expiredPairsOfDeployableAndDeployment.forEach(expiredVersionDeploymentPair ->
87+
expiredDeploymentStopTimer.record(() -> { // record(Runnable) runs once per element, so each individual stop is what gets timed
88+
final var projectVersion = expiredVersionDeploymentPair.getLeft();
89+
beforeStopDeployment.accept(projectVersion);
90+
if (projectVersion instanceof WritableProjectVersion) {
91+
deploymentManager.stopDeployment((WritableProjectVersion) projectVersion);
92+
} else {
93+
log.error("stopping is not supported ({})", kv("class_name", projectVersion.getClass()));
94+
}
95+
})
96+
);
6497
}
6598

6699
private boolean shouldConsiderVersion(ProjectVersion<?, ?> version) {
@@ -72,14 +105,14 @@ private boolean shouldConsider(Optional<LifetimeBehaviour> behaviour) {
72105
return behaviour.isPresent() && !behaviour.get().isInfinite();
73106
}
74107

75-
private List<Deployment> getRelevantDeploymentsFor(List<ProjectVersion<?,?>> deployables) {
108+
private List<Deployment> getRelevantDeploymentsFor(List<ProjectVersion<?, ?>> deployables) {
76109
final var uuids = deployables.stream().map(ProjectVersion::getId).collect(Collectors.toSet());
77110
return deploymentRepository.findAllByProjectVersionIdIn(uuids).stream()
78111
.filter(deployment -> !deployment.getStatus().equals(DeployableStatus.NotScheduled))
79112
.collect(Collectors.toList());
80113
}
81114

82-
private Set<Pair<ProjectVersion<?,?>, Deployment>> getExpiredPairsOfDeployableAndDeployment(List<ProjectVersion<?,?>> versions, List<Deployment> deployments) {
115+
private Set<Pair<ProjectVersion<?, ?>, Deployment>> getExpiredPairsOfDeployableAndDeployment(List<ProjectVersion<?, ?>> versions, List<Deployment> deployments) {
83116
var combiningFunction = createExpiredDeployableDeploymentCombiningFunction(versions);
84117
return deployments.stream()
85118
.map(combiningFunction)
@@ -89,7 +122,7 @@ private Set<Pair<ProjectVersion<?,?>, Deployment>> getExpiredPairsOfDeployableAn
89122
}
90123

91124
//what a method name
92-
private Function<Deployment, Optional<Pair<ProjectVersion<?,?>, Deployment>>> createExpiredDeployableDeploymentCombiningFunction(List<ProjectVersion<?,?>> deployables) {
125+
private Function<Deployment, Optional<Pair<ProjectVersion<?, ?>, Deployment>>> createExpiredDeployableDeploymentCombiningFunction(List<ProjectVersion<?, ?>> deployables) {
93126
return (deployment) -> {
94127

95128
final var matchingDeployableOptional = deployables.stream()

src/main/java/io/oneko/docker/DockerRegistryPolling.java

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424

2525
import com.google.common.collect.Sets;
2626

27+
import io.micrometer.core.instrument.MeterRegistry;
28+
import io.micrometer.core.instrument.Timer;
2729
import io.oneko.docker.event.NewProjectVersionFoundEvent;
2830
import io.oneko.docker.event.ObsoleteProjectVersionRemovedEvent;
2931
import io.oneko.docker.v2.DockerRegistryClientFactory;
@@ -34,6 +36,7 @@
3436
import io.oneko.event.EventTrigger;
3537
import io.oneko.event.ScheduledTask;
3638
import io.oneko.kubernetes.DeploymentManager;
39+
import io.oneko.metrics.MetricNameBuilder;
3740
import io.oneko.project.ProjectRepository;
3841
import io.oneko.project.ProjectVersion;
3942
import io.oneko.project.ReadableProject;
@@ -47,6 +50,8 @@
4750
@Slf4j
4851
class DockerRegistryPolling {
4952

53+
54+
5055
@Data
5156
private static class VersionWithDockerManifest {
5257
private final WritableProjectVersion version;
@@ -63,18 +68,29 @@ private static class VersionWithDockerManifest {
6368
private final EventTrigger asTrigger;
6469
private final ExpiringBucket<UUID> failedManifestRequests = new ExpiringBucket<UUID>(Duration.ofMinutes(5)).concurrent();
6570
private final CurrentEventTrigger currentEventTrigger;
71+
private final Timer pollingJobTimer;
72+
private final Timer updateDatesJobTimer;
6673

6774
DockerRegistryPolling(ProjectRepository projectRepository,
6875
DockerRegistryClientFactory dockerRegistryClientFactory,
6976
DeploymentManager deploymentManager,
7077
EventDispatcher eventDispatcher,
71-
CurrentEventTrigger currentEventTrigger) {
78+
CurrentEventTrigger currentEventTrigger,
79+
MeterRegistry meterRegistry) {
7280
this.projectRepository = projectRepository;
7381
this.dockerRegistryClientFactory = dockerRegistryClientFactory;
7482
this.deploymentManager = deploymentManager;
7583
this.eventDispatcher = eventDispatcher;
7684
this.currentEventTrigger = currentEventTrigger;
7785
this.asTrigger = new ScheduledTask("Docker Registry Polling");
86+
this.pollingJobTimer = Timer.builder(new MetricNameBuilder().durationOf("docker.registry.polling.pollingJob").build())
87+
.description("the duration of the docker polling job")
88+
.publishPercentileHistogram()
89+
.register(meterRegistry);
90+
this.updateDatesJobTimer = Timer.builder(new MetricNameBuilder().durationOf("docker.registry.polling.updateDatesJob").build())
91+
.description("the duration of the image date update job")
92+
.publishPercentileHistogram()
93+
.register(meterRegistry);
7894
}
7995

8096
@Scheduled(fixedDelay = 20000, initialDelay = 10000)
@@ -99,6 +115,7 @@ protected void updateAndRedeployAllIfRequired() {
99115
}
100116

101117
log.trace("finished polling job ({})", kv("duration_millis", stopWatch.getTime()));
118+
pollingJobTimer.record(Duration.ofMillis(stopWatch.getTime()));
102119
}
103120
}
104121

@@ -121,6 +138,7 @@ protected void updateDatesForAllImagesAndAllTags() {
121138
}
122139

123140
log.trace("finished updating dates for all projects ({})", kv("duration_millis", stopWatch.getTime()));
141+
updateDatesJobTimer.record(Duration.ofMillis(stopWatch.getTime()));
124142
}
125143

126144
/**

0 commit comments

Comments
 (0)