Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add basic reconciler metrics #689

Merged
merged 6 commits into from
Sep 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -297,3 +297,9 @@ redhat-certificated-image-build:
redhat-certificated-image-push: ## Push the bundle image.
echo $(OPERATOR_IMG)
$(MAKE) image-push IMG=$(OPERATOR_IMG)

##@ Generate the metrics documentation
# generate-metricsdocs regenerates docs/monitoring/metrics.md by running the
# ./pkg/monitoring/metricsdocs program with `go run` and capturing its
# standard output. The docs/monitoring directory is created first so the
# redirect does not fail on a missing path.
# NOTE(review): the redirect target is a relative path while mkdir uses the
# absolute working directory; this presumably expects make to be invoked from
# the repository root — confirm.
.PHONY: generate-metricsdocs
generate-metricsdocs:
mkdir -p $(shell pwd)/docs/monitoring
go run -ldflags="${LDFLAGS}" ./pkg/monitoring/metricsdocs > docs/monitoring/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ spec:
app.kubernetes.io/name: {{ template "function-mesh-operator.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: controller-manager
annotations:
prometheus.io/scrape: "true"
prometheus.io/path: "/metrics"
prometheus.io/port: "{{ .Values.controllerManager.metrics.port }}"
spec:
{{- if .Values.controllerManager.serviceAccount }}
serviceAccount: {{ .Values.controllerManager.serviceAccount }}
Expand All @@ -38,12 +42,15 @@ spec:
resources:
{{ toYaml .Values.controllerManager.resources | indent 12 }}
{{- end }}
{{- if .Values.admissionWebhook.enabled }}
ports:
{{- if .Values.admissionWebhook.enabled }}
- containerPort: 9443
name: webhook-server
protocol: TCP
{{- end}}
- containerPort: {{ .Values.controllerManager.metrics.port }}
name: http-metrics
protocol: TCP
command:
- /manager
args:
Expand All @@ -63,6 +70,18 @@ spec:
value: {{ .Values.admissionWebhook.enabled | quote }}
volumeMounts:
{{- include "function-mesh-operator.volumeMounts" . | nindent 8 }}
livenessProbe:
httpGet:
path: /healthz
port: {{ .Values.controllerManager.healthProbe.port }}
initialDelaySeconds: 15
periodSeconds: 20
readinessProbe:
httpGet:
path: /readyz
port: {{ .Values.controllerManager.healthProbe.port }}
initialDelaySeconds: 5
periodSeconds: 10
{{- with .Values.controllerManager.nodeSelector }}
nodeSelector:
{{ toYaml . | indent 8 }}
Expand All @@ -79,4 +98,4 @@ spec:
priorityClassName: {{ .Values.controllerManager.priorityClassName }}
{{- end }}
volumes:
{{- include "function-mesh-operator.volumes" . | nindent 6 -}}
{{- include "function-mesh-operator.volumes" . | nindent 6 -}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Service that exposes the controller manager's metrics port inside the
# release namespace.
apiVersion: v1
kind: Service
metadata:
name: function-mesh-controller-manager-metrics-service
namespace: {{ .Release.Namespace }}
labels:
{{- include "function-mesh-operator.labels" . | nindent 4 }}
spec:
ports:
# The service port and the target port are both taken from the same chart
# value (controllerManager.metrics.port), so they are always equal.
- name: http-metrics
port: {{ .Values.controllerManager.metrics.port }}
protocol: TCP
targetPort: {{ .Values.controllerManager.metrics.port }}
# Pods are selected by release instance plus the controller-manager
# component label.
selector:
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: controller-manager
3 changes: 3 additions & 0 deletions config/default/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ patchesStrategicMerge:
# 'CERTMANAGER' needs to be enabled to use ca injection
- webhookcainjection_patch.yaml

# [PROMETHEUS] To enable Prometheus monitoring, uncomment all sections marked with 'PROMETHEUS'.
- manager_metrics_patch.yaml

# the following config is for teaching kustomize how to do var substitution
vars:
# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix.
Expand Down
19 changes: 19 additions & 0 deletions config/default/manager_metrics_patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Patch for the controller-manager Deployment in the "system" namespace.
# It adds Prometheus scrape annotations to the pod template and declares the
# http-metrics container port on the "manager" container.
apiVersion: apps/v1
kind: Deployment
metadata:
name: controller-manager
namespace: system
spec:
template:
metadata:
annotations:
# Advertise the scrape endpoint: path /metrics on port 8080.
prometheus.io/scrape: "true"
prometheus.io/path: "/metrics"
# NOTE(review): 8080 here and in containerPort below presumably has to
# match the manager's metrics bind address — confirm against main.go.
prometheus.io/port: "8080"
spec:
containers:
- name: manager
ports:
- containerPort: 8080
name: http-metrics
protocol: TCP
12 changes: 12 additions & 0 deletions config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,17 @@ spec:
requests:
cpu: 100m
memory: 20Mi
livenessProbe:
httpGet:
path: /healthz
port: 8000
initialDelaySeconds: 15
periodSeconds: 20
readinessProbe:
httpGet:
path: /readyz
port: 8000
initialDelaySeconds: 5
periodSeconds: 10
serviceAccountName: controller-manager
terminationGracePeriodSeconds: 10
12 changes: 12 additions & 0 deletions controllers/function_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ package controllers

import (
"context"
"time"

"github.com/streamnative/function-mesh/pkg/monitoring"

v1 "k8s.io/api/batch/v1"
"k8s.io/client-go/rest"
Expand Down Expand Up @@ -63,6 +66,15 @@ type FunctionReconciler struct {
func (r *FunctionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
_ = r.Log.WithValues("function", req.NamespacedName)

startTime := time.Now()

defer func() {
monitoring.FunctionMeshControllerReconcileCount.WithLabelValues("function", req.NamespacedName.Name,
req.NamespacedName.Namespace).Inc()
monitoring.FunctionMeshControllerReconcileLatency.WithLabelValues("function", req.NamespacedName.Name,
req.NamespacedName.Namespace).Observe(float64(time.Since(startTime).Milliseconds()))
}()

// your logic here
function := &v1alpha1.Function{}
err := r.Get(ctx, req.NamespacedName, function)
Expand Down
12 changes: 12 additions & 0 deletions controllers/sink_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ package controllers

import (
"context"
"time"

"github.com/streamnative/function-mesh/pkg/monitoring"

v1 "k8s.io/api/batch/v1"
"k8s.io/client-go/rest"
Expand Down Expand Up @@ -62,6 +65,15 @@ type SinkReconciler struct {
func (r *SinkReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
_ = r.Log.WithValues("sink", req.NamespacedName)

startTime := time.Now()

defer func() {
monitoring.FunctionMeshControllerReconcileCount.WithLabelValues("sink", req.NamespacedName.Name,
req.NamespacedName.Namespace).Inc()
monitoring.FunctionMeshControllerReconcileLatency.WithLabelValues("sink", req.NamespacedName.Name,
req.NamespacedName.Namespace).Observe(float64(time.Since(startTime).Milliseconds()))
}()

// your logic here
sink := &v1alpha1.Sink{}
err := r.Get(ctx, req.NamespacedName, sink)
Expand Down
12 changes: 12 additions & 0 deletions controllers/source_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ package controllers

import (
"context"
"time"

"github.com/streamnative/function-mesh/pkg/monitoring"

"github.com/go-logr/logr"
"github.com/streamnative/function-mesh/api/compute/v1alpha1"
Expand Down Expand Up @@ -62,6 +65,15 @@ type SourceReconciler struct {
func (r *SourceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
_ = r.Log.WithValues("source", req.NamespacedName)

startTime := time.Now()

defer func() {
monitoring.FunctionMeshControllerReconcileCount.WithLabelValues("source", req.NamespacedName.Name,
req.NamespacedName.Namespace).Inc()
monitoring.FunctionMeshControllerReconcileLatency.WithLabelValues("source", req.NamespacedName.Name,
req.NamespacedName.Namespace).Observe(float64(time.Since(startTime).Milliseconds()))
}()

// your logic here
source := &v1alpha1.Source{}
err := r.Get(ctx, req.NamespacedName, source)
Expand Down
13 changes: 13 additions & 0 deletions docs/monitoring/metrics.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Operator Metrics
This document aims to help users who are not familiar with the metrics exposed by this operator.
The metrics documentation is auto-generated by the utility tool "pkg/monitoring/metricsdocs" and reflects all of the metrics that are exposed by the operator.

## Operator Metrics List
### function_mesh_reconcile_count
Number of reconcile operations. Type: Counter.
### function_mesh_reconcile_latency
Latency of reconcile operations; bucket boundaries are 10ms, 100ms, 1s, 10s, 30s and 60s. Type: Histogram.
## Developing new metrics
After developing new metrics or changing old ones, please run "make generate-metricsdocs" to regenerate this document.

If a new metric does not fit the format used in this document, please change "pkg/monitoring/metricsdocs" according to your needs.
15 changes: 15 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ import (
"os"
"strconv"

"sigs.k8s.io/controller-runtime/pkg/healthz"

"github.com/streamnative/function-mesh/pkg/monitoring"

"github.com/go-logr/logr"
computev1alpha1 "github.com/streamnative/function-mesh/api/compute/v1alpha1"
"github.com/streamnative/function-mesh/controllers"
Expand Down Expand Up @@ -53,6 +57,8 @@ func init() {
utilruntime.Must(clientgoscheme.AddToScheme(scheme))

utilruntime.Must(computev1alpha1.AddToScheme(scheme))

monitoring.RegisterMetrics()
// +kubebuilder:scaffold:scheme
}

Expand Down Expand Up @@ -203,6 +209,15 @@ func main() {
}
// +kubebuilder:scaffold:builder

if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
setupLog.Error(err, "unable to set up health check")
os.Exit(1)
}
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
setupLog.Error(err, "unable to set up ready check")
os.Exit(1)
}

setupLog.Info("starting manager")
if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
setupLog.Error(err, "problem running manager")
Expand Down
50 changes: 50 additions & 0 deletions manifests/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ spec:
type: string
downloaderImage:
type: string
filebeatImage:
type: string
forwardSourceMessageProperty:
type: boolean
funcConfig:
Expand Down Expand Up @@ -256,6 +258,11 @@ spec:
type: object
logTopic:
type: string
logTopicAgent:
enum:
- runtime
- sidecar
type: string
maxMessageRetry:
format: int32
type: integer
Expand Down Expand Up @@ -3623,6 +3630,8 @@ spec:
type: string
downloaderImage:
type: string
filebeatImage:
type: string
golang:
properties:
go:
Expand Down Expand Up @@ -3817,6 +3826,13 @@ spec:
required:
- jar
type: object
logTopic:
type: string
logTopicAgent:
enum:
- runtime
- sidecar
type: string
maxMessageRetry:
format: int32
type: integer
Expand Down Expand Up @@ -6929,6 +6945,8 @@ spec:
type: string
downloaderImage:
type: string
filebeatImage:
type: string
forwardSourceMessageProperty:
type: boolean
golang:
Expand Down Expand Up @@ -7053,6 +7071,13 @@ spec:
required:
- jar
type: object
logTopic:
type: string
logTopicAgent:
enum:
- runtime
- sidecar
type: string
maxReplicas:
format: int32
type: integer
Expand Down Expand Up @@ -10300,6 +10325,8 @@ spec:
type: string
downloaderImage:
type: string
filebeatImage:
type: string
forwardSourceMessageProperty:
type: boolean
funcConfig:
Expand Down Expand Up @@ -10501,6 +10528,11 @@ spec:
type: object
logTopic:
type: string
logTopicAgent:
enum:
- runtime
- sidecar
type: string
maxMessageRetry:
format: int32
type: integer
Expand Down Expand Up @@ -13941,6 +13973,8 @@ spec:
type: string
downloaderImage:
type: string
filebeatImage:
type: string
golang:
properties:
go:
Expand Down Expand Up @@ -14135,6 +14169,13 @@ spec:
required:
- jar
type: object
logTopic:
type: string
logTopicAgent:
enum:
- runtime
- sidecar
type: string
maxMessageRetry:
format: int32
type: integer
Expand Down Expand Up @@ -17320,6 +17361,8 @@ spec:
type: string
downloaderImage:
type: string
filebeatImage:
type: string
forwardSourceMessageProperty:
type: boolean
golang:
Expand Down Expand Up @@ -17444,6 +17487,13 @@ spec:
required:
- jar
type: object
logTopic:
type: string
logTopicAgent:
enum:
- runtime
- sidecar
type: string
maxReplicas:
format: int32
type: integer
Expand Down
Loading