Skip to content

Commit 6283872

Browse files
committed
WIP: prometheus_exporter iteration 9
- More cleanup
- Removed the Gauges for each status of playbooks and tasks: in hindsight, they generated a lot of needless metrics and were not useful once we understood how to use Summaries
- Added a package extra for [prometheus]
- Added first iteration of docs
- Added first iteration of grafana dashboard
1 parent 7558a6f commit 6283872

11 files changed

+2008
-34
lines changed

ara/cli/prometheus.py

+21-34
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
DEFAULT_HOST_LABELS = ["name", "playbook", "updated"]
4040

4141

42-
# TODO: This method should be more flexible and live in a library
42+
# TODO: This could be made more flexible and live in a library
4343
def get_search_results(client, kind, limit, created_after):
4444
"""
4545
kind: string, one of ["playbooks", "hosts", "tasks"]
@@ -72,31 +72,22 @@ def __init__(self, client, log, limit, labels=DEFAULT_PLAYBOOK_LABELS):
7272
self.labels = labels
7373

7474
self.metrics = {
75-
"completed": Gauge("ara_playbooks_completed", "Completed Ansible playbooks", labels),
76-
"expired": Gauge("ara_playbooks_expired", "Expired Ansible playbooks", labels),
77-
"failed": Gauge("ara_playbooks_failed", "Failed Ansible playbooks", labels),
7875
"range": Gauge("ara_playbooks_range", "Limit metric collection to the N most recent playbooks"),
79-
"running": Gauge("ara_playbooks_running", "Running Ansible playbooks", labels),
8076
"total": Gauge("ara_playbooks_total", "Total number of playbooks recorded by ara"),
81-
"duration": Summary("ara_playbooks_duration", "Duration (in seconds) of playbooks recorded by ara", labels),
77+
"playbooks": Summary(
78+
"ara_playbooks", "Labels and duration (in seconds) of playbooks recorded by ara", labels
79+
),
8280
}
81+
self.metrics["range"].set(self.limit)
8382

84-
def collect_metrics(self, created_after=None, limit=1000):
85-
self.metrics["range"].set(limit)
86-
83+
def collect_metrics(self, created_after=None):
8784
playbooks = get_search_results(self.client, "playbooks", self.limit, created_after)
8885
# Save the most recent timestamp so we only scrape beyond it next time
8986
if playbooks:
9087
created_after = cli_utils.increment_timestamp(playbooks[0]["created"])
9188
self.log.info(f"updating metrics for {len(playbooks)} playbooks...")
9289

9390
for playbook in playbooks:
94-
self.metrics["total"].inc()
95-
96-
# Gather the values of each label so we can attach them to our metrics
97-
labels = {label: playbook[label] for label in self.labels}
98-
self.metrics[playbook["status"]].labels(**labels).inc()
99-
10091
# The API returns a duration in string format, convert it back to seconds
10192
# so we can use it as a value for the metric.
10293
if playbook["duration"] is not None:
@@ -108,7 +99,12 @@ def collect_metrics(self, created_after=None, limit=1000):
10899
seconds = 0
109100
else:
110101
seconds = 0
111-
self.metrics["duration"].labels(**labels).observe(seconds)
102+
103+
# Gather the values of each label so we can attach them to our metrics
104+
labels = {label: playbook[label] for label in self.labels}
105+
106+
self.metrics["playbooks"].labels(**labels).observe(seconds)
107+
self.metrics["total"].inc()
112108

113109
return created_after
114110

@@ -121,33 +117,20 @@ def __init__(self, client, log, limit, labels=DEFAULT_TASK_LABELS):
121117
self.labels = labels
122118

123119
self.metrics = {
124-
"completed": Gauge("ara_tasks_completed", "Completed Ansible tasks", labels),
125-
"expired": Gauge("ara_tasks_expired", "Expired Ansible tasks", labels),
126-
"failed": Gauge("ara_tasks_failed", "Failed Ansible tasks", labels),
127120
"range": Gauge("ara_tasks_range", "Limit metric collection to the N most recent tasks"),
128-
"running": Gauge("ara_tasks_running", "Running Ansible tasks", labels),
129121
"total": Gauge("ara_tasks_total", "Number of tasks recorded by ara in prometheus"),
130-
"duration": Summary(
131-
"ara_tasks_duration", "Duration, in seconds, of playbook tasks recorded by ara", labels
132-
),
122+
"tasks": Summary("ara_tasks", "Labels and duration, in seconds, of playbook tasks recorded by ara", labels),
133123
}
134-
135-
def collect_metrics(self, created_after=None):
136124
self.metrics["range"].set(self.limit)
137125

126+
def collect_metrics(self, created_after=None):
138127
tasks = get_search_results(self.client, "tasks", self.limit, created_after)
139128
# Save the most recent timestamp so we only scrape beyond it next time
140129
if tasks:
141130
created_after = cli_utils.increment_timestamp(tasks[0]["created"])
142131
self.log.info(f"updating metrics for {len(tasks)} tasks...")
143132

144133
for task in tasks:
145-
self.metrics["total"].inc()
146-
147-
# Gather the values of each label so we can attach them to our metrics
148-
labels = {label: task[label] for label in self.labels}
149-
self.metrics[task["status"]].labels(**labels).inc()
150-
151134
# The API returns a duration in string format, convert it back to seconds
152135
# so we can use it as a value for the metric.
153136
if task["duration"] is not None:
@@ -159,7 +142,12 @@ def collect_metrics(self, created_after=None):
159142
seconds = 0
160143
else:
161144
seconds = 0
162-
self.metrics["duration"].labels(**labels).observe(seconds)
145+
146+
# Gather the values of each label so we can attach them to our metrics
147+
labels = {label: task[label] for label in self.labels}
148+
149+
self.metrics["tasks"].labels(**labels).observe(seconds)
150+
self.metrics["total"].inc()
163151

164152
return created_after
165153

@@ -180,10 +168,9 @@ def __init__(self, client, log, limit, labels=DEFAULT_HOST_LABELS):
180168
"total": Gauge("ara_hosts_total", "Hosts recorded by ara"),
181169
"unreachable": Gauge("ara_hosts_unreachable", "Number of unreachable errors on a host", labels),
182170
}
183-
184-
def collect_metrics(self, created_after=None):
185171
self.metrics["range"].set(self.limit)
186172

173+
def collect_metrics(self, created_after=None):
187174
hosts = get_search_results(self.client, "hosts", self.limit, created_after)
188175
# Save the most recent timestamp so we only scrape beyond it next time
189176
if hosts:

0 commit comments

Comments
 (0)