-
-
Notifications
You must be signed in to change notification settings - Fork 537
WIP: feat: add prometheus role #989
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
46c0514
3659836
16b075e
4ab09f6
f107f90
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -150,6 +150,14 @@ haproxy_compile_requirements: | |
| - ca-certificates | ||
| - libssl-dev | ||
|
|
||
| # node exporter | ||
| node_exporter_version: "1.9.1" | ||
| node_exporter_package_repo: "https://github.com/prometheus/node_exporter/releases/download/v{{ node_exporter_version }}/node_exporter-{{ node_exporter_version }}.linux-{{ prometheus_architecture_map[ansible_architecture] }}.tar.gz" | ||
|
|
||
| # postgres exporter | ||
| postgres_exporter_version: "0.17.1" | ||
| postgres_exporter_package_repo: "https://github.com/prometheus-community/postgres_exporter/releases/download/v{{ postgres_exporter_version }}/postgres_exporter-{{ postgres_exporter_version }}.linux-{{ prometheus_architecture_map[ansible_architecture] }}.tar.gz" | ||
|
|
||
|
Comment on lines
+153
to
+160
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think |
||
| # ================================================================================================= # | ||
| # Offline installation (if installation_method: "file") | ||
| # | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -720,3 +720,25 @@ netdata_conf: | |||||
| # You can fine-tune retention for each tier by setting a time limit or size limit. Setting a limit to 0 disables it. | ||||||
| # More options you can specify in the roles/netdata/templates/netdata.conf.j2 | ||||||
| # https://learn.netdata.cloud/docs/netdata-agent/configuration | ||||||
|
|
||||||
| ### Metric ### | ||||||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| with_metric_exporters: false # or true | ||||||
|
|
||||||
| # Node Exporter | ||||||
| node_exporter_bin_path: /opt/prometheus/node-exporter | ||||||
| node_exporter_host: "" | ||||||
| node_exporter_port: 9100 | ||||||
| node_exporter_options: "" | ||||||
|
|
||||||
| # Postgres Exporter | ||||||
| postgres_exporter_bin_path: /opt/prometheus/postgres-exporter | ||||||
| postgres_exporter_host: "" | ||||||
| postgres_exporter_port: 9187 | ||||||
| postgres_exporter_db: postgres | ||||||
| postgres_exporter_user: postgres | ||||||
| postgres_exporter_data_source_name: "user={{ postgres_exporter_user }} dbname={{ postgres_exporter_db }} host={{ postgresql_unix_socket_dir }} sslmode=disable" | ||||||
| postgres_exporter_query_filenames: | ||||||
| - queries-default.yml | ||||||
| # - queries-pg_stat_statements.yml | ||||||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
I suggest enabling pg_stat_statements by default, for example via postgresql_extensions:
- { ext: "pg_stat_statements", db: "postgres" } |
||||||
| # - queries-pg_statio_user_tables.yml | ||||||
|
Comment on lines
+742
to
+743
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why are these metrics disabled? |
||||||
| postgres_exporter_query_directory: files/ | ||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| --- | ||
| prometheus_architecture_map: | ||
| amd64: amd64 | ||
| x86_64: amd64 | ||
| armv6l: armhfv6 | ||
| armv7l: armhfv6 | ||
| aarch64: arm64 | ||
| arm64: arm64 | ||
| 32-bit: "386" | ||
| 64-bit: amd64 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,120 @@ | ||
| --- | ||
| # {{ ansible_managed }} | ||
|
|
||
| ###################################################################### | ||
| # Copied from | ||
| # https://github.com/wrouesnel/postgres_exporter/blob/1afbd62ab194c045a88488d77de2f116400dedb7/queries.yaml | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It looks like the queries file at this commit is about 8 years old. Perhaps newer metrics should be used? The latest version is 0.17.1 (2025-02-26).
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's update this part; we do not want to add outdated versions of the metrics. |
||
|
|
||
| pg_replication: | ||
| query: >- | ||
| SELECT EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))::INT | ||
| as lag | ||
| metrics: | ||
| - lag: | ||
| usage: "GAUGE" | ||
| description: "Replication lag behind master in seconds" | ||
|
|
||
| pg_postmaster: | ||
| query: >- | ||
| SELECT pg_postmaster_start_time as start_time_seconds from | ||
| pg_postmaster_start_time() | ||
| metrics: | ||
| - start_time_seconds: | ||
| usage: "GAUGE" | ||
| description: "Time at which postmaster started" | ||
|
|
||
| pg_stat_user_tables: | ||
| query: >- | ||
| SELECT schemaname, relname, seq_scan, seq_tup_read, idx_scan, | ||
| idx_tup_fetch, n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, | ||
| n_dead_tup, n_mod_since_analyze, last_vacuum, last_autovacuum, | ||
| last_analyze, last_autoanalyze, vacuum_count, autovacuum_count, | ||
| analyze_count, autoanalyze_count FROM pg_stat_user_tables | ||
| metrics: | ||
| - schemaname: | ||
| usage: "LABEL" | ||
| description: "Name of the schema that this table is in" | ||
| - relname: | ||
| usage: "LABEL" | ||
| description: "Name of this table" | ||
| - seq_scan: | ||
| usage: "COUNTER" | ||
| description: "Number of sequential scans initiated on this table" | ||
| - seq_tup_read: | ||
| usage: "COUNTER" | ||
| description: "Number of live rows fetched by sequential scans" | ||
| - idx_scan: | ||
| usage: "COUNTER" | ||
| description: "Number of index scans initiated on this table" | ||
| - idx_tup_fetch: | ||
| usage: "COUNTER" | ||
| description: "Number of live rows fetched by index scans" | ||
| - n_tup_ins: | ||
| usage: "COUNTER" | ||
| description: "Number of rows inserted" | ||
| - n_tup_upd: | ||
| usage: "COUNTER" | ||
| description: "Number of rows updated" | ||
| - n_tup_del: | ||
| usage: "COUNTER" | ||
| description: "Number of rows deleted" | ||
| - n_tup_hot_upd: | ||
| usage: "COUNTER" | ||
| description: >- | ||
| Number of rows HOT updated (i.e., with no separate index update | ||
| required) | ||
| - n_live_tup: | ||
| usage: "GAUGE" | ||
| description: "Estimated number of live rows" | ||
| - n_dead_tup: | ||
| usage: "GAUGE" | ||
| description: "Estimated number of dead rows" | ||
| - n_mod_since_analyze: | ||
| usage: "GAUGE" | ||
| description: "Estimated number of rows changed since last analyze" | ||
| - last_vacuum: | ||
| usage: "GAUGE" | ||
| description: >- | ||
| Last time at which this table was manually vacuumed (not counting | ||
| VACUUM FULL) | ||
| - last_autovacuum: | ||
| usage: "GAUGE" | ||
| description: >- | ||
| Last time at which this table was vacuumed by the autovacuum daemon | ||
| - last_analyze: | ||
| usage: "GAUGE" | ||
| description: "Last time at which this table was manually analyzed" | ||
| - last_autoanalyze: | ||
| usage: "GAUGE" | ||
| description: >- | ||
| Last time at which this table was analyzed by the autovacuum daemon | ||
| - vacuum_count: | ||
| usage: "COUNTER" | ||
| description: >- | ||
| Number of times this table has been manually vacuumed (not counting | ||
| VACUUM FULL) | ||
| - autovacuum_count: | ||
| usage: "COUNTER" | ||
| description: >- | ||
| Number of times this table has been vacuumed by the autovacuum daemon | ||
| - analyze_count: | ||
| usage: "COUNTER" | ||
| description: "Number of times this table has been manually analyzed" | ||
| - autoanalyze_count: | ||
| usage: "COUNTER" | ||
| description: >- | ||
| Number of times this table has been analyzed by the autovacuum daemon | ||
| pg_database: | ||
| query: >- | ||
| SELECT pg_database.datname, pg_database_size(pg_database.datname) as size | ||
| FROM pg_database | ||
| metrics: | ||
| - datname: | ||
| usage: "LABEL" | ||
| description: "Name of the database" | ||
| - size: | ||
| usage: "GAUGE" | ||
| description: "Disk space used by the database" | ||
|
|
||
| ###################################################################### | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,110 @@ | ||||||
| --- | ||||||
| # {{ ansible_managed }} | ||||||
|
|
||||||
| # This requires the pg_stat_statements module (disabled by default) | ||||||
| # https://www.postgresql.org/docs/9.6/static/pgstatstatements.html | ||||||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| pg_stat_statements: | ||||||
| metrics: | ||||||
| - rolname: | ||||||
| description: "Name of the user who executed the statement" | ||||||
| usage: LABEL | ||||||
| - datname: | ||||||
| description: "Name of the database" | ||||||
| usage: "LABEL" | ||||||
| - queryid: | ||||||
| description: Internal hash code, computed from the statement's parse tree | ||||||
| usage: LABEL | ||||||
| - query: | ||||||
| description: Text of a representative statement | ||||||
| usage: LABEL | ||||||
| - calls: | ||||||
| description: Number of times executed | ||||||
| usage: COUNTER | ||||||
| - total_time: | ||||||
| description: Total time spent in the statement, in milliseconds | ||||||
| usage: COUNTER | ||||||
| - min_time: | ||||||
| description: Minimum time spent in the statement, in milliseconds | ||||||
| usage: COUNTER | ||||||
| - max_time: | ||||||
| description: Maximum time spent in the statement, in milliseconds | ||||||
| usage: COUNTER | ||||||
| - mean_time: | ||||||
| description: Mean time spent in the statement, in milliseconds | ||||||
| usage: COUNTER | ||||||
| - stddev_time: | ||||||
| description: >- | ||||||
| Population standard deviation of time spent in the statement, in | ||||||
| milliseconds | ||||||
| usage: COUNTER | ||||||
| - rows: | ||||||
| description: Total number of rows retrieved or affected by the statement | ||||||
| usage: COUNTER | ||||||
| - shared_blks_hit: | ||||||
| description: Total number of shared block cache hits by the statement | ||||||
| usage: COUNTER | ||||||
| - shared_blks_read: | ||||||
| description: Total number of shared blocks read by the statement | ||||||
| usage: COUNTER | ||||||
| - shared_blks_dirtied: | ||||||
| description: Total number of shared blocks dirtied by the statement | ||||||
| usage: COUNTER | ||||||
| - shared_blks_written: | ||||||
| description: Total number of shared blocks written by the statement | ||||||
| usage: COUNTER | ||||||
| - local_blks_hit: | ||||||
| description: Total number of local block cache hits by the statement | ||||||
| usage: COUNTER | ||||||
| - local_blks_read: | ||||||
| description: Total number of local blocks read by the statement | ||||||
| usage: COUNTER | ||||||
| - local_blks_dirtied: | ||||||
| description: Total number of local blocks dirtied by the statement | ||||||
| usage: COUNTER | ||||||
| - local_blks_written: | ||||||
| description: Total number of local blocks written by the statement | ||||||
| usage: COUNTER | ||||||
| - temp_blks_read: | ||||||
| description: Total number of temp blocks read by the statement | ||||||
| usage: COUNTER | ||||||
| - temp_blks_written: | ||||||
| description: Total number of temp blocks written by the statement | ||||||
| usage: COUNTER | ||||||
| - blk_read_time: | ||||||
| description: >- | ||||||
| Total time the statement spent reading blocks, in milliseconds (if | ||||||
| track_io_timing is enabled, otherwise zero) | ||||||
| usage: GAUGE | ||||||
| - blk_write_time: | ||||||
| description: >- | ||||||
| Total time the statement spent writing blocks, in milliseconds (if | ||||||
| track_io_timing is enabled, otherwise zero) | ||||||
| usage: GAUGE | ||||||
| query: >- | ||||||
| SELECT | ||||||
| r.rolname, | ||||||
| d.datname, | ||||||
| queryid, | ||||||
| query, | ||||||
| calls, | ||||||
| total_time, | ||||||
| min_time, | ||||||
| max_time, | ||||||
| mean_time, | ||||||
| stddev_time, | ||||||
| rows, | ||||||
| shared_blks_hit, | ||||||
| shared_blks_read, | ||||||
| shared_blks_dirtied, | ||||||
| shared_blks_written, | ||||||
| local_blks_hit, | ||||||
| local_blks_read, | ||||||
| local_blks_dirtied, | ||||||
| local_blks_written, | ||||||
| temp_blks_read, | ||||||
| temp_blks_written, | ||||||
| blk_read_time, | ||||||
| blk_write_time | ||||||
| FROM pg_stat_statements s | ||||||
| LEFT JOIN pg_roles r on (s.userid = r.oid) | ||||||
| LEFT JOIN pg_database d on (s.dbid = d.oid) | ||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,43 @@ | ||
| --- | ||
| # {{ ansible_managed }} | ||
|
|
||
| # These stats may not be available | ||
| pg_statio_user_tables: | ||
| metrics: | ||
| - schemaname: | ||
| description: Name of the schema that this table is in | ||
| usage: LABEL | ||
| - relname: | ||
| description: Name of this table | ||
| usage: LABEL | ||
| - heap_blks_read: | ||
| description: Number of disk blocks read from this table | ||
| usage: COUNTER | ||
| - heap_blks_hit: | ||
| description: Number of buffer hits in this table | ||
| usage: COUNTER | ||
| - idx_blks_read: | ||
| description: Number of disk blocks read from all indexes on this table | ||
| usage: COUNTER | ||
| - idx_blks_hit: | ||
| description: Number of buffer hits in all indexes on this table | ||
| usage: COUNTER | ||
| - toast_blks_read: | ||
| description: >- | ||
| Number of disk blocks read from this table's TOAST table (if any) | ||
| usage: COUNTER | ||
| - toast_blks_hit: | ||
| description: Number of buffer hits in this table's TOAST table (if any) | ||
| usage: COUNTER | ||
| - tidx_blks_read: | ||
| description: >- | ||
| Number of disk blocks read from this table's TOAST table index (if any) | ||
| usage: COUNTER | ||
| - tidx_blks_hit: | ||
| description: >- | ||
| Number of buffer hits in this table's TOAST table index (if any) | ||
| usage: COUNTER | ||
| query: >- | ||
| SELECT schemaname, relname, heap_blks_read, heap_blks_hit, idx_blks_read, | ||
| idx_blks_hit, toast_blks_read, toast_blks_hit, tidx_blks_read, | ||
| tidx_blks_hit FROM pg_statio_user_tables |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| --- | ||
| - name: Restart postgres_exporter | ||
| ansible.builtin.systemd: | ||
| daemon_reload: true | ||
| name: prometheus-postgres-exporter | ||
| state: restarted | ||
| listen: "restart postgres_exporter" | ||
|
|
||
| - name: Verify postgres_exporter is responding to requests | ||
| ansible.builtin.uri: | ||
| url: "http://{% if postgres_exporter_host != '' %}{{ postgres_exporter_host }}{% else %}localhost{% endif %}:{{ postgres_exporter_port }}/" | ||
| return_content: true | ||
| retries: 5 | ||
| delay: 3 | ||
| register: metrics_output | ||
| failed_when: "'Metrics' not in metrics_output.content" | ||
| listen: "restart postgres_exporter" | ||
|
|
||
| - name: Restart node_exporter | ||
| ansible.builtin.service: | ||
| name: prometheus-node-exporter | ||
| state: restarted | ||
| listen: "restart node_exporter" | ||
|
|
||
| - name: Verify node_exporter is responding to requests | ||
| ansible.builtin.uri: | ||
| url: "http://{% if node_exporter_host != '' %}{{ node_exporter_host }}{% else %}localhost{% endif %}:{{ node_exporter_port }}/" | ||
| return_content: true | ||
| retries: 5 | ||
| delay: 3 | ||
| register: metrics_output | ||
| failed_when: "'Metrics' not in metrics_output.content" | ||
| listen: "restart node_exporter" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| --- | ||
| - name: Install Prometheus Node Exporter | ||
| ansible.builtin.import_tasks: node_exporter.yml | ||
|
|
||
| - name: Install Prometheus Postgres Exporter | ||
| ansible.builtin.import_tasks: postgres_exporter.yml |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps it is better to rename with_metric_exporters to
prometheus_metrics or prometheus_exporters.