Skip to content

Commit 49df3c6

Browse files
authored
Improve Patroni leader health check logic (#1306)
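Updated the health check so that the play fails only when no healthy Patroni leader node is detected: the check is now evaluated on the first host in `postgres_cluster` and fails when no host reports a `patroni_leader_result.status` of 200. The error message now names the cluster and the REST API port, giving clearer instructions for troubleshooting cluster status and REST API connectivity.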
1 parent 8eac936 commit 49df3c6

File tree

3 files changed

+43
-15
lines changed

3 files changed

+43
-15
lines changed

automation/playbooks/config_pgcluster.yml

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -66,14 +66,19 @@
6666
no_proxy: "{{ patroni_bind_address | default(bind_address, true) }}"
6767
when: inventory_hostname in groups['postgres_cluster']
6868

69-
# Stop, if Patroni is unavailable
69+
# Stop, if no healthy Patroni leader node
7070
- name: The Patroni cluster is unhealthy
7171
ansible.builtin.fail:
72-
msg: "Patroni is unavailable on {{ ansible_hostname }}. Please check the cluster status."
73-
changed_when: false
72+
msg: >
73+
No healthy leader node detected in cluster '{{ patroni_cluster_name | default('postgres-cluster') }}'.
74+
Please ensure the cluster is up and node responds on REST API port ({{ patroni_restapi_port | default('8008') }}).
7475
when:
75-
- inventory_hostname in groups['postgres_cluster']
76-
- (patroni_leader_result is undefined or patroni_leader_result.status == -1)
76+
- inventory_hostname == groups['postgres_cluster'][0]
77+
- (hostvars
78+
| dict2items
79+
| selectattr('value.patroni_leader_result.status', 'defined')
80+
| selectattr('value.patroni_leader_result.status', 'equalto', 200)
81+
| list | length) == 0
7782

7883
roles:
7984
- role: vitabaks.autobase.pre_checks
@@ -98,7 +103,7 @@
98103
name: "{{ item }}"
99104
groups: secondary
100105
postgresql_exists: true
101-
when: hostvars[item]['patroni_leader_result']['status'] != 200
106+
when: hostvars[item]['patroni_leader_result']['status'] == 503
102107
loop: "{{ groups['postgres_cluster'] }}"
103108
changed_when: false
104109
check_mode: false
@@ -108,7 +113,7 @@
108113
msg:
109114
- "Cluster Name: {{ patroni_cluster_name | default('postgres-cluster') }}"
110115
- "Cluster Leader: {{ ansible_hostname }}"
111-
when: inventory_hostname in groups['primary']
116+
when: inventory_hostname in groups['primary'] | default([])
112117

113118
# if 'cloud_provider' is 'aws', 'gcp', or 'azure'
114119
# set_fact: 'pgbackrest_install' to configure Postgres backups (TODO: Add the ability to configure backups in the UI)

automation/playbooks/pg_upgrade.yml

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,15 +30,23 @@
3030
register: patroni_leader_result
3131
changed_when: false
3232
failed_when: false
33+
check_mode: false
3334
environment:
3435
no_proxy: "{{ patroni_bind_address | default(bind_address, true) }}"
3536

36-
# Stop, if Patroni is unavailable
37+
# Stop, if no healthy Patroni leader node
3738
- name: The Patroni cluster is unhealthy
3839
ansible.builtin.fail:
39-
msg: "Patroni is unavailable on {{ ansible_hostname }}. Please check the cluster status."
40-
changed_when: false
41-
when: patroni_leader_result is undefined or patroni_leader_result.status == -1
40+
msg: >
41+
No healthy leader node detected in cluster '{{ patroni_cluster_name | default('postgres-cluster') }}'.
42+
Please ensure the cluster is up and node responds on REST API port ({{ patroni_restapi_port | default('8008') }}).
43+
when:
44+
- inventory_hostname == groups['postgres_cluster'][0]
45+
- (hostvars
46+
| dict2items
47+
| selectattr('value.patroni_leader_result.status', 'defined')
48+
| selectattr('value.patroni_leader_result.status', 'equalto', 200)
49+
| list | length) == 0
4250

4351
- name: '[Prepare] Add host to group "primary" (in-memory inventory)'
4452
ansible.builtin.add_host:
@@ -52,7 +60,7 @@
5260
ansible.builtin.add_host:
5361
name: "{{ item }}"
5462
groups: secondary
55-
when: hostvars[item]['patroni_leader_result']['status'] != 200
63+
when: hostvars[item]['patroni_leader_result']['status'] == 503
5664
loop: "{{ groups['postgres_cluster'] }}"
5765
changed_when: false
5866

@@ -61,7 +69,7 @@
6169
msg:
6270
- "Cluster Name: {{ patroni_cluster_name | default('postgres-cluster') }}"
6371
- "Cluster Leader: {{ ansible_hostname }}"
64-
when: inventory_hostname in groups['primary']
72+
when: inventory_hostname in groups['primary'] | default([])
6573
tags:
6674
- always
6775

automation/playbooks/update_pgcluster.yml

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,21 @@
3838
no_proxy: "{{ patroni_bind_address | default(bind_address, true) }}"
3939
tags: always
4040

41+
# Stop, if no healthy Patroni leader node
42+
- name: The Patroni cluster is unhealthy
43+
ansible.builtin.fail:
44+
msg: >
45+
No healthy leader node detected in cluster '{{ patroni_cluster_name | default('postgres-cluster') }}'.
46+
Please ensure the cluster is up and node responds on REST API port ({{ patroni_restapi_port | default('8008') }}).
47+
when:
48+
- inventory_hostname == groups['postgres_cluster'][0]
49+
- (hostvars
50+
| dict2items
51+
| selectattr('value.patroni_leader_result.status', 'defined')
52+
| selectattr('value.patroni_leader_result.status', 'equalto', 200)
53+
| list | length) == 0
54+
tags: always
55+
4156
- name: '[Prepare] Add host to group "primary" (in-memory inventory)'
4257
ansible.builtin.add_host:
4358
name: "{{ item }}"
@@ -51,7 +66,7 @@
5166
ansible.builtin.add_host:
5267
name: "{{ item }}"
5368
groups: secondary
54-
when: hostvars[item]['patroni_leader_result']['status'] != 200
69+
when: hostvars[item]['patroni_leader_result']['status'] == 503
5570
loop: "{{ groups['postgres_cluster'] }}"
5671
changed_when: false
5772
tags: always
@@ -61,7 +76,7 @@
6176
msg:
6277
- "Cluster Name: {{ patroni_cluster_name | default('postgres-cluster') }}"
6378
- "Cluster Leader: {{ ansible_hostname }}"
64-
when: inventory_hostname in groups['primary']
79+
when: inventory_hostname in groups['primary'] | default([])
6580
tags: always
6681

6782
- name: "(1/4) PRE-UPDATE: Perform pre-update tasks"

0 commit comments

Comments
 (0)