Skip to content

Commit dcba6d4

Browse files
pat-s and vitabaks authored
feat: add remove_node playbook (#1207)
Co-authored-by: Vitaliy Kukharik <[email protected]>
1 parent 9634eb5 commit dcba6d4

File tree

10 files changed

+459
-22
lines changed

10 files changed

+459
-22
lines changed

automation/playbooks/remove_cluster.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
] | reject('equalto', '') | list
2525
}}
2626
vars:
27-
default_postgresql_version: 17
27+
default_postgresql_version: "{{ postgresql_version | default('17') }}"
2828
default_postgresql_home_dir: "{{ '/var/lib/postgresql' if ansible_os_family == 'Debian' else '/var/lib/pgsql' }}"
2929
default_postgresql_cluster_name: "{{ 'main' if ansible_os_family == 'Debian' else 'data' }}"
3030
default_postgresql_data_dir: "\
Lines changed: 346 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,346 @@
1+
---
2+
# First play of the playbook: runs against the postgres_cluster group with
# privilege escalation and fact gathering enabled.
# target_node mirrors the node_to_remove variable and falls back to an empty string.
- name: vitabaks.autobase.remove_node | Remove node from PostgreSQL cluster
3+
hosts: postgres_cluster
4+
become: true
5+
gather_facts: true
6+
vars:
7+
target_node: "{{ node_to_remove | default('') }}"
8+
9+
# Pre-flight checks that run before the removal tasks.
# NOTE(review): the closing `tags: always` presumably belongs to the enclosing
# block; indentation is not visible in this view - confirm.
pre_tasks:
10+
- block:
11+
# Includes the vitabaks.autobase.bind_address role.
- name: Define bind_address
12+
ansible.builtin.include_role:
13+
name: vitabaks.autobase.bind_address
14+
15+
# Aborts the run with an explanatory message when target_node is empty.
- name: Validate that node_to_remove is specified
16+
run_once: true # noqa run-once
17+
ansible.builtin.fail:
18+
msg: >-
19+
Please specify the node_to_remove variable with the node
20+
to remove from the cluster.
21+
when: target_node | length == 0
22+
23+
# Runs `patronictl ... list` (config path defaults to /etc/patroni/patroni.yml)
# and registers the result as patronictl_list_before; never reported as changed.
# NOTE(review): run_once is combined with `when: inventory_hostname != target_node`.
# Per the Ansible docs a run_once conditional is evaluated with the first host's
# variables only, so if that host is the target node this task is presumably
# skipped and the registered result has no `stdout`, which the
# "Fail if trying to remove primary node" condition reads - confirm, and
# consider delegating to a non-target host instead.
- name: Fetch Patroni cluster members before removal
24+
run_once: true # noqa run-once
25+
ansible.builtin.command: >-
26+
patronictl -c {{ patroni_config_file | default('/etc/patroni/patroni.yml') }} list
27+
register: patronictl_list_before
28+
changed_when: false
29+
environment:
30+
PATH: "{{ ansible_env.PATH }}:/usr/bin:/usr/local/bin"
31+
when: inventory_hostname != target_node
32+
33+
# Prints the registered listing line by line.
- name: Show Patroni cluster members before removal
34+
run_once: true # noqa run-once
35+
ansible.builtin.debug:
36+
msg: "{{ patronictl_list_before.stdout_lines }}"
37+
when: inventory_hostname != target_node
38+
39+
# Refuses to continue when the listing contains target_node followed by "Leader".
# NOTE(review): target_node is concatenated into the regex unescaped, so regex
# metacharacters in the name (for example dots) match loosely. It is also
# presumably an inventory name, which may differ from the member name that
# patronictl prints - verify both.
- name: Fail if trying to remove primary node
40+
run_once: true # noqa run-once
41+
ansible.builtin.fail:
42+
msg: >-
43+
Cannot remove primary node '{{ target_node }}'. Please perform
44+
switchover first or specify a replica node.
45+
when: patronictl_list_before.stdout is search(target_node + '.*Leader')
46+
tags: always
47+
48+
# Tear-down of Patroni/PostgreSQL on the node being removed.
# NOTE(review): the two consecutive `when:` keys near the end cannot sit on the
# same mapping, so `when: inventory_hostname == target_node` and the `tags:` line
# presumably apply to the whole block - indentation is not visible here.
tasks:
49+
- block:
50+
# Stops the patroni service and disables it at boot.
- name: Stop and disable patroni service on target node
51+
ansible.builtin.service:
52+
name: patroni
53+
state: stopped
54+
enabled: false
55+
56+
# Deletes the data, config and (when set) WAL directories; empty entries are
# filtered out of the loop list. The task-local default_* vars supply fallback
# paths derived from ansible_os_family, cloud_provider and postgresql_version
# (default '17') when postgresql_data_dir / postgresql_conf_dir are undefined.
# Skipped when remove_postgres_data evaluates to false (it defaults to true).
- name: Delete PostgreSQL content on target node
57+
ansible.builtin.file:
58+
path: "{{ item }}"
59+
state: absent
60+
loop: >-
61+
{{
62+
[
63+
postgresql_data_dir | default(default_postgresql_data_dir),
64+
postgresql_conf_dir | default(default_postgresql_conf_dir),
65+
postgresql_wal_dir | default('')
66+
] | reject('equalto', '') | list
67+
}}
68+
vars:
69+
default_postgresql_version: "{{ postgresql_version | default('17') }}"
70+
default_postgresql_home_dir: "{{ '/var/lib/postgresql' if ansible_os_family == 'Debian' else '/var/lib/pgsql' }}"
71+
default_postgresql_cluster_name: "{{ 'main' if ansible_os_family == 'Debian' else 'data' }}"
72+
default_postgresql_data_dir: "\
73+
{% if cloud_provider | default('') | length > 0 %}\
74+
{{ pg_data_mount_path | default('/pgdata') }}/{{ default_postgresql_version }}/{{ default_postgresql_cluster_name }}\
75+
{% else %}\
76+
{{ default_postgresql_home_dir }}/{{ default_postgresql_version }}/{{ default_postgresql_cluster_name }}\
77+
{% endif %}"
78+
default_postgresql_conf_dir: "\
79+
{% if ansible_os_family == 'Debian' %}\
80+
/etc/postgresql/{{ default_postgresql_version }}/{{ default_postgresql_cluster_name }}\
81+
{% else %}\
82+
{{ default_postgresql_data_dir }}\
83+
{% endif %}"
84+
when: remove_postgres_data | default(true) | bool
85+
when: inventory_hostname == target_node
86+
tags: postgres, postgresql
87+
88+
# Removes SSH trust for the target node.
# NOTE(review): the trailing `when: inventory_hostname != target_node` and `tags:`
# presumably scope the whole block to the remaining nodes; whether
# `ignore_errors: true` is task-level or block-level cannot be told from this view.
- block:
89+
# Reads the postgres user's public key from the target node (delegated there);
# only attempted when target_node is a member of the postgres_cluster group.
- name: Read postgres public SSH key from target node
90+
ansible.builtin.slurp:
91+
src: "~postgres/.ssh/id_rsa.pub"
92+
delegate_to: "{{ target_node }}"
93+
register: target_node_pubkey
94+
changed_when: false
95+
when: target_node in (groups['postgres_cluster'] | default([]))
96+
97+
# Drops that key from the postgres user's authorized_keys; skipped when the
# decoded key is empty (for example when the read above did not run).
- name: Remove target node pubkey from authorized_keys
98+
ansible.posix.authorized_key:
99+
user: postgres
100+
state: absent
101+
key: "{{ target_node_pubkey.content | default('') | b64decode }}"
102+
when: target_node_pubkey.content | default('') | b64decode | length > 0
103+
104+
# Removes known_hosts entries as the postgres user. For an inventory member the
# loop covers its bind_address, ansible_hostname and inventory name (empty and
# duplicate values dropped); otherwise only target_node itself is used.
- name: Remove known_hosts entries for target node
105+
become: true
106+
become_user: postgres
107+
ansible.builtin.known_hosts:
108+
state: absent
109+
path: "~postgres/.ssh/known_hosts"
110+
name: "{{ item }}"
111+
loop: >-
112+
{{ target_node_hostvars if target_node in (groups['postgres_cluster'] | default([])) else [target_node] }}
113+
vars:
114+
target_node_hostvars: >-
115+
{{
116+
[
117+
hostvars[target_node].get('bind_address',''),
118+
hostvars[target_node].get('ansible_hostname',''),
119+
target_node
120+
] | reject('equalto','') | unique | list
121+
}}
122+
ignore_errors: true
123+
when: inventory_hostname != target_node
124+
tags: postgres, postgresql, ssh_keys
125+
126+
# Second play: runs against the etcd_cluster group without gathering facts.
# NOTE(review): the final `when: dcs_type | default('etcd') == 'etcd'` and
# `tags: etcd` presumably apply to the whole block (two `when:` keys in a row
# cannot share one mapping) - indentation is not visible in this view.
- name: vitabaks.autobase.remove_node | Remove node from etcd cluster
127+
hosts: etcd_cluster
128+
become: true
129+
gather_facts: false
130+
vars:
131+
target_node: "{{ node_to_remove | default('') }}"
132+
tasks:
133+
- block:
# Lists etcd members as a table via /usr/local/bin/etcdctl (API v3). The CA,
# cert and key flags are added unless etcd_tls_enable is false; paths default
# to files under /etc/etcd/tls.
# NOTE(review): run_once plus an inventory_hostname condition is evaluated for
# the first host only; if that host is the target node the listing is
# presumably skipped - confirm.
- name: Fetch etcd cluster members before removal
134+
run_once: true # noqa run-once
135+
ansible.builtin.command: >-
136+
/usr/local/bin/etcdctl member list
137+
--write-out=table
138+
{% if etcd_tls_enable | default(true) | bool %}
139+
--cacert={{ etcd_tls_dir | default('/etc/etcd/tls') }}/{{ etcd_tls_ca_crt | default('ca.crt') }}
140+
--cert={{ etcd_tls_dir | default('/etc/etcd/tls') }}/{{ etcd_tls_server_crt | default('server.crt') }}
141+
--key={{ etcd_tls_dir | default('/etc/etcd/tls') }}/{{ etcd_tls_server_key | default('server.key') }}
142+
{% endif %}
143+
environment:
144+
ETCDCTL_API: "3"
145+
register: etcd_members_list_before
146+
changed_when: false
147+
when: inventory_hostname != target_node
148+
149+
# Prints the member table captured above.
- name: Show etcd cluster members before removal
150+
run_once: true # noqa run-once
151+
ansible.builtin.debug:
152+
msg: "{{ etcd_members_list_before.stdout_lines }}"
153+
when: inventory_hostname != target_node
154+
155+
# Delegates the actual removal to the member_remove task file of the
# vitabaks.autobase.etcd role (its contents are not visible in this view).
- name: Remove target node from etcd cluster
156+
ansible.builtin.include_role:
157+
name: vitabaks.autobase.etcd
158+
tasks_from: member_remove
159+
160+
# Same listing command as before the removal, retried up to 3 times with a
# 5 second delay until it exits with rc 0.
- name: Fetch etcd cluster members after removal
161+
run_once: true # noqa run-once
162+
ansible.builtin.command: >-
163+
/usr/local/bin/etcdctl member list
164+
--write-out=table
165+
{% if etcd_tls_enable | default(true) | bool %}
166+
--cacert={{ etcd_tls_dir | default('/etc/etcd/tls') }}/{{ etcd_tls_ca_crt | default('ca.crt') }}
167+
--cert={{ etcd_tls_dir | default('/etc/etcd/tls') }}/{{ etcd_tls_server_crt | default('server.crt') }}
168+
--key={{ etcd_tls_dir | default('/etc/etcd/tls') }}/{{ etcd_tls_server_key | default('server.key') }}
169+
{% endif %}
170+
environment:
171+
ETCDCTL_API: "3"
172+
changed_when: false
173+
register: etcd_members_list_after
174+
until: etcd_members_list_after.rc == 0
175+
retries: 3
176+
delay: 5
177+
when: inventory_hostname != target_node
178+
179+
# Prints the member table captured after the removal.
- name: Show etcd cluster members after removal
180+
run_once: true # noqa run-once
181+
ansible.builtin.debug:
182+
msg: "{{ etcd_members_list_after.stdout_lines }}"
183+
when: inventory_hostname != target_node
184+
when: dcs_type | default('etcd') == 'etcd'
185+
tags: etcd
187+
188+
# Third play: runs against the consul_instances group with fact gathering enabled.
# NOTE(review): the Consul commands in this play hard-code
# https://127.0.0.1:8500 and /etc/consul/tls/ca.crt, while the later delete task
# reads consul_config_path - confirm the fixed paths match every deployment.
- name: vitabaks.autobase.remove_node | Remove node from Consul cluster
189+
hosts: consul_instances
190+
become: true
191+
gather_facts: true
192+
vars:
193+
target_node: "{{ node_to_remove | default('') }}"
194+
tasks:
195+
- block:
# Captures `consul operator raft list-peers` output in consul_members_list_before,
# retrying up to 3 times with a 5 second delay until rc is 0.
# NOTE(review): the Raft peer list presumably contains server nodes only, so a
# client-role target would never be found by the searches below and the
# force-leave would be skipped - verify against the Consul documentation.
- name: Fetch consul cluster members before removal
196+
run_once: true # noqa run-once
197+
ansible.builtin.command: >-
198+
consul operator raft list-peers \
199+
-http-addr=https://127.0.0.1:8500 \
200+
-ca-file=/etc/consul/tls/ca.crt
201+
changed_when: false
202+
register: consul_members_list_before
203+
until: consul_members_list_before.rc == 0
204+
retries: 3
205+
delay: 5
206+
when: inventory_hostname != target_node
207+
208+
# Prints the peer list captured above.
- name: Show consul cluster members before removal
209+
run_once: true # noqa run-once
210+
ansible.builtin.debug:
211+
msg: "{{ consul_members_list_before.stdout_lines }}"
212+
when: inventory_hostname != target_node
213+
214+
# Informational message shown when the target's ansible_hostname (or, failing
# that, its inventory name) does not occur in the captured peer list.
- name: No target node in Consul cluster, skipping removal
215+
run_once: true # noqa run-once
216+
ansible.builtin.debug:
217+
msg: >-
218+
Target node '{{ hostvars[target_node].ansible_hostname | default(target_node) }}' not found in Consul cluster members,
219+
skipping removal.
220+
when:
221+
- inventory_hostname != target_node
222+
- consul_members_list_before.stdout | default('') is not search(hostvars[target_node].ansible_hostname | default(target_node))
223+
224+
# Issues `consul force-leave <name>` from a remaining node, only when the name
# was found in the captured peer list.
- name: Force-leave target node from consul cluster
225+
run_once: true # noqa run-once
226+
ansible.builtin.command: >-
227+
consul force-leave \
228+
-http-addr=https://127.0.0.1:8500 \
229+
-ca-file=/etc/consul/tls/ca.crt \
230+
{{ hostvars[target_node].ansible_hostname | default(target_node) }}
231+
when:
232+
- inventory_hostname != target_node
233+
- consul_members_list_before.stdout | default('') is search(hostvars[target_node].ansible_hostname | default(target_node))
235+
236+
# Derives target_raft_id from the captured peer list: picks the first line that
# starts with the target's name followed by whitespace and extracts the
# 36-character hex-and-dash token after the first column; empty string when no
# such line exists. Only evaluated when the target's consul_node_role is 'server'
# (it defaults to 'client') and the name appears in the captured list.
- name: Extract target node Consul Raft ID
237+
run_once: true # noqa run-once
238+
ansible.builtin.set_fact:
239+
target_raft_id: >-
240+
{{ target_line | regex_replace('^\S+\s+([0-9a-f-]{36}).*', '\1') if target_line | length > 0 else '' }}
241+
vars:
242+
target_line: >-
243+
{{
244+
(consul_members_list_before.stdout_lines
245+
| select('match', '^' ~ (hostvars[target_node].ansible_hostname | default(target_node)) ~ '\s+')
246+
| list | first) | default('')
247+
}}
248+
when:
249+
- inventory_hostname != target_node
250+
- hostvars[target_node].consul_node_role | default('client') == 'server'
251+
- consul_members_list_before.stdout | default('') is search(hostvars[target_node].ansible_hostname | default(target_node))
252+
253+
# Removes the peer by ID from the Raft configuration, retried up to 3 times with
# a 2 second delay; skipped when no ID was extracted.
# NOTE(review): this is the only Consul command here that passes -client-cert and
# -client-key; list-peers and force-leave pass only -ca-file - confirm whether
# the difference is intentional.
- name: Remove target node from the Raft configuration
254+
run_once: true # noqa run-once
255+
ansible.builtin.command: >-
256+
consul operator raft remove-peer -id="{{ target_raft_id }}" \
257+
-http-addr=https://127.0.0.1:8500 \
258+
-ca-file=/etc/consul/tls/ca.crt \
259+
-client-cert=/etc/consul/tls/server.crt \
260+
-client-key=/etc/consul/tls/server.key
261+
register: raft_remove_result
262+
until: raft_remove_result.rc == 0
263+
retries: 3
264+
delay: 2
265+
when:
266+
- inventory_hostname != target_node
267+
- hostvars[target_node].consul_node_role | default('client') == 'server'
268+
- target_raft_id | default('') | length > 0
269+
270+
# Stops the consul service on the node being removed and disables it at boot.
- name: Stop and disable consul service on target node
271+
ansible.builtin.service:
272+
name: consul
273+
state: stopped
274+
enabled: false
275+
when: inventory_hostname == target_node
276+
277+
# Deletes the Consul data and config directories on the target node (defaults:
# /var/lib/consul and /etc/consul); skipped when remove_consul_data is false.
- name: Delete consul content on target node
278+
ansible.builtin.file:
279+
path: "{{ item }}"
280+
state: absent
281+
loop:
282+
- "{{ consul_data_path | default('/var/lib/consul') }}"
283+
- "{{ consul_config_path | default('/etc/consul') }}"
284+
when:
285+
- inventory_hostname == target_node
286+
- remove_consul_data | default(true) | bool
287+
288+
# Re-reads the Raft peer list (3 retries, 5 second delay), but only when the
# target was present in the listing captured before the removal.
- name: Fetch consul cluster members after removal
289+
run_once: true # noqa run-once
290+
ansible.builtin.command: >-
291+
consul operator raft list-peers \
292+
-http-addr=https://127.0.0.1:8500 \
293+
-ca-file=/etc/consul/tls/ca.crt
294+
changed_when: false
295+
register: consul_members_list_after
296+
until: consul_members_list_after.rc == 0
297+
retries: 3
298+
delay: 5
299+
when:
300+
- inventory_hostname != target_node
301+
- consul_members_list_before.stdout | default('') is search(hostvars[target_node].ansible_hostname | default(target_node))
302+
303+
# Prints the post-removal peer list; guarded on stdout_lines being defined
# because the fetch above may have been skipped.
# NOTE(review): the closing `when: dcs_type | default('etcd') == 'consul'` and
# `tags: consul` presumably apply to the enclosing block rather than this task -
# indentation is not visible in this view.
- name: Show consul cluster members after removal
304+
run_once: true # noqa run-once
305+
ansible.builtin.debug:
306+
msg: "{{ consul_members_list_after.stdout_lines }}"
307+
when:
308+
- inventory_hostname != target_node
309+
- consul_members_list_after.stdout_lines is defined
310+
when: dcs_type | default('etcd') == 'consul'
311+
tags: consul
312+
313+
# Final play: runs against the postgres_cluster group again, with fact gathering.
- name: vitabaks.autobase.remove_node | Finalizing
314+
hosts: postgres_cluster
315+
become: true
316+
gather_facts: true
317+
vars:
318+
target_node: "{{ node_to_remove | default('') }}"
319+
tasks:
# Re-runs the `patroni` task file of the vitabaks.autobase.patroni role on the
# remaining nodes with etcd_hosts set to the etcd_cluster group minus the target
# node; only when dcs_type is 'etcd' (the default).
# NOTE(review): etcd_hosts becomes a list of inventory names; the shape the role
# expects is not visible in this view - confirm.
- name: Update patroni config file
320+
ansible.builtin.include_role:
321+
name: vitabaks.autobase.patroni
322+
tasks_from: patroni # update etcd hosts
323+
vars:
324+
etcd_hosts: "{{ groups['etcd_cluster'] | default([]) | difference([target_node]) }}"
325+
when:
326+
- dcs_type | default('etcd') == 'etcd'
327+
- inventory_hostname != target_node
328+
tags: postgres, postgresql, etcd
329+
330+
# Shows the resulting Patroni membership.
# NOTE(review): the trailing `when: inventory_hostname != target_node` presumably
# applies to the whole block. Combined with run_once it is evaluated for the
# first host only, so the listing is presumably skipped when that host is the
# target node - confirm.
- block:
331+
# Runs `patronictl ... list` and registers the result as patronictl_list_after.
- name: Fetch Patroni cluster members
332+
run_once: true # noqa run-once
333+
ansible.builtin.command: >-
334+
patronictl -c {{ patroni_config_file | default('/etc/patroni/patroni.yml') }} list
335+
register: patronictl_list_after
336+
changed_when: false
337+
environment:
338+
PATH: "{{ ansible_env.PATH }}:/usr/bin:/usr/local/bin"
339+
340+
# Prints the registered listing line by line.
- name: Show Patroni cluster members
341+
run_once: true # noqa run-once
342+
ansible.builtin.debug:
343+
msg: "{{ patronictl_list_after.stdout_lines }}"
344+
when: inventory_hostname != target_node
345+
tags: postgres, postgresql

automation/roles/common/defaults/main.yml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ patroni_etcd_hosts: [] # list of servers of an existing etcd cluster
140140
# - { host: "10.128.64.140", port: "2379" }
141141
# - { host: "10.128.64.142", port: "2379" }
142142
# - { host: "10.128.64.143", port: "2379" }
143+
143144
patroni_etcd_namespace: "service" # (optional) etcd namespace (prefix)
144145
patroni_etcd_username: "" # (optional) username for etcd authentication
145146
patroni_etcd_password: "" # (optional) password for etcd authentication
@@ -800,9 +801,9 @@ netdata_conf:
800801
############################################################
801802

802803
# DNS servers (/etc/resolv.conf)
803-
nameservers: []
804-
# - "8.8.8.8" # example (Google Public DNS)
805-
# - "9.9.9.9" # (Quad9 Public DNS)
804+
nameservers:
805+
- "1.1.1.1" # Cloudflare DNS (primary)
806+
- "8.8.8.8" # Google DNS (secondary)
806807

807808
# /etc/hosts (optional)
808809
etc_hosts: []

automation/roles/etcd/tasks/member_add.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
---
2-
# Add new node to existing etcd cluster
2+
# Add new node to existing etcd cluster (add_node.yml)
33
- block:
44
- name: Build etcd endpoints for existing cluster members
55
ansible.builtin.set_fact:

0 commit comments

Comments
 (0)