Skip to content

Commit c040524

Browse files
authored
Add cluster scaling support for cloud providers (#1267)
1 parent 312b8f7 commit c040524

File tree

10 files changed: 349 additions & 117 deletions

automation/playbooks/add_node.yml

Lines changed: 36 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,29 @@
11
---
22
- name: vitabaks.autobase.add_node | PostgreSQL HA Cluster Scaling (add a new node)
3+
hosts: localhost
4+
gather_facts: true
5+
any_errors_fatal: true
6+
pre_tasks:
7+
# set_fact: 'pgbackrest_install' to configure Postgres backups (TODO: Add the ability to configure backups in the UI)
8+
# Note: Applicable only for "aws", "gcp", "azure", because:
9+
# "digitalocean" - requires the Spaces access keys ("AWS_ACCESS_KEY_ID" and "AWS_SECRET_ACCESS_KEY" variables)
10+
# "hetzner" - requires the S3 credentials ("hetzner_object_storage_access_key" and "hetzner_object_storage_secret_key" variables).
11+
- name: "Set variable: 'pgbackrest_install' to configure Postgres backups"
12+
ansible.builtin.set_fact:
13+
pgbackrest_install: true
14+
when:
15+
- not (pgbackrest_install | default(false) | bool or wal_g_install | default(false) | bool)
16+
- cloud_provider | default('') | lower in ['aws', 'gcp', 'azure']
17+
- pgbackrest_auto_conf | default(true) | bool # to be able to disable auto backup settings
18+
tags: always
19+
roles:
20+
- role: vitabaks.autobase.cloud_resources
21+
when: cloud_provider | default('') | length > 0
22+
vars:
23+
cluster_scaling: true
24+
tags: always
25+
26+
- name: vitabaks.autobase.add_node | Prepare and perform pre-checks
327
hosts: postgres_cluster:etcd_cluster:consul_instances:balancers
428
become: true
529
become_method: sudo
@@ -265,22 +289,21 @@
265289
- role: vitabaks.autobase.pgbackrest
266290
when: pgbackrest_install | bool
267291

268-
- name: vitabaks.autobase.add_node | Add new PostgreSQL replica to the cluster
269-
hosts: new_replica
292+
- name: vitabaks.autobase.add_node | Regenerate Postgres TLS certificates
293+
hosts: postgres_cluster
270294
become: true
271295
become_method: sudo
272296
gather_facts: true
273297
any_errors_fatal: true
274298
vars:
275299
cluster_scaling: true
276-
277-
pre_tasks:
300+
tasks:
278301
- name: Generate Postgres TLS certificate
279302
ansible.builtin.include_role:
280303
name: vitabaks.autobase.tls_certificate
281304
vars:
282305
tls_group_name: "postgres_cluster"
283-
tls_cert_regenerate: true # Regenerate when adding nodes
306+
tls_cert_regenerate: true
284307
when: tls_cert_generate | default(true) | bool
285308

286309
- name: Copy Postgres TLS certificate, key and CA to all nodes
@@ -289,6 +312,14 @@
289312
tasks_from: copy
290313
when: tls_cert_generate | default(true) | bool
291314

315+
- name: vitabaks.autobase.add_node | Add new PostgreSQL replica to the cluster
316+
hosts: new_replica
317+
become: true
318+
become_method: sudo
319+
gather_facts: true
320+
any_errors_fatal: true
321+
vars:
322+
cluster_scaling: true
292323
roles:
293324
- role: vitabaks.autobase.wal_g
294325
when: wal_g_install | default(false) | bool

automation/requirements.yml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,17 @@
11
---
22
collections:
33
- name: amazon.aws
4-
version: ">=10.1.0"
4+
version: ">=10.1.1"
55
- name: community.aws
66
version: ">=10.0.0"
77
- name: google.cloud
8-
version: ">=1.7.0"
8+
version: ">=1.8.0"
99
- name: azure.azcollection
10-
version: ">=3.7.0"
10+
version: ">=3.8.0"
1111
- name: community.digitalocean
1212
version: ">=1.27.0"
1313
- name: hetzner.hcloud
14-
version: ">=5.1.0"
14+
version: ">=5.2.0"
1515
- name: community.postgresql
1616
version: ">=3.14.2"
1717
- name: community.docker

automation/roles/cloud_resources/tasks/aws.yml

Lines changed: 98 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -240,42 +240,59 @@
240240
ip_address_type: "{{ 'public_ip_address' if server_public_ip | bool else 'private_ip_address' }}"
241241

242242
# Server and volume
243-
- name: "AWS: Create or modify EC2 instance"
244-
amazon.aws.ec2_instance:
245-
access_key: "{{ lookup('ansible.builtin.env', 'AWS_ACCESS_KEY_ID') }}"
246-
secret_key: "{{ lookup('ansible.builtin.env', 'AWS_SECRET_ACCESS_KEY') }}"
247-
name: "{{ server_name | lower }}{{ '%02d' % (idx + 1) }}"
248-
state: present
249-
instance_type: "{{ server_type }}"
250-
image_id: "{{ server_image }}"
251-
key_name: "{{ ssh_key_name }}"
252-
region: "{{ server_location }}"
253-
network_interfaces:
254-
- subnet_id: "{{ server_network }}"
255-
groups: "{{ ([] if not cloud_firewall | bool else [ec2_security_group_result.group_id]) }}"
256-
assign_public_ip: "{{ server_public_ip | bool }}"
257-
delete_on_termination: true
258-
volumes:
259-
- device_name: /dev/sda1
260-
ebs:
261-
volume_type: "{{ system_volume_type | default('gp3', true) }}"
262-
volume_size: "{{ system_volume_size | default(80) | int }}"
263-
delete_on_termination: true
264-
- device_name: /dev/sdb
265-
ebs:
266-
volume_type: "{{ volume_type | default('gp3', true) }}"
267-
volume_size: "{{ volume_size | int }}"
268-
delete_on_termination: true
269-
loop: "{{ range(0, server_count | int) | list }}"
270-
loop_control:
271-
index_var: idx
272-
label: "{{ server_name | lower }}{{ '%02d' % (idx + 1) }}"
273-
register: server_result
274-
until:
275-
- server_result.instances[0][ip_address_type] is defined
276-
- server_result.instances[0][ip_address_type] | length > 0
277-
retries: 3
278-
delay: 10
243+
- block:
244+
- name: "AWS: Gather information about EC2 instances"
245+
amazon.aws.ec2_instance_info:
246+
access_key: "{{ lookup('ansible.builtin.env', 'AWS_ACCESS_KEY_ID') }}"
247+
secret_key: "{{ lookup('ansible.builtin.env', 'AWS_SECRET_ACCESS_KEY') }}"
248+
region: "{{ server_location }}"
249+
filters:
250+
instance-type: "{{ server_type }}"
251+
instance-state-name: ["pending", "running", "shutting-down", "stopping", "stopped"]
252+
"tag:Name": "{{ server_name | lower }}{{ '%02d' % (idx + 1) }}"
253+
loop: "{{ range(0, server_count | int) | list }}"
254+
loop_control:
255+
index_var: idx
256+
label: "{{ server_name | lower }}{{ '%02d' % (idx + 1) }}"
257+
register: ec2_instance_info
258+
259+
- name: "AWS: Create EC2 instance"
260+
amazon.aws.ec2_instance:
261+
access_key: "{{ lookup('ansible.builtin.env', 'AWS_ACCESS_KEY_ID') }}"
262+
secret_key: "{{ lookup('ansible.builtin.env', 'AWS_SECRET_ACCESS_KEY') }}"
263+
name: "{{ server_name | lower }}{{ '%02d' % (idx + 1) }}"
264+
state: running
265+
instance_type: "{{ server_type }}"
266+
image_id: "{{ server_image }}"
267+
key_name: "{{ ssh_key_name }}"
268+
region: "{{ server_location }}"
269+
network_interfaces:
270+
- subnet_id: "{{ server_network }}"
271+
groups: "{{ ([] if not cloud_firewall | bool else [ec2_security_group_result.group_id]) }}"
272+
assign_public_ip: "{{ server_public_ip | bool }}"
273+
delete_on_termination: true
274+
volumes:
275+
- device_name: /dev/sda1
276+
ebs:
277+
volume_type: "{{ system_volume_type | default('gp3', true) }}"
278+
volume_size: "{{ system_volume_size | default(80) | int }}"
279+
delete_on_termination: true
280+
- device_name: /dev/sdb
281+
ebs:
282+
volume_type: "{{ volume_type | default('gp3', true) }}"
283+
volume_size: "{{ volume_size | int }}"
284+
delete_on_termination: true
285+
tags:
286+
Name: "{{ server_name | lower }}{{ '%02d' % (idx + 1) }}"
287+
Cluster: "{{ patroni_cluster_name }}"
288+
loop: "{{ range(0, server_count | int) | list }}"
289+
loop_control:
290+
index_var: idx
291+
label: "{{ server_name | lower }}{{ '%02d' % (idx + 1) }}"
292+
register: ec2_instance_result
293+
retries: 3
294+
delay: 10
295+
when: ec2_instance_info.results[idx].instances | length == 0 # only if instance does not exist
279296
when: not server_spot | default(aws_ec2_spot_instance | default(false)) | bool
280297

281298
# Spot instance (if 'server_spot' is 'true')
@@ -288,7 +305,6 @@
288305
filters:
289306
instance-lifecycle: "spot"
290307
instance-type: "{{ server_type }}"
291-
image-id: "{{ server_image }}"
292308
instance-state-name: ["pending", "running", "shutting-down", "stopping", "stopped"]
293309
"tag:Name": "{{ server_name | lower }}{{ '%02d' % (idx + 1) }}"
294310
loop: "{{ range(0, server_count | int) | list }}"
@@ -327,6 +343,7 @@
327343
delete_on_termination: true
328344
tags:
329345
Name: "{{ server_name | lower }}{{ '%02d' % (idx + 1) }}"
346+
Cluster: "{{ patroni_cluster_name }}"
330347
loop: "{{ ec2_spot_instance_info.results }}"
331348
loop_control:
332349
index_var: idx
@@ -353,20 +370,25 @@
353370
retries: 3
354371
delay: 10
355372
when: item.spot_request.spot_instance_request_id is defined
356-
357-
# if spot instances are created now
358-
- name: "Set variable: server_result"
359-
ansible.builtin.set_fact:
360-
server_result: "{{ ec2_spot_instance_result }}"
361-
when: ec2_spot_instance_result.changed | default(false)
362-
363-
# if spot instances have already been created
364-
- name: "Set variable: server_result"
365-
ansible.builtin.set_fact:
366-
server_result: "{{ ec2_spot_instance_info }}"
367-
when: not ec2_spot_instance_result.changed | default(false)
368373
when: server_spot | default(aws_ec2_spot_instance | default(false)) | bool
369374

375+
# Combine existing and newly created instances
376+
- name: "Set variable: server_result"
377+
ansible.builtin.set_fact:
378+
server_result:
379+
results: >-
380+
{{
381+
(
382+
(ec2_instance_info.results | default([]))
383+
+ (ec2_instance_result.results | default([]))
384+
+ (ec2_spot_instance_info.results | default([]))
385+
+ (ec2_spot_instance_result.results | default([]))
386+
)
387+
| selectattr('instances', 'defined')
388+
| selectattr('instances', '!=', [])
389+
| list
390+
}}
391+
370392
# Classic Load Balancer (CLB) - previous generation
371393
- name: "AWS: Create Classic Load Balancer (CLB)"
372394
amazon.aws.elb_classic_lb:
@@ -402,13 +424,23 @@
402424
loop_control:
403425
label: "{{ patroni_cluster_name }}-{{ item }}"
404426
register: aws_elb_classic_lb
405-
when: cloud_load_balancer | bool and aws_load_balancer_type | lower == 'clb' and
427+
when: server_result.results | default([]) | length > 0 and
428+
cloud_load_balancer | bool and aws_load_balancer_type | lower == 'clb' and
406429
(item == 'primary' or
407430
(item == 'replica' and server_count | int > 1) or
408431
(item in ['sync', 'async'] and server_count | int > 1 and synchronous_mode | bool))
409432

410433
# Network Load Balancer (NLB)
411-
- name: "AWS: Create NLB Target Group"
434+
- name: Build NLB targets
435+
ansible.builtin.set_fact:
436+
nlb_targets: "{{ (nlb_targets | default([])) + [{'Id': item, 'Port': target_port | int}] }}"
437+
loop: "{{ instance_ids | default([]) }}"
438+
vars:
439+
instance_ids: "{{ (server_result | default({'results': []})) | json_query('results[].instances[].instance_id') }}"
440+
target_port: "{{ pgbouncer_listen_port | default('6432') if pgbouncer_install | bool else postgresql_port | default('5432') }}"
441+
when: cloud_load_balancer | bool and aws_load_balancer_type | lower == 'nlb'
442+
443+
- name: "AWS: Create or modify NLB Target Group"
412444
community.aws.elb_target_group:
413445
access_key: "{{ lookup('ansible.builtin.env', 'AWS_ACCESS_KEY_ID') }}"
414446
secret_key: "{{ lookup('ansible.builtin.env', 'AWS_SECRET_ACCESS_KEY') }}"
@@ -427,16 +459,7 @@
427459
healthy_threshold_count: 3
428460
successful_response_codes: "200"
429461
target_type: instance
430-
targets: >-
431-
{{
432-
server_result.results
433-
| map(attribute='instances')
434-
| map('first')
435-
| map(attribute='instance_id')
436-
| map('community.general.dict_kv', 'Id')
437-
| map('combine', {'Port': target_port | int})
438-
| list
439-
}}
462+
targets: "{{ nlb_targets | default([]) }}"
440463
modify_targets: true
441464
wait: false
442465
state: present
@@ -514,7 +537,7 @@
514537
volume_size: "{{ volume_size }} GB"
515538
public_ip: "{{ item.instances[0].public_ip_address | default('N/A', true) }}"
516539
private_ip: "{{ item.instances[0].private_ip_address | default('N/A', true) }}"
517-
loop: "{{ server_result.results }}"
540+
loop: "{{ server_result.results | default([]) }}"
518541
loop_control:
519542
index_var: idx
520543
label: "{{ server_name | lower }}{{ '%02d' % (idx + 1) }}"
@@ -529,7 +552,7 @@
529552
port: 22
530553
delay: 5
531554
timeout: 300
532-
loop: "{{ server_result.results }}"
555+
loop: "{{ server_result.results | default([]) }}"
533556
loop_control:
534557
index_var: idx
535558
label: "{{ server_name | lower }}{{ '%02d' % (idx + 1) }}"
@@ -545,9 +568,20 @@
545568
{{ ip_addresses | default([]) +
546569
[{
547570
'public_ip': item.instances[0].public_ip_address | default(''),
548-
'private_ip': item.instances[0].private_ip_address | default('')
571+
'private_ip': item.instances[0].private_ip_address | default(''),
572+
'new_node': (cluster_scaling | default(false) | bool and item.instances[0].instance_id not in existing_instance_ids)
549573
}]
550574
}}
575+
vars:
576+
existing_instance_ids: >-
577+
{{
578+
(
579+
(ec2_instance_info.results | default([])) +
580+
(ec2_spot_instance_info.results | default([]))
581+
)
582+
| selectattr('instances','defined') | map(attribute='instances') | list | flatten
583+
| selectattr('instance_id','defined') | map(attribute='instance_id') | list
584+
}}
551585
loop: "{{ server_result.results | selectattr('instances', 'defined') }}"
552586
loop_control:
553587
label: >-

0 commit comments

Comments
 (0)