Skip to content

Commit 259a01a

Browse files
committed
scylla-ansible-roles: Adds example playbook "kernel_version_enforcer"
The "kernel_version_enforcer" playbook allows the user to: - Pin a specific kernel version (and ensure it is picked on the next reboot) if required - Upgrade the kernel to the latest available version - Purge all old kernel versions - Upgrade all upgradable packages Signed-off-by: Eduardo Benzecri <[email protected]>
1 parent 34f0e15 commit 259a01a

File tree

5 files changed

+381
-0
lines changed

5 files changed

+381
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
---
2+
3+
# Play entry point: processes the hosts matched by `scylla` one at a time
# (serial: 1) and delegates the actual work to the Ubuntu task file.
- name: Kernel Version Enforcer
  hosts: scylla
  serial: 1
  gather_facts: true
  vars:
    # Scylla REST API endpoint and the retry/delay/timeout values used by
    # the tasks that call it.
    api_address: "127.0.0.1"
    api_port: 10000
    api_delay: 10
    api_retries: 360
    api_timeout: 300

    # Timeouts and retry settings for the CQL port wait, the reboot, the
    # scylla-server start and the SIGKILL fallback.
    cql_timeout: 86400
    reboot_timeout: 600
    systemd_unit_retries: 5
    systemd_unit_delay: 30
    pid_kill_delay: 12
    pid_kill_retries: 5
    pause_time: 15

    # Kernel selection: GRUB config to inspect, package naming and the
    # version to enforce.
    grub_config_file: /boot/grub/grub.cfg
    image_package_prefix: linux-image
    image_version: "5.15.0-1051-gcp"
    kernel_related_packages:
      - linux-gcp
      - linux-image-gcp
      - linux-headers-gcp

    # Feature switches, all disabled by default.
    pin_kernel_version: false
    purge_older_images: false
    upgrade_all_packages: false
    upgrade_latest_kernel: false
  tasks:
    - name: Enforce kernel version for Ubuntu
      when: ansible_distribution == "Ubuntu"
      ansible.builtin.include_tasks: ubuntu/main.yml
+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
---
2+
3+
# Make sure the GRUB configuration file is present before it is parsed.
- name: Get {{ grub_config_file }} metadata
  register: grub_config
  ansible.builtin.stat:
    path: "{{ grub_config_file }}"

# Abort right away when the file is missing.
- name: Fail if GRUB config file doesn't exist
  when: not (grub_config.stat.exists)
  ansible.builtin.fail:
    msg: "{{ grub_config_file }} doesn't exist"
12+
13+
# Collect the `menuentry` lines that are preceded by exactly one whitespace
# character (the regex is `^\smenuentry`).
- name: Get GRUB entries
  ansible.builtin.command: grep -E "^\smenuentry" {{ grub_config_file }}
  register: grub_entries
  # Read-only query: it must never be reported as a change.
  changed_when: false

# Remember the zero-based position of the first entry that mentions the
# requested kernel version and is not a "recovery mode" entry. The
# `is not defined` guard keeps the first match once it has been set.
- name: Get GRUB index for '{{ image_package_prefix }}-{{ image_version }}'
  ansible.builtin.set_fact:
    target_grub_index: "{{ grub_index }}"
  when:
    - image_version in item
    - "'recovery mode' not in item"
    - target_grub_index is not defined
  loop: "{{ grub_entries.stdout_lines }}"
  loop_control:
    index_var: grub_index

# Without a match the next task would stop on an undefined variable; fail
# here with a message that names the actual problem instead.
- name: Fail if no GRUB entry matches kernel '{{ image_version }}'
  ansible.builtin.fail:
    msg: "No GRUB menu entry for kernel '{{ image_version }}' was found in {{ grub_config_file }}"
  when: target_grub_index is not defined

# NOTE(review): "1>N" presumably addresses entry N inside the second
# top-level GRUB item (the submenu) - confirm against the target grub.cfg.
- name: Set index '1>{{ target_grub_index }}' to be used in the next reboot
  ansible.builtin.command: grub-reboot "1>{{ target_grub_index }}"
  become: true
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
---
2+
3+
# Kernels newer than the enforced version: purge the package first, then
# delete any file in /boot whose name ends with "-<version>".
- name: Purge all kernel images newer than '{{ final_image_version }}'
  become: true
  when: item is version(final_image_version, '>')
  loop: "{{ vmlinuz_versions.stdout_lines }}"
  ansible.builtin.apt:
    name: "{{ image_package_prefix }}-{{ item }}"
    purge: true
    state: absent

# The shell module is needed here so that the `*` glob gets expanded.
- name: Erase all kernel images related files newer than '{{ final_image_version }}'
  become: true
  when: item is version(final_image_version, '>')
  loop: "{{ vmlinuz_versions.stdout_lines }}"
  ansible.builtin.shell:
    cmd: rm -f /boot/*-{{ item }}
17+
18+
# Kernels older than the enforced version are removed only when
# `purge_older_images` is enabled: package first, then leftover /boot files.
- name: Purge all kernel images older than '{{ final_image_version }}'
  become: true
  when:
    - purge_older_images
    - item is version(final_image_version, '<')
  loop: "{{ vmlinuz_versions.stdout_lines }}"
  ansible.builtin.apt:
    name: "{{ image_package_prefix }}-{{ item }}"
    purge: true
    state: absent

# The shell module is needed here so that the `*` glob gets expanded.
- name: Erase all kernel images related files older than '{{ final_image_version }}'
  become: true
  when:
    - purge_older_images
    - item is version(final_image_version, '<')
  loop: "{{ vmlinuz_versions.stdout_lines }}"
  ansible.builtin.shell:
    cmd: rm -f /boot/*-{{ item }}
36+
37+
# Re-run the package configuration of the enforced kernel image without
# prompting (noninteractive frontend, critical priority only).
- name: Reconfigure '{{ image_package_prefix }}-{{ final_image_version }}' package
  become: true
  ansible.builtin.command:
    cmd: >-
      dpkg-reconfigure {{ image_package_prefix }}-{{ final_image_version }}
      -f noninteractive -p critical
40+
41+
# Inspect /boot/vmlinuz so its symlink target can be validated below.
- name: Get /boot/vmlinuz metadata
  ansible.builtin.stat:
    path: /boot/vmlinuz
  register: vmlinuz

# Fail when /boot/vmlinuz is missing, is not a symlink, or resolves to a
# different kernel. The two checks are OR-ed: either one alone is an error.
# `lnk_source` is only evaluated once `islnk` is known to be true, and the
# expected path is built with `~` because `when` is already a Jinja
# expression (no nested `{{ }}`).
- name: Fail if /boot/vmlinuz is not a symbolic link of /boot/vmlinuz-{{ final_image_version }}
  ansible.builtin.fail:
    msg: "/boot/vmlinuz is not a symbolic link of /boot/vmlinuz-{{ final_image_version }}"
  when: >-
    not (vmlinuz.stat.islnk | default(false))
    or vmlinuz.stat.lnk_source != ('/boot/vmlinuz-' ~ final_image_version)
52+
53+
# Inspect /boot/initrd.img so its symlink target can be validated below.
- name: Get /boot/initrd.img metadata
  ansible.builtin.stat:
    path: /boot/initrd.img
  register: initrd

# Fail when /boot/initrd.img is missing, is not a symlink, or resolves to a
# different kernel. The two checks are OR-ed: either one alone is an error.
# `lnk_source` is only evaluated once `islnk` is known to be true, and the
# expected path is built with `~` because `when` is already a Jinja
# expression (no nested `{{ }}`).
- name: Fail if /boot/initrd.img is not a symbolic link of /boot/initrd.img-{{ final_image_version }}
  ansible.builtin.fail:
    msg: "/boot/initrd.img is not a symbolic link of /boot/initrd.img-{{ final_image_version }}"
  when: >-
    not (initrd.stat.islnk | default(false))
    or initrd.stat.lnk_source != ('/boot/initrd.img-' ~ final_image_version)
+132
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
---
2+
3+
# Ask the running system which kernel release it booted.
- name: Get current kernel image version
  ansible.builtin.command: uname --kernel-release
  register: uname_pre_output
  # Read-only query: it must never be reported as a change.
  changed_when: false

- name: Save kernel image version
  ansible.builtin.set_fact:
    detected_image_version: "{{ uname_pre_output.stdout_lines | first }}"

# A kernel change is required when the running release differs from the
# requested `image_version`, or when an upgrade to the latest kernel was
# requested. `| bool` keeps the fact a real boolean even if the switch was
# supplied as a string.
- name: Define if the kernel image should be installed
  ansible.builtin.set_fact:
    kernel_image_required: "{{ (image_version is version(detected_image_version, 'ne')) or (upgrade_latest_kernel | bool) }}"
14+
15+
# Refresh the package index and clear out stale or unneeded packages
# before anything gets installed.
- name: Ensure apt is ready to be used
  ansible.builtin.apt:
    update_cache: true
    autoclean: true
    autoremove: true
    force_apt_get: true
  become: true

# The whole list is handed to apt in a single call instead of looping, so
# all packages are resolved and installed in one transaction.
- name: Ensure kernel-related packages are installed
  ansible.builtin.apt:
    name: "{{ kernel_related_packages }}"
    state: present
  become: true
29+
30+
# Set each kernel-related package's dpkg selection to `install` (the task
# name calls this unholding) when a kernel change is required.
- name: Mark to unhold kernel-related packages
  become: true
  when: kernel_image_required
  loop: "{{ kernel_related_packages }}"
  ansible.builtin.dpkg_selections:
    name: "{{ item }}"
    selection: install
37+
38+
# Pinned mode: install exactly the requested kernel image package. Skipped
# when an upgrade to the latest kernel was requested.
- name: Ensure kernel image '{{ image_package_prefix }}-{{ image_version }}' is installed
  ansible.builtin.apt:
    name: "{{ image_package_prefix }}-{{ image_version }}"
    state: present
  become: true
  when:
    - kernel_image_required
    - not upgrade_latest_kernel

# Latest mode: the whole list is handed to apt in a single call instead of
# looping, so the kernel meta-packages are upgraded in one transaction.
- name: Upgrade kernel-related packages to the latest version available
  ansible.builtin.apt:
    name: "{{ kernel_related_packages }}"
    state: latest
  become: true
  when: upgrade_latest_kernel
54+
55+
# Put the kernel-related packages on hold when pinning is requested. This
# runs before the general upgrade task below.
- name: Mark to hold kernel-related packages
  become: true
  when: pin_kernel_version
  loop: "{{ kernel_related_packages }}"
  ansible.builtin.dpkg_selections:
    name: "{{ item }}"
    selection: hold

# Optional upgrade of every package (`name: "*"` with `state: latest`).
- name: Upgrade all upgradable packages
  become: true
  when: upgrade_all_packages
  ansible.builtin.apt:
    name: "*"
    state: latest
69+
70+
# List the kernel versions present in /boot by stripping the
# "/boot/vmlinuz-" prefix from every matching file name. The sed pattern is
# anchored so only that literal prefix is removed.
- name: Get all vmlinuz files available
  ansible.builtin.shell: ls /boot/vmlinuz-* | sed 's|^/boot/vmlinuz-||'
  register: vmlinuz_versions
  # Read-only query: it must never be reported as a change.
  changed_when: false

# More than one vmlinuz file means GRUB and package cleanup are needed.
- name: Define if reconfiguration is required due to the presence of serveral vmlinuz files
  ansible.builtin.set_fact:
    reconfiguration_required: "{{ (vmlinuz_versions.stdout_lines | length) > 1 }}"
77+
78+
# When reconfiguration is required, set the kernel-related packages back to
# the `install` selection before touching them.
- name: Mark to unhold kernel-related packages
  become: true
  when: reconfiguration_required
  loop: "{{ kernel_related_packages }}"
  ansible.builtin.dpkg_selections:
    name: "{{ item }}"
    selection: install

# grub.yml is included only for a specific requested version; it is skipped
# when upgrading to the latest kernel.
- name: Prepare GRUB modifications
  when:
    - reconfiguration_required
    - not upgrade_latest_kernel
  ansible.builtin.include_tasks: grub.yml

- name: Stop, reboot and start each node (if required)
  when: reconfiguration_required
  ansible.builtin.include_tasks: stop_reboot_start.yml
95+
96+
# Decide which version the cleanup should enforce: the requested one, or,
# after a "latest" upgrade, the `target_image_version` fact.
- name: Set final kernel image version if '{{ image_version }}' was installed
  when: not upgrade_latest_kernel
  ansible.builtin.set_fact:
    final_image_version: "{{ image_version }}"

- name: Set final kernel image version if the latest one was installed
  when:
    - reconfiguration_required
    - upgrade_latest_kernel
  ansible.builtin.set_fact:
    final_image_version: "{{ target_image_version }}"

# Hand over to the cleanup task file, which uses `final_image_version`.
- name: Enforce kernel version '{{ final_image_version }}' usage
  when: reconfiguration_required
  ansible.builtin.include_tasks: kernel_enforce_cleanup.yml
111+
112+
# Put the kernel-related packages back on hold once reconfiguration is
# done, if pinning is requested.
- name: Mark to hold kernel-related packages
  become: true
  when:
    - pin_kernel_version
    - reconfiguration_required
  loop: "{{ kernel_related_packages }}"
  ansible.builtin.dpkg_selections:
    name: "{{ item }}"
    selection: hold

# Final apt housekeeping (autoremove + autoclean).
- name: Remove useless packages from the cache
  become: true
  ansible.builtin.apt:
    autoclean: true
    autoremove: true
    force_apt_get: true

# wait_for is given only a timeout here; per the task name it serves as a
# pause, and only runs when a kernel change was required.
- name: Make a pause of {{ pause_time }} seconds
  when: kernel_image_required
  ansible.builtin.wait_for:
    timeout: "{{ pause_time | int }}"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
---
2+
# Gather service facts so the presence of Scylla units can be checked.
- name: Populate service facts
  ansible.builtin.service_facts:

# Scylla counts as installed when the scylla-server.service unit appears in
# the gathered service facts.
- name: Check if Scylla is installed
  ansible.builtin.set_fact:
    scylla_installation: "{{ ansible_facts.services['scylla-server.service'] is defined }}"
8+
9+
# Graceful stop of Scylla ahead of the reboot: mask the unit, drain the node
# via the REST API, then stop the services. If any step fails, `rescue`
# falls back to SIGKILL. The unit is always unmasked afterwards. The whole
# block runs only when Scylla is installed and a kernel change is required.
- name: Stop Scylla
  block:
    - name: Mask scylla-server service
      ansible.builtin.systemd:
        name: scylla-server
        masked: true
      become: true

    # `retries`/`delay` are only honoured together with `until` on
    # ansible-core releases before 2.16, so the success condition is
    # spelled out explicitly.
    - name: Drain node
      ansible.builtin.uri:
        url: "http://{{ api_address }}:{{ api_port }}/storage_service/drain"
        method: POST
      register: node_drain_request
      until: (node_drain_request.status | default(-1)) == 200
      retries: "{{ api_retries }}"
      delay: "{{ api_delay }}"
      timeout: "{{ api_timeout }}"

    # Poll the operation mode until it reports DRAINED. `default('')` keeps
    # the conditions evaluable when a request returned no JSON body.
    - name: Check if the node if fully drained
      ansible.builtin.uri:
        url: "http://{{ api_address }}:{{ api_port }}/storage_service/operation_mode"
        method: GET
      register: node_drain_status
      until: "'DRAINED' in (node_drain_status.json | default(''))"
      failed_when: "'DRAINED' not in (node_drain_status.json | default(''))"
      retries: "{{ api_retries }}"
      delay: "{{ api_delay }}"
      timeout: "{{ api_timeout }}"

    - name: Stop scylla-manager-agent service (if exists)
      ansible.builtin.systemd:
        name: scylla-manager-agent
        enabled: true
        state: stopped
      become: true
      when: ansible_facts.services['scylla-manager-agent.service'] is defined

    - name: Stop scylla-server service
      ansible.builtin.systemd:
        name: scylla-server
        state: stopped
      become: true
      when: "'DRAINED' in node_drain_status.json"
  rescue:
    # Repeats `kill -9` until the command exits with rc 2.
    # NOTE(review): rc 2 is presumably what `kill` returns once `pidof`
    # finds no process and no PID is passed - confirm for the target shell.
    - name: Send a SIGKILL to Scylla PID
      ansible.builtin.shell: kill -9 $(pidof scylla)
      register: scylla_kill_pid
      retries: "{{ pid_kill_retries }}"
      delay: "{{ pid_kill_delay }}"
      until: scylla_kill_pid.rc == 2
      failed_when: scylla_kill_pid.rc != 2
      become: true
  always:
    - name: Unmask scylla-server service
      ansible.builtin.systemd:
        name: scylla-server
        masked: false
      become: true
  when:
    - scylla_installation
    - kernel_image_required
67+
68+
# Reboot the node and confirm afterwards which kernel it came back with.
# Runs only when a kernel change is required.
- name: Reboot and post-reboot checks
  when: kernel_image_required
  block:
    - name: Reboot the node
      become: true
      ansible.builtin.reboot:
        reboot_timeout: "{{ reboot_timeout }}"

    - name: Get current kernel image version
      register: uname_post_output
      ansible.builtin.shell: uname --kernel-release

    # Stored as `target_image_version`, the kernel release reported after
    # the reboot.
    - name: Save kernel image version
      ansible.builtin.set_fact:
        target_image_version: "{{ uname_post_output.stdout_lines | first }}"

    # In pinned mode the node must be running exactly `image_version`.
    - name: Fail if kernel image version '{{ image_version }}' is not currently in use
      when:
        - target_image_version is version(image_version, 'ne')
        - not upgrade_latest_kernel
      ansible.builtin.fail:
        msg: "'{{ image_version }}' is not currently used"
90+
91+
# Bring Scylla back after the reboot and wait until the node serves CQL and
# is reported as UN by `nodetool status`. Runs only when Scylla is installed
# and a kernel change was required.
- name: Start Scylla
  block:
    # Read the address Scylla listens on from its configuration file.
    - name: Get listen address
      ansible.builtin.shell: grep '^listen_address:' /etc/scylla/scylla.yaml | awk '{ print $2 }'
      register: listen_address
      # Read-only query: it must never be reported as a change.
      changed_when: false

    # The unit is started explicitly only when the service facts list it
    # with status "disabled".
    # `retries`/`delay` are only honoured together with `until` on
    # ansible-core releases before 2.16, so the success condition is
    # spelled out explicitly.
    - name: Start scylla-server service
      ansible.builtin.systemd:
        name: scylla-server
        state: started
      register: scylla_server_start
      until: scylla_server_start is succeeded
      retries: "{{ systemd_unit_retries }}"
      delay: "{{ systemd_unit_delay }}"
      become: true
      when:
        - ansible_facts.services['scylla-server.service'] is defined
        - ansible_facts.services['scylla-server.service'].status == "disabled"

    - name: Wait for CQL port on {{ listen_address.stdout }}
      ansible.builtin.wait_for:
        port: 9042
        host: "{{ listen_address.stdout }}"
        timeout: "{{ cql_timeout }}"

    # `grep -wF` matches the address as a fixed, whole-word string, so the
    # dots are literal and e.g. 10.0.0.1 cannot match the line of 10.0.0.10.
    - name: Wait for the cluster to become healthy
      ansible.builtin.shell: nodetool status | grep -wF "{{ listen_address.stdout }}" | grep '^UN'
      register: node_status
      until: node_status.rc == 0
      retries: "{{ api_retries }}"
      delay: "{{ api_delay }}"
      # Polling only: it must never be reported as a change.
      changed_when: false
  when:
    - scylla_installation
    - kernel_image_required

0 commit comments

Comments
 (0)