Skip to content

Commit ace9014

Browse files
committed
Add playbook to automate Rocky 9.7 with OFED upgrade
Tested on multinode. Fix install-doca.yml to not install doca-ofed anymore (avoid dkms). The stackhpc_doca_kernel_version_matrix variable contains kernel module versions to install for last 2 supported minor RockyLinux versions. It must be changed after a new pre-compiled kernel module version has been built.
1 parent 110d7e0 commit ace9014

File tree

3 files changed

+141
-9
lines changed

3 files changed

+141
-9
lines changed
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
---
2+
# This playbook performs the system upgrade from RockyLinux 9.6 to 9.7 on hosts
3+
# using doca ofed kernel modules and utilities.
4+
# It prevents the install of the dkms toolchain and dkms modules that would be
5+
# used instead of our precompiled modules if a direct `dnf upgrade` was run.
6+
#
7+
# It must be run after new package snapshots have been merged and
8+
# `pulp-repo-sync.yml` and `pulp-repo-publish.yml` have been run.
9+
# Also `kayobe overcloud host configure -t dnf` must have been run for the new
10+
# `doca.repo` to be present (the doca version is in the url) on the mlnx hosts.
11+
12+
# First play: on every host in the `mlnx` group, check that it is still on
# Rocky Linux 9.6, install the pre-compiled doca-kernel package selected for
# the target minor release, replace doca-ofed with doca-ofed-userspace, and
# then upgrade all packages.
- name: Prepare upgrade from Rocky Linux 9.6 to 9.7
  hosts: mlnx
  # Batch size is taken from the ANSIBLE_SERIAL environment variable; when it
  # is unset or empty the play handles one host at a time.
  serial: "{{ lookup('env', 'ANSIBLE_SERIAL') | default(1, true) }}"
  gather_facts: true
  tags: pre
  vars:
    # We don't build kernel modules for each kernel version, e.g.
    # 5.14.0-611.13.1 has been built, but not 5.14.0-611.20.1. The version to
    # install is therefore looked up by Rocky 9 minor version.
    doca_kernel_version: "{{ stackhpc_doca_kernel_version_matrix[stackhpc_pulp_repo_rocky_9_minor_version] }}"
  tasks:
    # Guard: stop early on hosts that are not Rocky Linux 9.6.
    - name: Assert that hosts are running Rocky Linux 9.6
      ansible.builtin.assert:
        that:
          - ansible_facts.distribution == 'Rocky'
          - ansible_facts.distribution_version == '9.6'
          - os_distribution == 'rocky'
        fail_msg: >-
          This playbook is only designed for Rocky Linux 9.6 hosts. Ensure
          that you are limiting it to only run on Rocky Linux 9.6 hosts and
          os_distribution is set to rocky.

    - name: Ensure doca kernel repo is up to date
      ansible.builtin.dnf:
        name: doca-kernel-repo
        state: latest
        update_cache: true
      become: true

    # This is required by mlnx-ofa_kernel, and comes from the doca repository.
    # It is already present when doca-ofed is installed, but will be upgraded.
    - name: Ensure mlnx-tools is installed
      ansible.builtin.dnf:
        name: mlnx-tools
        state: latest
        update_cache: true
      become: true

    # The `doca` repository is disabled for this transaction only.
    # NOTE(review): presumably so the package is resolved from the repository
    # provided by doca-kernel-repo - confirm.
    - name: Ensure appropriate doca-kernel is installed
      ansible.builtin.dnf:
        name: "doca-kernel-{{ doca_kernel_version }}"
        state: latest
        disablerepo: doca
      become: true

    # doca-ofed 3.2 starts to depend on the dkms modules. It was not the case
    # in doca-ofed 2.9.3.
    # autoremove is disabled so only the doca-ofed package itself is removed.
    - name: Ensure doca-ofed is not present (upgrading it brings dkms)
      ansible.builtin.dnf:
        name: doca-ofed
        state: absent
        autoremove: false
      become: true

    - name: Ensure latest doca-ofed-userspace instead of doca-ofed
      ansible.builtin.dnf:
        name: doca-ofed-userspace
        state: latest
      become: true

    # With doca-ofed removed above, the full upgrade can run.
    - name: Upgrade all
      ansible.builtin.dnf:
        name: "*"
        state: latest
      become: true
76+
77+
# Runs the reset-bls-entries.yml playbook with `reset_bls_hosts` set to the
# `mlnx` group, ahead of the reboot, so that a bad grub configuration does not
# keep the hosts on the old kernel.
- name: Fix potential grub config preventing new kernel from being used
  ansible.builtin.import_playbook: reset-bls-entries.yml
  vars:
    reset_bls_hosts: mlnx
81+
82+
# Runs the reboot.yml playbook with `reboot_hosts` set to the `mlnx` group so
# that the upgraded packages take effect.
- name: Reboot to apply updates
  ansible.builtin.import_playbook: reboot.yml
  vars:
    reboot_hosts: mlnx
86+
87+
# Final play: refresh the facts on the `mlnx` hosts and verify that they now
# report Rocky Linux 9.7.
- name: Confirm the host is upgraded to Rocky Linux 9.7
  hosts: mlnx
  tags: post
  tasks:
    # Re-gather facts so that the assertion below sees the post-upgrade
    # distribution version.
    - name: Update distribution facts
      ansible.builtin.setup:
        filter: "{{ kayobe_ansible_setup_filter }}"
        gather_subset: "{{ kayobe_ansible_setup_gather_subset }}"

    # Can fail (e.g. in multinode) when there are bad entries in grub config.
    # Fixed by `kayobe playbook run ansible/maintenance/reset-bls-entries.yml`
    # and manual reboot.
    - name: Assert that hosts are now using Rocky Linux 9.7
      ansible.builtin.assert:
        that:
          - ansible_facts.distribution == 'Rocky'
          - ansible_facts.distribution_version == '9.7'
          - os_distribution == 'rocky'
        # Surface the known remediation to the operator instead of leaving it
        # only in a source comment.
        fail_msg: >-
          The host is not running Rocky Linux 9.7 after the upgrade. This can
          happen when there are bad entries in the grub config; run
          `kayobe playbook run ansible/maintenance/reset-bls-entries.yml`
          and reboot the host manually.

etc/kayobe/ansible/tools/install-doca.yml

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,27 +3,50 @@
33
become: true
44
hosts: mlnx
55
gather_facts: true
6+
vars:
7+
# we don't build kernel modules for each version, eg 5.14.0-611.13.1 has been built,
8+
# but not 5.14.0-611.20.1
9+
doca_kernel_version: "{{ stackhpc_doca_kernel_version_matrix[stackhpc_pulp_repo_rocky_9_minor_version] }}"
610
tasks:
7-
- name: Get running kernel
8-
ansible.builtin.command:
9-
cmd: "uname -r"
10-
register: kernel
11-
check_mode: false
12-
1311
# Installs or upgrades the doca-kernel-repo package after refreshing the dnf
# metadata cache.
- name: Install kernel repo
  ansible.builtin.dnf:
    name: doca-kernel-repo
    state: latest
    update_cache: true
1816

17+
# The repo file name is not the same as doca_kernel_version: some dots are
# changed to underscore or dash. Ask rpm which file under /etc/yum.repos.d/
# the doca-kernel-repo package installed instead of computing the name.
- name: Discover kernel repo filename
  ansible.builtin.shell: |
    set -o pipefail
    rpm -ql doca-kernel-repo | grep /etc/yum.repos.d/
  register: kernel_repo_filename
  # Pure query, never reports a change.
  changed_when: false
  # Run in check mode too: shell tasks are skipped there by default, which
  # would leave `kernel_repo_filename.stdout` undefined for the task that
  # consumes it.
  check_mode: false
24+
1925
- name: Ensure correct priority for DOCA modules
2026
ansible.builtin.lineinfile:
2127
line: "priority=-2"
2228
insertafter: EOF
23-
path: "/etc/yum.repos.d/doca-kernel-{{ kernel.stdout }}.repo"
29+
path: "{{ kernel_repo_filename.stdout }}"
30+
31+
# This is required by mlnx-ofa_kernel, and comes from the doca repository.
# It is already present when doca-ofed is installed, but will be upgraded.
- name: Ensure mlnx-tools is installed
  ansible.builtin.dnf:
    name: mlnx-tools
    state: latest
    update_cache: true
  become: true
39+
40+
# Installs the pre-compiled kernel module package whose name embeds
# `doca_kernel_version`. The `doca` repository is disabled for this
# transaction only.
# NOTE(review): presumably so the package is resolved from the repository
# provided by doca-kernel-repo - confirm.
- name: Ensure appropriate doca-kernel is installed
  ansible.builtin.dnf:
    name: "doca-kernel-{{ doca_kernel_version }}"
    state: latest
    disablerepo: doca
  become: true
2446

25-
- name: Install DOCA OFED
47+
- name: Ensure DOCA OFED userspace is installed
2648
ansible.builtin.dnf:
27-
name: doca-ofed
49+
name:
50+
- doca-ofed-userspace
2851
state: latest
2952
update_cache: true

etc/kayobe/ofed.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@ stackhpc_pulp_doca_version_matrix:
88
"7": 3.2.1
99
stackhpc_pulp_doca_version: "{{ stackhpc_pulp_doca_version_matrix[stackhpc_pulp_repo_rocky_9_minor_version] | default('2.9.1') }}"
1010

11+
# Available and tested versions of the pre-compiled doca-ofed kernel modules,
# keyed by Rocky Linux 9 minor version. The selected value is appended to
# `doca-kernel-` to form the package name to install. Update an entry after a
# new pre-compiled kernel module version has been built.
# Values are quoted so that they are always read as strings.
stackhpc_doca_kernel_version_matrix:
  "6": "5.14.0.570.21.1.el9.6.x86.64"
  "7": "5.14.0.611.13.1.el9.7.x86.64"
15+
1116
###############################################################################
1217
# Pulp configuration for DOCA OFED
1318

0 commit comments

Comments
 (0)