Fix snapshot reload on ec2 instances #1157

Workflow file for this run
---
name: ci
on:
  push:
    branches:
      - master
      - 'release**'
      - 'debug/**'
  pull_request: ~
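  # `~` is YAML null, so `pull_request: ~` triggers the workflow on every pull
  # request with the default activity types (opened, synchronize, reopened).
  # In the push filter, `**` matches any characters including `/`, so
  # 'release**' covers e.g. release, release-1.x and release/v2, while
  # 'debug/**' covers only branches under debug/.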
permissions:
  contents: read
  id-token: write
env:
  AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }}
jobs:
  build-packages:
    name: Build Packages
    runs-on:
      - metal
      - ${{ matrix.arch }}
    env:
      FS: ext2 ext3 ext4 xfs
    strategy:
      # Do not cancel all jobs when one fails. The driver's CI is fast,
      # but failing fast would prevent us from observing possible errors
      # on the other distros.
      fail-fast: false
      matrix:
        distro: [
          amazon2, amazon2023,
          centos7, centos8, centos9,
          debian8, debian9, debian10, debian11, debian12,
          fedora31, fedora32, fedora34, fedora35, fedora36, fedora37, fedora38,
          ubuntu2004, ubuntu2204
        ]
        arch: [ amd64 ]
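        # The `include` entries below extend the amd64 matrix with arm64
        # variants for the distros that have arm64 boxes; distros missing from
        # the list (e.g. centos7, debian8, debian9) are built on amd64 only.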
        include:
          - distro: amazon2
            arch: arm64
          - distro: amazon2023
            arch: arm64
          - distro: centos8
            arch: arm64
          - distro: centos9
            arch: arm64
          - distro: debian10
            arch: arm64
          - distro: debian11
            arch: arm64
          - distro: debian12
            arch: arm64
          - distro: fedora35
            arch: arm64
          - distro: fedora36
            arch: arm64
          - distro: fedora37
            arch: arm64
          - distro: fedora38
            arch: arm64
          - distro: ubuntu2204
            arch: arm64
    steps:
      - name: Checkout sources
        uses: actions/checkout@v3
      - name: Set ENV
        if: always()
        env:
          DISTRO: ${{ matrix.distro }}
          ARCH: ${{ matrix.arch }}
        run: .github/scripts/set_env.sh
      - name: Check ENV
        run: .github/scripts/check_env.sh
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v1-node16
        with:
          aws-region: ${{ env.AWS_DEFAULT_REGION }}
          role-to-assume: ${{ secrets.ASSURIODEV_OIDC_AWS_ROLE_ARN }}
          role-duration-seconds: 10800 # 3 hours
      - name: Start a box
        if: always()
        run: .github/scripts/start_box.sh
      - name: Boot Fedora 32, 35, 36 and 37 into the original kernel version
        if: "${{ matrix.distro == 'fedora32' || matrix.distro == 'fedora35' || matrix.distro == 'fedora36' || matrix.distro == 'fedora37' }}"
        run: |
          vagrant ssh ${{env.INSTANCE_NAME}} -c '
            set -x
            arch=$(rpm --eval \%_arch)
            ver=$(rpm -E \%fedora)
            case $ver in
              32) k_ver=5.9.16
                  k_patch=100
                  ;;
              35) k_ver=5.15.18
                  k_patch=200
                  ;;
              36) k_ver=5.17.14
                  k_patch=300
                  ;;
              37) k_ver=6.0.14
                  k_patch=300
                  ;;
            esac
            for package in "kernel-core" "kernel-modules" "kernel" "kernel-devel"; do
              sudo rpm -ivh --force https://kojipkgs.fedoraproject.org/packages/kernel/${k_ver}/${k_patch}.fc${ver}/${arch}/${package}-${k_ver}-${k_patch}.fc${ver}.${arch}.rpm
            done
          '
          vagrant reload ${{env.INSTANCE_NAME}}
        working-directory: ${{env.BOX_DIR}}
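      # For illustration: with the values above, the loop on a Fedora 35
      # x86_64 box resolves to koji URLs of this shape (one per package):
      #   https://kojipkgs.fedoraproject.org/packages/kernel/5.15.18/200.fc35/x86_64/kernel-core-5.15.18-200.fc35.x86_64.rpm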
      # For some reason Debian 8 comes with a too-new (unofficial) xfsprogs
      # installed. Replace it with the official release built from source.
      - name: Reinstall xfsprogs on Debian 8
        if: "${{ matrix.distro == 'debian8' }}"
        run: |
          vagrant ssh ${{env.INSTANCE_NAME}} -c '
            curl --retry 5 --retry-max-time 120 https://s3.eu-central-1.wasabisys.com/blobs-wasabi.elastio.dev/build_utils/xfsprogs_3.2.1.tar.gz | tar xz && cd xfsprogs-3.2.1 &&
            make && sudo make install && sudo make install-dev &&
            cd .. && rm -rf xfsprogs-3.2.1'
        working-directory: ${{env.BOX_DIR}}
        timeout-minutes: 5
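      # If the rebuild ever needs verifying, one quick manual check (not part
      # of this workflow) is to ask the installed tools for their version,
      # which should report 3.2.1 after this step:
      #   vagrant ssh ${{env.INSTANCE_NAME}} -c 'xfs_repair -V'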
      # Amazon Linux 2 has devtoolset-8 installed, which upgrades GCC from
      # 7.3.1 to 8.3.1. The new GCC doesn't compile the rpm packages properly
      # because /usr/lib/rpm/redhat/macros, provided by the package
      # system-rpm-config-9.1.0-76.amzn2.0.13.noarch, contains compilation
      # flags that apply to GCC 7 but were removed in GCC 8. The workaround is
      # to disable the toolset before the next build step.
      - name: Remove devtoolset
        if: "${{ matrix.distro == 'amazon2' && matrix.arch == 'arm64' }}"
        run: vagrant ssh ${{env.INSTANCE_NAME}} -c 'sudo rm /etc/profile.d/enable-llvm-toolset.sh'
        working-directory: ${{env.BOX_DIR}}
      - name: Install LVM and RAID tools
        if: "${{ matrix.distro != 'debian8' }}"
        run: |
          vagrant ssh ${{env.INSTANCE_NAME}} -c '
            set -x
            if which apt-get >/dev/null 2>&1; then
              export DEBIAN_FRONTEND=noninteractive
              sudo apt-get update
              sudo -E apt-get install -y --force-yes lvm2 mdadm
            else
              # Fedora mirrors are rather flaky, but we do not want builds to fail because of that.
              set +e
              for i in {1..5}; do
                sudo yum install -y lvm2 mdadm && break
                echo "Failed to install LVM and RAID packages. Retrying..."
                sleep 5
              done
              set -e
              mdadm -V
            fi
          '
        working-directory: ${{env.BOX_DIR}}
        timeout-minutes: 5
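      # In the yum branch above, `set +e` lets a failed install fall through
      # to the retry instead of aborting the session (in case the remote shell
      # runs with errexit), `&& break` leaves the loop on the first success,
      # and once `set -e` is restored `mdadm -V` verifies the tool is actually
      # present.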
      - name: Build packages
        run: vagrant ssh ${{env.INSTANCE_NAME}} -c 'sudo make ${PKG_TYPE}'
        working-directory: ${{env.BOX_DIR}}
      - name: Collect artifacts
        run: vagrant ssh ${{env.INSTANCE_NAME}} -c 'repobuild/collect_artifacts.sh'
        working-directory: ${{env.BOX_DIR}}
      - name: Build kernel module
        run: vagrant ssh ${{env.INSTANCE_NAME}} -c 'sudo make'
        working-directory: ${{env.BOX_DIR}}
      - name: Install kernel module
        run: |
          vagrant ssh ${{env.INSTANCE_NAME}} -c 'sudo make install'
        working-directory: ${{env.BOX_DIR}}
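      # The test loops below iterate over the job-level FS variable. FS is a
      # plain string ("ext2 ext3 ext4 xfs"), not a bash array, so the unquoted
      # ${FS[*]} relies on word splitting; each loop effectively runs as:
      #   for fs in ext2 ext3 ext4 xfs; do ...; done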
      - name: Run tests (loop device)
        run: |
          for fs in ${FS[*]}; do
            vagrant ssh ${{env.INSTANCE_NAME}} -c "cd tests && sudo ./elio-test.sh -f $fs"
          done
        working-directory: ${{env.BOX_DIR}}
        # For now the tests take up to 40 seconds per filesystem on amd64 and
        # about 2 minutes on arm64 or older 3.x kernels, but they can hang.
        # 35 minutes seems to be a reasonable timeout.
        timeout-minutes: 35
      - name: Show debug information
        if: always()
        run: vagrant ssh ${{env.INSTANCE_NAME}} -c "cat tests/dmesg.log; sudo dmesg -c; lsmod"
        working-directory: ${{env.BOX_DIR}}
        timeout-minutes: 2
      - name: Run tests on LVM (loop device)
        if: "${{ matrix.distro != 'debian8' }}"
        run: |
          for fs in ${FS[*]}; do
            vagrant ssh ${{env.INSTANCE_NAME}} -c "cd tests && sudo ./elio-test.sh -f $fs --lvm"
          done
        working-directory: ${{env.BOX_DIR}}
        # For now the tests take 10-20 seconds per filesystem on amd64 and
        # about 2 minutes on arm64, but they can hang.
        # 35 minutes seems to be a reasonable timeout.
        timeout-minutes: 35
      - name: Show debug information
        if: always()
        run: vagrant ssh ${{env.INSTANCE_NAME}} -c "cat tests/dmesg.log; sudo dmesg -c; lsmod"
        working-directory: ${{env.BOX_DIR}}
        timeout-minutes: 2
      - name: Run tests on RAID (loop device)
        if: "${{ matrix.distro != 'debian8' }}"
        run: |
          for fs in ${FS[*]}; do
            vagrant ssh ${{env.INSTANCE_NAME}} -c "cd tests && sudo ./elio-test.sh -f $fs --raid"
          done
        working-directory: ${{env.BOX_DIR}}
        # For now the tests take up to 40 seconds per filesystem on amd64 and
        # about 2 minutes on arm64 or older 3.x kernels, but they can hang.
        # 35 minutes seems to be a reasonable timeout.
        timeout-minutes: 35
      - name: Show debug information
        if: always()
        run: vagrant ssh ${{env.INSTANCE_NAME}} -c "cat tests/dmesg.log; sudo dmesg -c; lsmod"
        working-directory: ${{env.BOX_DIR}}
        timeout-minutes: 2
      - name: Attach qcow2 disks
        run: |
          SZ="2200M"
          ARCH=$(uname -m)
          TEST_IMAGES=(${TEST_IMAGES})
          TEST_DRIVES=(${TEST_DRIVES})
          # On non-x86_64 boxes attach with --config (persistent, applied after
          # reboot); `|| true` keeps the step alive under errexit when the test
          # is false on x86_64.
          [ ${ARCH} != "x86_64" ] && VIRSH_FLAGS="--config" || true
          for i in ${!TEST_IMAGES[*]}; do
            qemu-img create -f qcow2 ${TEST_IMAGES[i]} $SZ
            virsh attach-disk --domain ${BOX_DIR##*/}_${INSTANCE_NAME} --source ${TEST_IMAGES[i]} --target ${TEST_DRIVES[i]} --driver qemu --subdriver qcow2 --targetbus virtio ${VIRSH_FLAGS-}
          done
          # ARM64 boxes don't support hot plug without a reboot
          if [ ${ARCH} != "x86_64" ]; then
            virsh destroy --domain ${BOX_DIR##*/}_${INSTANCE_NAME}
            virsh start --domain ${BOX_DIR##*/}_${INSTANCE_NAME}
            while ! vagrant ssh ${INSTANCE_NAME} -c 'uptime'; do
              echo "Waiting..."
              sleep 1
            done
          fi
          for drive in ${TEST_DRIVES[@]}; do
            vagrant ssh ${{env.INSTANCE_NAME}} -c "echo -e \"n\\np\\n\\n\\n\\nw\" | sudo fdisk /dev/$drive"
          done
        working-directory: ${{env.BOX_DIR}}
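      # The echo pipeline above scripts fdisk's interactive prompts: "n"
      # creates a new partition, "p" makes it primary, the three empty answers
      # accept the default partition number, first sector and last sector (so
      # the partition spans the whole disk), and "w" writes the table and
      # exits.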
      - name: Run tests (qcow2 disk)
        run: |
          for fs in ${FS[*]}; do
            vagrant ssh ${{env.INSTANCE_NAME}} -c "cd tests && sudo ./elio-test.sh -d /dev/vdb1 -f $fs"
          done
        working-directory: ${{env.BOX_DIR}}
        timeout-minutes: 90
      - name: Show debug information
        if: always()
        run: vagrant ssh ${{env.INSTANCE_NAME}} -c "cat tests/dmesg.log; sudo dmesg -c; lsmod"
        working-directory: ${{env.BOX_DIR}}
        timeout-minutes: 2
      - name: Run tests on LVM (qcow2 disks)
        if: "${{ matrix.distro != 'debian8' }}"
        run: |
          for fs in ${FS[*]}; do
            vagrant ssh ${{env.INSTANCE_NAME}} -c "cd tests && sudo ./elio-test.sh -d /dev/vdb -d /dev/vdc -f $fs --lvm"
          done
        working-directory: ${{env.BOX_DIR}}
        timeout-minutes: 90
      - name: Show debug information
        if: always()
        run: vagrant ssh ${{env.INSTANCE_NAME}} -c "cat tests/dmesg.log; sudo dmesg -c; lsmod"
        working-directory: ${{env.BOX_DIR}}
        timeout-minutes: 2
      - name: Run tests on RAID (qcow2 disks)
        if: "${{ matrix.distro != 'debian8' }}"
        run: |
          for fs in ${FS[*]}; do
            # An issue is observed in the virtio driver with XFS and kernel 3.16 on Debian 8.
            # It's a known issue: it happens when mounting the raid1 device with XFS even if
            # elastio-snap is not loaded. See https://bugzilla.redhat.com/show_bug.cgi?id=1111290
            [ ${{ matrix.distro }} == debian8 ] && [ $fs == xfs ] && continue
            vagrant ssh ${{env.INSTANCE_NAME}} -c "cd tests && sudo ./elio-test.sh -d /dev/vdb -d /dev/vdc -f $fs --raid"
          done
        working-directory: ${{env.BOX_DIR}}
        timeout-minutes: 90
      - name: Show debug information
        if: always()
        run: vagrant ssh ${{env.INSTANCE_NAME}} -c "cat tests/dmesg.log; sudo dmesg -c; lsmod"
        working-directory: ${{env.BOX_DIR}}
        timeout-minutes: 2
      - name: Run tests multipart (qcow2 disk)
        run: |
          for fs in ${FS[*]}; do
            vagrant ssh ${{env.INSTANCE_NAME}} -c "cd tests && sudo ./elio-test.sh -d /dev/vdb -f $fs -t test_multipart"
          done
        working-directory: ${{env.BOX_DIR}}
        timeout-minutes: 25
      - name: Show debug information
        if: always()
        run: vagrant ssh ${{env.INSTANCE_NAME}} -c "cat tests/dmesg.log; sudo dmesg -c; lsmod"
        working-directory: ${{env.BOX_DIR}}
        timeout-minutes: 2
      - name: Uninstall kernel module
        run: vagrant ssh ${{env.INSTANCE_NAME}} -c 'sudo make uninstall'
        working-directory: ${{env.BOX_DIR}}
      - name: Detach external drives
        if: always()
        run: |
          TEST_IMAGES=(${TEST_IMAGES})
          for test_image in ${TEST_IMAGES[@]}; do
            if virsh domblklist ${BOX_DIR##*/}_${INSTANCE_NAME} --details | grep "file" | awk '{ print $NF }' | grep ${test_image} ; then
              virsh detach-disk --domain ${BOX_DIR##*/}_${INSTANCE_NAME} ${test_image}
            fi
            rm -f ${test_image}
          done
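      # For reference, `virsh domblklist <domain> --details` prints one row per
      # block device, roughly of this shape (illustrative output):
      #   Type   Device   Target   Source
      #   file   disk     vdb      /path/to/test-image.qcow2
      # so the grep/awk pipeline keeps only the Source paths of file-backed
      # disks and checks whether the image is still attached before detaching.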
      - name: Upload artifacts
        if: ${{ github.event_name == 'push' }}
        run: |
          excl_ptrn="*GPG-KEY"
          # We have to avoid a race condition when a package like
          # elastio-snap-dkms_X.XX.XX-1debian11_all.deb is uploaded from two
          # Debian 11 VMs (amd64 and arm64) to the same location at the same time.
          [ $(uname -m) != "x86_64" ] && [ -f /etc/debian_version ] && excl_ptrn=$excl_ptrn",*_all.deb" || true
          vagrant ssh ${{env.INSTANCE_NAME}} -c '
            if ! [[ ${SOURCE_BRANCH} =~ ^release ]] && ! [[ ${SOURCE_BRANCH} =~ ^master ]]; then \
              tag="--tag elastio:dev=true"; \
            fi; \
            repobuild/upload.sh \
              --source repobuild/artifacts/ \
              --bucket artifacts.assur.io \
              --target /linux/elastio-snap/${SOURCE_BRANCH}/${GITHUB_RUN_NUMBER}/${PKG_TYPE} \
              ${tag} \
              --exclude '"'$excl_ptrn'"''
        working-directory: ${{env.BOX_DIR}}
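      # With the branch above, an arm64 Debian runner ends up passing
      # --exclude "*GPG-KEY,*_all.deb" to upload.sh, so the arch-independent
      # _all.deb packages are uploaded only from the amd64 runner; everywhere
      # else the exclude list stays "*GPG-KEY".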
      - name: Destroy a box
        if: always()
        run: .github/scripts/destroy_box.sh
  manifest:
    if: ${{ github.event_name == 'push' }}
    name: Artifacts manifest
    needs: build-packages
    runs-on:
      - baremetal
    steps:
      - name: Checkout sources
        uses: actions/checkout@v3
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v1-node16
        with:
          aws-region: ${{ env.AWS_DEFAULT_REGION }}
          role-to-assume: ${{ secrets.ASSURIODEV_OIDC_AWS_ROLE_ARN }}
      - name: Make manifest
        run: echo $GITHUB_RUN_NUMBER > latest && grep -E '^[0-9]+$' latest
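      # The grep is a sanity check: it exits non-zero (failing the step)
      # unless `latest` contains a purely numeric run number, e.g. it catches
      # an empty or unset GITHUB_RUN_NUMBER before the manifest is uploaded.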
      - name: Upload manifest
        run: |
          branch=$(.github/scripts/detect_branch.sh)
          if ! [[ ${branch} =~ ^release ]] && ! [[ ${branch} =~ ^master ]]; then
            tag="--tag elastio:dev=true"
          fi
          repobuild/upload.sh \
            --source latest \
            --bucket artifacts.assur.io \
            --target /linux/elastio-snap/$branch \
            ${tag}
  dispatch-packaging-repo:
    if: ${{ github.event_name == 'push' }}
    name: Trigger repo upload
    needs: manifest
    runs-on:
      - baremetal
    steps:
      - name: Checkout sources
        uses: actions/checkout@v3
      - name: Dispatch packaging repo
        env:
          REPO_HOOK_TOKEN: ${{ secrets.REPO_HOOK_TOKEN }}
        run: .github/scripts/dispatch_packaging.sh