Fix snapshot reload on ec2 instances #1157

Workflow file for this run
---
name: ci
on:
  push:
    branches:
      - master
      - 'release**'
      - 'debug/**'
  pull_request: ~
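  # `~` is YAML null, so `pull_request: ~` triggers the workflow on every pull
  # request with the default activity types (opened, synchronize, reopened).
  # In the push filter, `**` matches any characters including `/`, so
  # 'release**' covers e.g. release, release-1.x and release/v2, while
  # 'debug/**' covers only branches under debug/.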
permissions:
  contents: read
  id-token: write
env:
  AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }}
jobs:
  build-packages:
    name: Build Packages
    runs-on:
      - metal
      - ${{ matrix.arch }}
    env:
      FS: ext2 ext3 ext4 xfs
    strategy:
      # Do not cancel all jobs when one fails. The driver's CI is fast,
      # but failing fast would prevent us from observing possible errors
      # on the other distros.
      fail-fast: false
      matrix:
        distro: [
          amazon2, amazon2023,
          centos7, centos8, centos9,
          debian8, debian9, debian10, debian11, debian12,
          fedora31, fedora32, fedora34, fedora35, fedora36, fedora37, fedora38,
          ubuntu2004, ubuntu2204
        ]
        arch: [ amd64 ]
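        # The `include` entries below extend the amd64 matrix with arm64
        # variants for the distros that have arm64 boxes; distros missing from
        # the list (e.g. centos7, debian8, debian9) are built on amd64 only.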
        include:
          - distro: amazon2
            arch: arm64
          - distro: amazon2023
            arch: arm64
          - distro: centos8
            arch: arm64
          - distro: centos9
            arch: arm64
          - distro: debian10
            arch: arm64
          - distro: debian11
            arch: arm64
          - distro: debian12
            arch: arm64
          - distro: fedora35
            arch: arm64
          - distro: fedora36
            arch: arm64
          - distro: fedora37
            arch: arm64
          - distro: fedora38
            arch: arm64
          - distro: ubuntu2204
            arch: arm64
    steps:
      - name: Checkout sources
        uses: actions/checkout@v3
      - name: Set ENV
        if: always()
        env:
          DISTRO: ${{ matrix.distro }}
          ARCH: ${{ matrix.arch }}
        run: .github/scripts/set_env.sh
      - name: Check ENV
        run: .github/scripts/check_env.sh
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v1-node16
        with:
          aws-region: ${{ env.AWS_DEFAULT_REGION }}
          role-to-assume: ${{ secrets.ASSURIODEV_OIDC_AWS_ROLE_ARN }}
          role-duration-seconds: 10800 # 3 hours
      - name: Start a box
        if: always()
        run: .github/scripts/start_box.sh
      - name: Boot Fedora 32, 35, 36 and 37 into the original kernel version
        if: "${{ matrix.distro == 'fedora32' || matrix.distro == 'fedora35' || matrix.distro == 'fedora36' || matrix.distro == 'fedora37' }}"
        run: |
          vagrant ssh ${{env.INSTANCE_NAME}} -c '
            set -x
            arch=$(rpm --eval \%_arch)
            ver=$(rpm -E \%fedora)
            case $ver in
              32) k_ver=5.9.16
                  k_patch=100
                  ;;
              35) k_ver=5.15.18
                  k_patch=200
                  ;;
              36) k_ver=5.17.14
                  k_patch=300
                  ;;
              37) k_ver=6.0.14
                  k_patch=300
                  ;;
            esac
            for package in "kernel-core" "kernel-modules" "kernel" "kernel-devel"; do
              sudo rpm -ivh --force https://kojipkgs.fedoraproject.org/packages/kernel/${k_ver}/${k_patch}.fc${ver}/${arch}/${package}-${k_ver}-${k_patch}.fc${ver}.${arch}.rpm
            done
          '
          vagrant reload ${{env.INSTANCE_NAME}}
        working-directory: ${{env.BOX_DIR}}
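      # For illustration: with the values above, the loop on a Fedora 35
      # x86_64 box resolves to koji URLs of this shape (one per package):
      #   https://kojipkgs.fedoraproject.org/packages/kernel/5.15.18/200.fc35/x86_64/kernel-core-5.15.18-200.fc35.x86_64.rpm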
      # For some reason Debian 8 comes with a too-new (unofficial) xfsprogs
      # installed. Replace it with the official release built from source.
      - name: Reinstall xfsprogs on Debian 8
        if: "${{ matrix.distro == 'debian8' }}"
        run: |
          vagrant ssh ${{env.INSTANCE_NAME}} -c '
            curl --retry 5 --retry-max-time 120 https://s3.eu-central-1.wasabisys.com/blobs-wasabi.elastio.dev/build_utils/xfsprogs_3.2.1.tar.gz | tar xz && cd xfsprogs-3.2.1 &&
            make && sudo make install && sudo make install-dev &&
            cd .. && rm -rf xfsprogs-3.2.1'
        working-directory: ${{env.BOX_DIR}}
        timeout-minutes: 5
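      # If the rebuild ever needs verifying, one quick manual check (not part
      # of this workflow) is to ask the installed tools for their version,
      # which should report 3.2.1 after this step:
      #   vagrant ssh ${{env.INSTANCE_NAME}} -c 'xfs_repair -V'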
      # Amazon Linux 2 has devtoolset-8 installed, which upgrades GCC from
      # 7.3.1 to 8.3.1. The new GCC doesn't compile the rpm packages properly
      # because /usr/lib/rpm/redhat/macros, provided by the package
      # system-rpm-config-9.1.0-76.amzn2.0.13.noarch, contains compilation
      # flags that apply to GCC 7 but were removed in GCC 8. The workaround is
      # to disable the toolset before the next build step.
      - name: Remove devtoolset
        if: "${{ matrix.distro == 'amazon2' && matrix.arch == 'arm64' }}"
        run: vagrant ssh ${{env.INSTANCE_NAME}} -c 'sudo rm /etc/profile.d/enable-llvm-toolset.sh'
        working-directory: ${{env.BOX_DIR}}
      - name: Install LVM and RAID tools
        if: "${{ matrix.distro != 'debian8' }}"
        run: |
          vagrant ssh ${{env.INSTANCE_NAME}} -c '
            set -x
            if which apt-get >/dev/null 2>&1; then
              export DEBIAN_FRONTEND=noninteractive
              sudo apt-get update
              sudo -E apt-get install -y --force-yes lvm2 mdadm
            else
              # Fedora mirrors are rather flaky, but we do not want builds to fail because of that.
              set +e
              for i in {1..5}; do
                sudo yum install -y lvm2 mdadm && break
                echo "Failed to install LVM and RAID packages. Retrying..."
                sleep 5
              done
              set -e
              mdadm -V
            fi
          '
        working-directory: ${{env.BOX_DIR}}
        timeout-minutes: 5
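      # In the yum branch above, `set +e` lets a failed install fall through
      # to the retry instead of aborting the session (in case the remote shell
      # runs with errexit), `&& break` leaves the loop on the first success,
      # and once `set -e` is restored `mdadm -V` verifies the tool is actually
      # present.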
      - name: Build packages
        run: vagrant ssh ${{env.INSTANCE_NAME}} -c 'sudo make ${PKG_TYPE}'
        working-directory: ${{env.BOX_DIR}}
      - name: Collect artifacts
        run: vagrant ssh ${{env.INSTANCE_NAME}} -c 'repobuild/collect_artifacts.sh'
        working-directory: ${{env.BOX_DIR}}
      - name: Build kernel module
        run: vagrant ssh ${{env.INSTANCE_NAME}} -c 'sudo make'
        working-directory: ${{env.BOX_DIR}}
      - name: Install kernel module
        run: |
          vagrant ssh ${{env.INSTANCE_NAME}} -c 'sudo make install'
        working-directory: ${{env.BOX_DIR}}
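      # The test loops below iterate over the job-level FS variable. FS is a
      # plain string ("ext2 ext3 ext4 xfs"), not a bash array, so the unquoted
      # ${FS[*]} relies on word splitting; each loop effectively runs as:
      #   for fs in ext2 ext3 ext4 xfs; do ...; done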
      - name: Run tests (loop device)
        run: |
          for fs in ${FS[*]}; do
            vagrant ssh ${{env.INSTANCE_NAME}} -c "cd tests && sudo ./elio-test.sh -f $fs"
          done
        working-directory: ${{env.BOX_DIR}}
        # For now the tests take up to 40 seconds per filesystem on amd64 and
        # about 2 minutes on arm64 or older 3.x kernels, but they can hang.
        # 35 minutes seems to be a reasonable timeout.
        timeout-minutes: 35
      - name: Show debug information
        if: always()
        run: vagrant ssh ${{env.INSTANCE_NAME}} -c "cat tests/dmesg.log; sudo dmesg -c; lsmod"
        working-directory: ${{env.BOX_DIR}}
        timeout-minutes: 2
      - name: Run tests on LVM (loop device)
        if: "${{ matrix.distro != 'debian8' }}"
        run: |
          for fs in ${FS[*]}; do
            vagrant ssh ${{env.INSTANCE_NAME}} -c "cd tests && sudo ./elio-test.sh -f $fs --lvm"
          done
        working-directory: ${{env.BOX_DIR}}
        # For now the tests take 10-20 seconds per filesystem on amd64 and
        # about 2 minutes on arm64, but they can hang.
        # 35 minutes seems to be a reasonable timeout.
        timeout-minutes: 35
      - name: Show debug information
        if: always()
        run: vagrant ssh ${{env.INSTANCE_NAME}} -c "cat tests/dmesg.log; sudo dmesg -c; lsmod"
        working-directory: ${{env.BOX_DIR}}
        timeout-minutes: 2
      - name: Run tests on RAID (loop device)
        if: "${{ matrix.distro != 'debian8' }}"
        run: |
          for fs in ${FS[*]}; do
            vagrant ssh ${{env.INSTANCE_NAME}} -c "cd tests && sudo ./elio-test.sh -f $fs --raid"
          done
        working-directory: ${{env.BOX_DIR}}
        # For now the tests take up to 40 seconds per filesystem on amd64 and
        # about 2 minutes on arm64 or older 3.x kernels, but they can hang.
        # 35 minutes seems to be a reasonable timeout.
        timeout-minutes: 35
      - name: Show debug information
        if: always()
        run: vagrant ssh ${{env.INSTANCE_NAME}} -c "cat tests/dmesg.log; sudo dmesg -c; lsmod"
        working-directory: ${{env.BOX_DIR}}
        timeout-minutes: 2
      - name: Attach qcow2 disks
        run: |
          SZ="2200M"
          ARCH=$(uname -m)
          TEST_IMAGES=(${TEST_IMAGES})
          TEST_DRIVES=(${TEST_DRIVES})
          # On non-x86_64 boxes attach with --config (persistent, applied after
          # reboot); `|| true` keeps the step alive under errexit when the test
          # is false on x86_64.
          [ ${ARCH} != "x86_64" ] && VIRSH_FLAGS="--config" || true
          for i in ${!TEST_IMAGES[*]}; do
            qemu-img create -f qcow2 ${TEST_IMAGES[i]} $SZ
            virsh attach-disk --domain ${BOX_DIR##*/}_${INSTANCE_NAME} --source ${TEST_IMAGES[i]} --target ${TEST_DRIVES[i]} --driver qemu --subdriver qcow2 --targetbus virtio ${VIRSH_FLAGS-}
          done
          # ARM64 boxes don't support hot plug without a reboot
          if [ ${ARCH} != "x86_64" ]; then
            virsh destroy --domain ${BOX_DIR##*/}_${INSTANCE_NAME}
            virsh start --domain ${BOX_DIR##*/}_${INSTANCE_NAME}
            while ! vagrant ssh ${INSTANCE_NAME} -c 'uptime'; do
              echo "Waiting..."
              sleep 1
            done
          fi
          for drive in ${TEST_DRIVES[@]}; do
            vagrant ssh ${{env.INSTANCE_NAME}} -c "echo -e \"n\\np\\n\\n\\n\\nw\" | sudo fdisk /dev/$drive"
          done
        working-directory: ${{env.BOX_DIR}}
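      # The echo pipeline above scripts fdisk's interactive prompts: "n"
      # creates a new partition, "p" makes it primary, the three empty answers
      # accept the default partition number, first sector and last sector (so
      # the partition spans the whole disk), and "w" writes the table and
      # exits.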
      - name: Run tests (qcow2 disk)
        run: |
          for fs in ${FS[*]}; do
            vagrant ssh ${{env.INSTANCE_NAME}} -c "cd tests && sudo ./elio-test.sh -d /dev/vdb1 -f $fs"
          done
        working-directory: ${{env.BOX_DIR}}
        timeout-minutes: 90
      - name: Show debug information
        if: always()
        run: vagrant ssh ${{env.INSTANCE_NAME}} -c "cat tests/dmesg.log; sudo dmesg -c; lsmod"
        working-directory: ${{env.BOX_DIR}}
        timeout-minutes: 2
      - name: Run tests on LVM (qcow2 disks)
        if: "${{ matrix.distro != 'debian8' }}"
        run: |
          for fs in ${FS[*]}; do
            vagrant ssh ${{env.INSTANCE_NAME}} -c "cd tests && sudo ./elio-test.sh -d /dev/vdb -d /dev/vdc -f $fs --lvm"
          done
        working-directory: ${{env.BOX_DIR}}
        timeout-minutes: 90
      - name: Show debug information
        if: always()
        run: vagrant ssh ${{env.INSTANCE_NAME}} -c "cat tests/dmesg.log; sudo dmesg -c; lsmod"
        working-directory: ${{env.BOX_DIR}}
        timeout-minutes: 2
      - name: Run tests on RAID (qcow2 disks)
        if: "${{ matrix.distro != 'debian8' }}"
        run: |
          for fs in ${FS[*]}; do
            # An issue is observed in the virtio driver with XFS and kernel 3.16 on Debian 8.
            # It's a known issue: it happens when mounting the raid1 device with XFS even if
            # elastio-snap is not loaded. See https://bugzilla.redhat.com/show_bug.cgi?id=1111290
            [ ${{ matrix.distro }} == debian8 ] && [ $fs == xfs ] && continue
            vagrant ssh ${{env.INSTANCE_NAME}} -c "cd tests && sudo ./elio-test.sh -d /dev/vdb -d /dev/vdc -f $fs --raid"
          done
        working-directory: ${{env.BOX_DIR}}
        timeout-minutes: 90
      - name: Show debug information
        if: always()
        run: vagrant ssh ${{env.INSTANCE_NAME}} -c "cat tests/dmesg.log; sudo dmesg -c; lsmod"
        working-directory: ${{env.BOX_DIR}}
        timeout-minutes: 2
      - name: Run tests multipart (qcow2 disk)
        run: |
          for fs in ${FS[*]}; do
            vagrant ssh ${{env.INSTANCE_NAME}} -c "cd tests && sudo ./elio-test.sh -d /dev/vdb -f $fs -t test_multipart"
          done
        working-directory: ${{env.BOX_DIR}}
        timeout-minutes: 25
      - name: Show debug information
        if: always()
        run: vagrant ssh ${{env.INSTANCE_NAME}} -c "cat tests/dmesg.log; sudo dmesg -c; lsmod"
        working-directory: ${{env.BOX_DIR}}
        timeout-minutes: 2
      - name: Uninstall kernel module
        run: vagrant ssh ${{env.INSTANCE_NAME}} -c 'sudo make uninstall'
        working-directory: ${{env.BOX_DIR}}
      - name: Detach external drives
        if: always()
        run: |
          TEST_IMAGES=(${TEST_IMAGES})
          for test_image in ${TEST_IMAGES[@]}; do
            if virsh domblklist ${BOX_DIR##*/}_${INSTANCE_NAME} --details | grep "file" | awk '{ print $NF }' | grep ${test_image} ; then
              virsh detach-disk --domain ${BOX_DIR##*/}_${INSTANCE_NAME} ${test_image}
            fi
            rm -f ${test_image}
          done
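      # For reference, `virsh domblklist <domain> --details` prints one row per
      # block device, roughly of this shape (illustrative output):
      #   Type   Device   Target   Source
      #   file   disk     vdb      /path/to/test-image.qcow2
      # so the grep/awk pipeline keeps only the Source paths of file-backed
      # disks and checks whether the image is still attached before detaching.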
      - name: Upload artifacts
        if: ${{ github.event_name == 'push' }}
        run: |
          excl_ptrn="*GPG-KEY"
          # We have to avoid a race condition when a package like
          # elastio-snap-dkms_X.XX.XX-1debian11_all.deb is uploaded from two
          # Debian 11 VMs (amd64 and arm64) to the same location at the same time.
          [ $(uname -m) != "x86_64" ] && [ -f /etc/debian_version ] && excl_ptrn=$excl_ptrn",*_all.deb" || true
          vagrant ssh ${{env.INSTANCE_NAME}} -c '
            if ! [[ ${SOURCE_BRANCH} =~ ^release ]] && ! [[ ${SOURCE_BRANCH} =~ ^master ]]; then \
              tag="--tag elastio:dev=true"; \
            fi; \
            repobuild/upload.sh \
              --source repobuild/artifacts/ \
              --bucket artifacts.assur.io \
              --target /linux/elastio-snap/${SOURCE_BRANCH}/${GITHUB_RUN_NUMBER}/${PKG_TYPE} \
              ${tag} \
              --exclude '"'$excl_ptrn'"''
        working-directory: ${{env.BOX_DIR}}
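      # With the branch above, an arm64 Debian runner ends up passing
      # --exclude "*GPG-KEY,*_all.deb" to upload.sh, so the arch-independent
      # _all.deb packages are uploaded only from the amd64 runner; everywhere
      # else the exclude list stays "*GPG-KEY".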
      - name: Destroy a box
        if: always()
        run: .github/scripts/destroy_box.sh
  manifest:
    if: ${{ github.event_name == 'push' }}
    name: Artifacts manifest
    needs: build-packages
    runs-on:
      - baremetal
    steps:
      - name: Checkout sources
        uses: actions/checkout@v3
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v1-node16
        with:
          aws-region: ${{ env.AWS_DEFAULT_REGION }}
          role-to-assume: ${{ secrets.ASSURIODEV_OIDC_AWS_ROLE_ARN }}
      - name: Make manifest
        run: echo $GITHUB_RUN_NUMBER > latest && grep -E '^[0-9]+$' latest
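      # The grep is a sanity check: it exits non-zero (failing the step)
      # unless `latest` contains a purely numeric run number, e.g. it catches
      # an empty or unset GITHUB_RUN_NUMBER before the manifest is uploaded.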
      - name: Upload manifest
        run: |
          branch=$(.github/scripts/detect_branch.sh)
          if ! [[ ${branch} =~ ^release ]] && ! [[ ${branch} =~ ^master ]]; then
            tag="--tag elastio:dev=true"
          fi
          repobuild/upload.sh \
            --source latest \
            --bucket artifacts.assur.io \
            --target /linux/elastio-snap/$branch \
            ${tag}
  dispatch-packaging-repo:
    if: ${{ github.event_name == 'push' }}
    name: Trigger repo upload
    needs: manifest
    runs-on:
      - baremetal
    steps:
      - name: Checkout sources
        uses: actions/checkout@v3
      - name: Dispatch packaging repo
        env:
          REPO_HOOK_TOKEN: ${{ secrets.REPO_HOOK_TOKEN }}
        run: .github/scripts/dispatch_packaging.sh