Skip to content

Commit fb81dc8

Browse files
committed
Test AWS image before copying to regions and making public
- Fix cleanup script that didn't properly split the versions - Treat beta,alpha,pre and others as non-stable (not just "rc") - Test the aws AMI before copying it to other regions or making it public Signed-off-by: Dimitris Karakasilis <[email protected]>
1 parent 7a7f102 commit fb81dc8

File tree

5 files changed

+282
-45
lines changed

5 files changed

+282
-45
lines changed

.github/public-cloud/cleanup-old-images-aws.sh

+43-42
Original file line numberDiff line numberDiff line change
@@ -21,22 +21,19 @@ amiDeleteIfNotInVersionList() {
2121
shift 2
2222
local versionList=("$@")
2323

24-
# TODO:
25-
# Make the script stop when things fail (it didn't?). Like so:
26-
# $(dosomething | tail -1 | tee /dev/fd/2)
27-
28-
# get all image tags
29-
mapfile -t imgTags < <(AWSNR --region "$reg" ec2 describe-images --image-ids "$img" --query 'Images[].Tags[]' --output text)
24+
# Get all image tags and properly parse them
3025
TagExists=false
31-
for tag in "${imgTags[@]}"; do
32-
for tagToCheck in "${versionList[@]}"; do
33-
if [[ $tag == "KairosVersion"*"$tagToCheck" ]]; then
34-
echo "[$reg] AMI $img has the '$tagToCheck' tag. Skipping cleanup."
35-
TagExists=true
36-
break 2
37-
fi
38-
done
39-
done
26+
while IFS=$'\t' read -r key value; do
27+
if [[ "$key" == "KairosVersion" ]]; then
28+
for tagToCheck in "${versionList[@]}"; do
29+
if [[ "$value" == "$tagToCheck" ]]; then
30+
echo "[$reg] AMI $img has the '$tagToCheck' tag. Skipping cleanup."
31+
TagExists=true
32+
break 2
33+
fi
34+
done
35+
fi
36+
done < <(AWSNR --region "$reg" ec2 describe-images --image-ids "$img" --query 'Images[].Tags[].[Key,Value]' --output text)
4037

4138
if [ "$TagExists" = false ]; then
4239
AWSNR --region "$reg" ec2 deregister-image --image-id "$img"
@@ -50,18 +47,19 @@ snapshotDeleteIfNotInVersionList() {
5047
shift 2
5148
local versionList=("$@")
5249

53-
# Get all snapshot tags
54-
mapfile -t snapshotTags < <(AWSNR --region "$reg" ec2 describe-snapshots --snapshot-ids "$snapshot" --query 'Snapshots[].Tags[]' --output text)
50+
# Get all snapshot tags and properly parse them
5551
TagExists=false
56-
for tag in "${snapshotTags[@]}"; do
57-
for tagToCheck in "${versionList[@]}"; do
58-
if [[ $tag == "KairosVersion"*"$tagToCheck" ]]; then
59-
echo "[$reg] Snapshot $snapshot has the '$tagToCheck' tag. Skipping cleanup."
60-
TagExists=true
61-
break
62-
fi
63-
done
64-
done
52+
while IFS=$'\t' read -r key value; do
53+
if [[ "$key" == "KairosVersion" ]]; then
54+
for tagToCheck in "${versionList[@]}"; do
55+
if [[ "$value" == "$tagToCheck" ]]; then
56+
echo "[$reg] Snapshot $snapshot has the '$tagToCheck' tag. Skipping cleanup."
57+
TagExists=true
58+
break 2
59+
fi
60+
done
61+
fi
62+
done < <(AWSNR --region "$reg" ec2 describe-snapshots --snapshot-ids "$snapshot" --query 'Snapshots[].Tags[].[Key,Value]' --output text)
6563

6664
if [ "$TagExists" = false ]; then
6765
(AWSNR --region "$reg" ec2 delete-snapshot --snapshot-id "$snapshot" && \
@@ -71,27 +69,30 @@ snapshotDeleteIfNotInVersionList() {
7169

7270
# Delete an S3 object unless its KairosVersion tag matches a version to keep.
# Arguments:
#   $1   - bucket name
#   $2   - object key
#   $3.. - versions that must be kept
# Globals:
#   TagExists (written) - "true" when a matching KairosVersion tag was found.
#   NOTE(review): kept global as before; confirm no caller reads it before
#   making it local.
# Outputs: progress messages on stdout, a warning on stderr when the tags
#          cannot be read.
# Returns: 0 when the object was kept, deleted, or skipped because its tags
#          could not be read.
s3ObjectDeleteIfNotInVersionList() {
  local bucket=$1
  local objectKey=$2
  shift 2
  local versionList=("$@")
  local tagLines tagKey tagValue tagToCheck

  TagExists=false

  # Read the tags once. A failed lookup must never be treated as "no matching
  # tag": that would delete objects we simply could not inspect.
  if ! tagLines=$(AWSNR s3api get-object-tagging --bucket "$bucket" --key "$objectKey" --query 'TagSet[].[Key,Value]' --output text); then
    echo "Warning: could not read tags of S3 object '$objectKey' in bucket '$bucket'. Skipping cleanup." >&2
    return 0
  fi

  # Each line is "<Key><TAB><Value>"; only an exact KairosVersion value counts.
  while IFS=$'\t' read -r tagKey tagValue; do
    [[ "$tagKey" == "KairosVersion" ]] || continue
    for tagToCheck in "${versionList[@]}"; do
      if [[ "$tagValue" == "$tagToCheck" ]]; then
        echo "S3 object '$objectKey' in bucket '$bucket' has the '$tagToCheck' tag. Skipping cleanup."
        TagExists=true
        break 2
      fi
    done
  done <<< "$tagLines"

  if [ "$TagExists" = false ]; then
    AWSNR s3api delete-object --bucket "$bucket" --key "$objectKey"
    echo "S3 object $objectKey in bucket $bucket deleted because it does not match any of the versions: '${versionList[*]}'."
  fi
}
9798

@@ -105,9 +106,9 @@ getHighest4StableVersions() {
105106
# Get all Kairos versions
106107
mapfile -t kairosVersions < <(AWSNR --region "$reg" ec2 describe-images --owners self --query "Images[].Tags[?Key=='KairosVersion'].Value" --output text)
107108

108-
# Filter out non-stable versions (those containing '-rc')
109+
# Filter out non-stable versions (those containing '-rc', '-beta', '-alpha', etc.)
109110
for version in "${kairosVersions[@]}"; do
110-
if [[ ! $version =~ -rc ]]; then
111+
if [[ ! $version =~ -(rc|beta|alpha|dev|pre|test) ]]; then
111112
stableVersions+=("$version")
112113
fi
113114
done

.github/public-cloud/cleanup-old-images-azure.sh

+4-1
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,11 @@ cleanupOldVersions() {
4141
echo "Keeping latest 4 versions:" "${sortedVersions[@]:0:4}"
4242
oldVersions=("${sortedVersions[@]:4}")
4343

44+
# Filter out non-stable versions (those containing '-rc', '-beta', '-alpha', etc.)
4445
for version in "${oldVersions[@]}"; do
45-
deleteVersion "$version"
46+
if [[ ! $version =~ -(rc|beta|alpha|dev|pre|test) ]]; then
47+
deleteVersion "$version"
48+
fi
4649
done
4750
}
4851

.github/public-cloud/cleanup-old-images-gcp.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@ getHighest4StableVersions() {
1818
# Get all Kairos image versions
1919
mapfile -t versions < <(gcloudCmd compute images list --filter="family=kairos" --format="value(labels.version)" | sort -u)
2020

21-
# Filter out non-stable versions (those containing '-rc')
21+
# Filter out non-stable versions (those containing '-rc', '-beta', '-alpha', etc.)
2222
for version in "${versions[@]}"; do
23-
if [[ ! $version =~ -rc ]]; then
23+
if [[ ! $version =~ -(rc|beta|alpha|dev|pre|test) ]]; then
2424
stableVersions+=("$version")
2525
fi
2626
done
+223
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
#!/bin/bash
2+
3+
set -e
4+
set -o pipefail
5+
6+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
7+
8+
# AWS wrapper with region
9+
# Run the AWS CLI against $AWS_REGION, adding --profile when $AWS_PROFILE is
# set to a non-empty value.
# Arguments: passed through unchanged to `aws`.
# Returns:   the exit status of `aws`.
AWS() {
  # ${VAR:-} keeps this usable from callers that run with `set -u`.
  local -a cli_args=(--region "${AWS_REGION:-}")

  if [ -n "${AWS_PROFILE:-}" ]; then
    cli_args+=(--profile "$AWS_PROFILE")
  fi

  aws "${cli_args[@]}" "$@"
}
16+
17+
# Abort unless the environment provides a non-empty AWS_REGION.
# Outputs: an error message on stderr when the variable is missing.
# Returns: 0 when AWS_REGION is set; otherwise exits the shell with status 1.
checkEnvVars() {
  # ${VAR:-} so an unset variable is reported here instead of tripping `set -u`.
  if [ -z "${AWS_REGION:-}" ]; then
    echo "Error: AWS_REGION environment variable must be set." >&2
    exit 1
  fi
}
23+
24+
# Poll EC2 until an instance reaches the requested state.
# Arguments:
#   $1 - instance ID
#   $2 - state to wait for (e.g. "running", "terminated")
#   $3 - optional: maximum number of polls (default 120)
#   $4 - optional: seconds to sleep between polls (default 10)
# Outputs: progress messages on stdout; timeout and query-failure diagnostics
#          on stderr.
# Returns: 0 once the state is reached. Exits the shell with status 1 when the
#          instance is shutting down or terminated while another state was
#          requested, when the state query keeps failing, or on timeout.
waitForInstanceStatus() {
  local instance_id="$1"
  local target_state="$2"
  local max_attempts="${3:-120}"
  local poll_interval="${4:-10}"
  local max_query_failures=5
  local attempt=0
  local query_failures=0
  local status=""

  echo "Waiting for instance $instance_id to reach state $target_state..."
  while [ "$attempt" -lt "$max_attempts" ]; do
    attempt=$((attempt + 1))

    # A freshly launched instance may not be visible to DescribeInstances yet,
    # so a few consecutive query failures are retried instead of aborting.
    if ! status=$(AWS ec2 describe-instances --instance-ids "$instance_id" \
      --query 'Reservations[0].Instances[0].State.Name' --output text); then
      query_failures=$((query_failures + 1))
      if [ "$query_failures" -ge "$max_query_failures" ]; then
        echo "Failed to query the state of instance $instance_id $query_failures times in a row" >&2
        exit 1
      fi
      echo "Could not query instance state, retrying..." >&2
      sleep "$poll_interval"
      continue
    fi
    query_failures=0

    if [ "$status" == "$target_state" ]; then
      echo "Instance reached state: $target_state"
      return 0
    fi

    case "$status" in
      shutting-down)
        # Only acceptable as an intermediate step towards "terminated".
        if [ "$target_state" != "terminated" ]; then
          echo "Instance reached terminal state '$status' while waiting for '$target_state'"
          exit 1
        fi
        echo "Instance is shutting down, waiting for full termination..."
        ;;
      terminated)
        echo "Instance reached terminal state '$status' while waiting for '$target_state'"
        exit 1
        ;;
      *)
        echo "Current instance state: $status"
        ;;
    esac

    sleep "$poll_interval"
  done

  echo "Timed out waiting for instance $instance_id to reach state $target_state" >&2
  exit 1
}
52+
53+
# Poll EC2 until the system status check of an instance reports "ok".
# Arguments:
#   $1 - instance ID
#   $2 - optional: maximum number of polls (default 120)
#   $3 - optional: seconds to sleep between polls (default 10)
# Outputs: progress messages on stdout; timeout and query-failure diagnostics
#          on stderr.
# Returns: 0 once the status is "ok". Exits the shell with status 1 when the
#          status is "impaired", when the query keeps failing, or on timeout.
waitForSystemStatus() {
  local instance_id="$1"
  local max_attempts="${2:-120}"
  local poll_interval="${3:-10}"
  local max_query_failures=5
  local attempt=0
  local query_failures=0
  local status=""

  echo "Waiting for instance system status checks..."
  while [ "$attempt" -lt "$max_attempts" ]; do
    attempt=$((attempt + 1))

    # Retry a few consecutive API failures rather than aborting on the first.
    if ! status=$(AWS ec2 describe-instance-status --instance-ids "$instance_id" \
      --query 'InstanceStatuses[0].SystemStatus.Status' --output text); then
      query_failures=$((query_failures + 1))
      if [ "$query_failures" -ge "$max_query_failures" ]; then
        echo "Failed to query the system status of instance $instance_id $query_failures times in a row" >&2
        exit 1
      fi
      echo "Could not query system status, retrying..." >&2
      sleep "$poll_interval"
      continue
    fi
    query_failures=0

    case "$status" in
      ok)
        echo "System status checks passed"
        return 0
        ;;
      impaired)
        echo "Instance system status failed - status: $status"
        exit 1
        ;;
      *)
        # Any other value is treated as "not ready yet".
        echo "Current system status: $status"
        sleep "$poll_interval"
        ;;
    esac
  done

  echo "Timed out waiting for system status checks of instance $instance_id" >&2
  exit 1
}
74+
75+
# Poll a host over SSH until `kairos-agent state get boot` reports active_boot.
# Arguments:
#   $1 - public IP address of the instance
#   $2 - path to the private SSH key for the "kairos" user
#   $3 - optional: maximum number of attempts (default 60)
#   $4 - optional: seconds to sleep between attempts (default 10)
# Outputs: progress messages on stdout.
# Returns: 0 when active_boot is reported, 1 when the attempts are exhausted.
waitForKairosActiveBoot() {
  local public_ip="$1"
  local key_file="$2"
  local max_attempts="${3:-60}"
  local poll_interval="${4:-10}"
  local attempt=0
  local raw_output=""
  local boot_state=""
  local ssh_error=""

  echo "Waiting for Kairos to reach active_boot state..."
  while [ "$attempt" -lt "$max_attempts" ]; do
    # Host key checking is disabled: the test instance is brand new.
    if raw_output=$(ssh -i "$key_file" \
      -o StrictHostKeyChecking=no \
      -o UserKnownHostsFile=/dev/null \
      -o ConnectTimeout=5 \
      -o BatchMode=yes \
      -o LogLevel=ERROR \
      kairos@"$public_ip" \
      kairos-agent state get boot 2>&1); then
      # The connection worked, so an earlier SSH error is no longer relevant.
      ssh_error=""

      # stderr is merged into the output above; drop ssh noise lines and all
      # whitespace. `|| true` keeps grep's "no lines left" status (1) from
      # failing the pipeline under pipefail/errexit.
      boot_state=$(printf '%s\n' "$raw_output" \
        | { grep -Ev 'WARNING|Offending|authentication|host key' || true; } \
        | tr -d '[:space:]')

      if [ "$boot_state" == "active_boot" ]; then
        echo "Kairos has successfully reached active_boot state!"
        return 0
      fi
      echo "Current Kairos boot state: $boot_state"
    else
      ssh_error=$raw_output
      echo "SSH connection attempt failed. Error: $ssh_error"
    fi

    attempt=$((attempt + 1))
    sleep "$poll_interval"
  done

  echo "Timeout waiting for Kairos to reach active_boot state"
  if [ -n "$ssh_error" ]; then
    echo "Last SSH error: $ssh_error"
  fi
  return 1
}
117+
118+
# Tear down whatever testKairosImage managed to create. Every argument may be
# empty, in which case the corresponding resource is skipped.
# Arguments:
#   $1 - instance ID to terminate
#   $2 - security group ID to delete
#   $3 - directory holding the temporary SSH key pair
cleanupKairosTestResources() {
  local instance_id="$1"
  local sg_id="$2"
  local key_dir="$3"

  if [ -n "$instance_id" ]; then
    AWS ec2 terminate-instances --instance-ids "$instance_id" \
      || echo "Warning: failed to terminate instance $instance_id" >&2
    # Wait before touching the security group: it cannot be deleted while the
    # instance is still using it.
    AWS ec2 wait instance-terminated --instance-ids "$instance_id" \
      || echo "Warning: instance $instance_id did not reach the terminated state" >&2
  fi

  if [ -n "$sg_id" ]; then
    AWS ec2 delete-security-group --group-id "$sg_id" \
      || echo "Warning: failed to delete security group $sg_id" >&2
  fi

  if [ -n "$key_dir" ]; then
    rm -rf -- "$key_dir"
  fi

  echo "Cleanup complete"
}

# Boot a throw-away EC2 instance from an AMI and verify over SSH that Kairos
# reaches the active_boot state.
# Arguments:
#   $1 - AMI ID to test
# Outputs: progress messages on stdout, diagnostics on stderr.
# Returns: 0 when the image passed the test; exits the shell with a non-zero
#          status otherwise. The instance, security group and SSH key are
#          removed on every exit path.
testKairosImage() {
  local ami_id="$1"
  local instance_type="t3.small"
  local test_name
  local instance_id="" sg_id="" key_dir=""
  local temp_key_file public_key userdata public_ip
  local rc=0

  test_name="kairos-test-$(date +%s)"

  # Run in a subshell with its own EXIT trap so cleanup also happens when one
  # of the wait helpers calls `exit`, without replacing any trap of the caller.
  (
    trap 'cleanupKairosTestResources "$instance_id" "$sg_id" "$key_dir"' EXIT

    echo "Generating temporary SSH key pair..."
    # mktemp gives an unpredictable, private location for the key.
    key_dir=$(mktemp -d) || exit 1
    temp_key_file="$key_dir/id_rsa"
    ssh-keygen -t rsa -b 2048 -f "$temp_key_file" -N "" -q || exit 1
    public_key=$(<"${temp_key_file}.pub") || exit 1

    echo "Creating security group..."
    sg_id=$(AWS ec2 create-security-group \
      --group-name "$test_name" \
      --description "Temporary security group for Kairos testing" \
      --query 'GroupId' --output text) || exit 1

    # SSH is opened to any source address for the lifetime of the test.
    AWS ec2 authorize-security-group-ingress \
      --group-id "$sg_id" \
      --protocol tcp \
      --port 22 \
      --cidr 0.0.0.0/0 || exit 1

    # Cloud-config that authorises the temporary key for the "kairos" user.
    # NOTE(review): the YAML nesting was reconstructed from a flattened source;
    # confirm it against the Kairos cloud-config schema.
    userdata=$(printf '%s\n' \
      '#cloud-config' \
      'install:' \
      '  auto: true' \
      '  reboot: true' \
      '  device: auto' \
      '  poweroff: false' \
      'users:' \
      '  - name: kairos' \
      '    ssh_authorized_keys:' \
      "      - ${public_key}" \
      '    groups:' \
      '      - admin')

    echo "Launching test instance..."
    instance_id=$(AWS ec2 run-instances \
      --image-id "$ami_id" \
      --instance-type "$instance_type" \
      --security-group-ids "$sg_id" \
      --user-data "$userdata" \
      --tag-specifications "ResourceType=instance,Tags=[{Key=Name,Value=$test_name}]" \
      --block-device-mappings "[{\"DeviceName\":\"/dev/xvda\",\"Ebs\":{\"VolumeSize\":40}}]" \
      --query 'Instances[0].InstanceId' \
      --output text) || exit 1

    echo "Test instance $instance_id launched"

    # Both helpers either return 0 or exit the (sub)shell on failure.
    waitForInstanceStatus "$instance_id" "running"
    waitForSystemStatus "$instance_id"

    public_ip=$(AWS ec2 describe-instances --instance-ids "$instance_id" \
      --query 'Reservations[0].Instances[0].PublicIpAddress' --output text) || exit 1
    # With --output text the CLI prints "None" for a missing value.
    if [ -z "$public_ip" ] || [ "$public_ip" == "None" ]; then
      echo "Instance $instance_id has no public IP address; cannot test over SSH" >&2
      exit 1
    fi

    echo "Testing Kairos installation and boot state..."
    if ! waitForKairosActiveBoot "$public_ip" "$temp_key_file"; then
      echo "Failed to verify Kairos active_boot state"
      exit 1
    fi

    echo "Test successful! Cleaning up resources..."
  )
  rc=$?

  if [ "$rc" -ne 0 ]; then
    exit "$rc"
  fi
}
206+
207+
# Entry point: validate the command line and environment, then test the AMI.
# Arguments:
#   $1 - ID of the AMI to test (exactly one non-empty argument is required)
# Outputs: usage errors on stderr.
# Returns: the status of testKairosImage; exits the shell with status 1 on a
#          usage error.
main() {
  # An empty string is not a usable AMI ID either.
  if [ $# -ne 1 ] || [ -z "$1" ]; then
    echo "Error: You need to specify the AMI ID to test." >&2
    echo "Usage: $0 <ami-id>" >&2
    exit 1
  fi

  checkEnvVars

  local ami_id="$1"
  testKairosImage "$ami_id"
}
219+
220+
# Call main only when this file is executed directly: in that case
# BASH_SOURCE[0] and $0 refer to the same file.
if [[ "${BASH_SOURCE[0]}" -ef "$0" ]]; then
  main "$@"
fi

.github/public-cloud/upload-image-to-aws.sh

+10
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,15 @@ checkImageExistsOrCreate() {
231231
fi
232232

233233
waitAMI "$imageID" "$AWS_REGION"
234+
235+
# Test the original AMI before making it public and copying to other regions
236+
echo "Testing Kairos image before making it public..."
237+
if ! "$SCRIPT_DIR/test-aws-image.sh" "$imageID"; then
238+
echo "Image test failed! Not proceeding with making the image public and copying to other regions."
239+
exit 1
240+
fi
241+
echo "Image test passed successfully. Proceeding with making image public and copying to other regions..."
242+
234243
makeAMIpublic "$imageID" "$AWS_REGION"
235244
copyToAllRegions "$imageID" "$imageName" "$description" "$kairosVersion"
236245
}
@@ -259,6 +268,7 @@ makeAMIpublic() {
259268
local imageID="$1"
260269
local region="$2"
261270

271+
echo "[$region] Making image public..."
262272
echo "[$region] calling DisableImageBlockPublicAccess"
263273
AWSNR --region "$region" ec2 disable-image-block-public-access > /dev/null 2>&1
264274
echo "[$region] Making image '$imageID' public..."

0 commit comments

Comments
 (0)