Add Garbage Collection Scripts (#25)
[skip ci]
ajschmidt8 authored Dec 13, 2023
1 parent 8e2a51e commit 5d420bc
Showing 18 changed files with 351 additions and 1 deletion.
2 changes: 2 additions & 0 deletions .github/workflows/build.yaml
@@ -74,6 +74,7 @@ jobs:
-var "arch=${ARCH}" \
-var "driver_version=${DRIVER_VERSION}" \
-var "gh_run_id=${NV_RUN_ID}" \
-var "gh_token=${GH_TOKEN}" \
-var "headless=true" \
-var "image_name=${NV_IMAGE_NAME}" \
-var "os=${OS}" \
@@ -85,6 +86,7 @@ jobs:
env:
ARCH: ${{ matrix.ARCH }}
DRIVER_VERSION: ${{ matrix.DRIVER_VERSION }}
GH_TOKEN: ${{ github.token }}
OS: ${{ matrix.OS }}
PACKER_GITHUB_API_TOKEN: ${{ github.token }}
PACKER_SOURCE: ${{ matrix.packer_source }}
52 changes: 52 additions & 0 deletions .github/workflows/gc.yaml
@@ -0,0 +1,52 @@
name: garbage collection

on:
  workflow_call:
    inputs:
      dry_run:
        default: true
        type: boolean
  workflow_dispatch:
    inputs:
      dry_run:
        default: true
        type: boolean
  schedule:
    # Run every Monday at 1pm UTC / 8am ET
    - cron: "0 13 * * 1"

permissions:
  id-token: write
  contents: read

concurrency:
  # keep `gc-` prefix to avoid collisions with other workflows
  # when used with `workflow_call` event
  group: "gc-${{ github.workflow }}-${{ github.ref }}"
  cancel-in-progress: true

defaults:
  run:
    working-directory: ./gc

jobs:
  gc:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ vars.VM_IMAGES_AWS_ROLE_ARN }}
          aws-region: ${{ vars.AWS_REGION }}
      - name: Install requirements
        run: pip install -r requirements.txt
      - name: Check mypy
        run: mypy main.py collectors/*.py
      - name: Check black
        run: black --check main.py collectors/*.py
      - name: Run garbage collection
        run: python main.py --dry-run="${DRY_RUN}"
        env:
          DRY_RUN: ${{ github.event_name == 'schedule' && 'false' || inputs.dry_run }}
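
The `DRY_RUN` expression above is the only non-obvious part of the workflow: scheduled runs always perform real deletions, while `workflow_call` and `workflow_dispatch` runs fall back to the `dry_run` input, which defaults to `true`. Here is a rough Python model of how it resolves, for illustration only; the real evaluation happens inside GitHub Actions:

```python
# Illustration of the DRY_RUN expression resolution (not part of the commit).
def resolve_dry_run(event_name: str, dry_run_input: bool = True) -> str:
    # Scheduled runs force a real clean-up; other triggers honor the input.
    return "false" if event_name == "schedule" else str(dry_run_input).lower()


assert resolve_dry_run("schedule") == "false"
assert resolve_dry_run("workflow_dispatch") == "true"
assert resolve_dry_run("workflow_call", dry_run_input=False) == "false"
```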
5 changes: 5 additions & 0 deletions .github/workflows/pr.yaml
@@ -32,6 +32,11 @@ jobs:
uses: actions/checkout@v4
- name: Run shellcheck on shell scripts
run: find . -name "*.sh" | xargs -n 1 shellcheck --color=always
gc:
uses: ./.github/workflows/gc.yaml
with:
dry_run: true
secrets: inherit
build:
uses: ./.github/workflows/build.yaml
with:
3 changes: 3 additions & 0 deletions .gitignore
@@ -2,3 +2,6 @@
output
*_VARS.fd
cloud-init.iso
venv
__pycache__
.mypy_cache
1 change: 1 addition & 0 deletions build.pkr.hcl
@@ -36,6 +36,7 @@ build {
provisioner "shell" {
environment_vars = [
"DEBIAN_FRONTEND=noninteractive",
"GH_TOKEN=${var.gh_token}",
"NV_ARCH=${var.arch}",
"NV_DRIVER_VERSION=${var.driver_version}",
"NV_HELPER_SCRIPTS=${local.helpers_directory}",
35 changes: 35 additions & 0 deletions gc/README.md
@@ -0,0 +1,35 @@
# gc

This directory contains a Python script that prunes old ECR images and AMIs from AWS.

## Usage

Create a local environment:

```sh
python -m venv venv
source venv/bin/activate
pip install -r requirements.txt
```

Then run the script:

```sh
# by default, script runs with `--dry-run=true`
python main.py

# set `--dry-run=false` to actually remove AWS resources
python main.py --dry-run=false
```

Use `black` to check your formatting:

```sh
black main.py collectors/*.py
```

Use `mypy` for type checking:

```sh
mypy main.py collectors/*.py
```
Empty file added gc/collectors/__init__.py
Empty file.
90 changes: 90 additions & 0 deletions gc/collectors/amis.py
@@ -0,0 +1,90 @@
import boto3
from collectors import gc
from collections import defaultdict
from dateutil import parser
from mypy_boto3_ec2.type_defs import ImageTypeDef
from mypy_boto3_ec2.paginator import (
    DescribeImagesPaginator as EC2DescribeImagesPaginator,
)


class AMIGarbageCollector(gc.GarbageCollector):
    def __init__(self, current_images: list[str], dry_run: bool):
        super().__init__("AMI Collector", dry_run)
        self.ec2_client = boto3.client("ec2", region_name="us-east-2")
        self.current_images = current_images
        self.search_tag_name = "vm-images"
        self.search_tag_value = "true"

    def _run(self) -> None:
        amis = self._get_amis()
        expired_amis = self._find_expired_amis(amis)
        if expired_amis:
            self._delete_amis(expired_amis)
            return
        print("No expired AMIs found.")

    def _get_amis(self) -> list[ImageTypeDef]:
        amis = []
        paginator: EC2DescribeImagesPaginator = self.ec2_client.get_paginator(
            "describe_images"
        )
        for page in paginator.paginate(
            Owners=["self"],
            Filters=[
                {
                    "Name": f"tag:{self.search_tag_name}",
                    "Values": [self.search_tag_value],
                }
            ],
        ):
            page_amis = page["Images"]
            amis.extend(page_amis)
        return amis

    def _find_expired_amis(self, amis: list[ImageTypeDef]) -> list[ImageTypeDef]:
        expired_amis = []
        ami_groups = defaultdict(list)

        # Group AMIs by "image-name" tag
        for ami in amis:
            img_tags = ami["Tags"]
            if img_tags:
                for tag in img_tags:
                    if tag["Key"] == "image-name":
                        img_name = tag["Value"]
                        ami_groups[img_name].append(ami)
                        break

        # Sort AMIs by creation date.
        # If image is currently supported, keep only the newest AMI. Expire the rest.
        # If image is not currently supported, expire all AMIs.
        for img_name, amis in ami_groups.items():
            amis = sorted(
                amis, key=lambda x: parser.parse(x["CreationDate"]), reverse=True
            )
            if img_name in self.current_images:
                expired_amis.extend(amis[1:])
            else:
                expired_amis.extend(amis)

        return expired_amis

    def _delete_amis(self, amis: list[ImageTypeDef]) -> None:
        for ami in amis:
            self._deregister_ami(ami["ImageId"], ami["Name"])
            for snapshot in ami["BlockDeviceMappings"]:
                if snapshot.get("Ebs"):
                    self._delete_snapshot(snapshot["Ebs"]["SnapshotId"])

    def _deregister_ami(self, ami_id: str, ami_name: str) -> None:
        self.log_removal("AMI", f"{ami_id} ({ami_name})")
        if self.dry_run:
            return
        self.ec2_client.deregister_image(ImageId=ami_id)

    def _delete_snapshot(self, snapshot_id: str) -> None:
        self.log_removal("EBS Snapshot", snapshot_id)
        if self.dry_run:
            return
        self.ec2_client.delete_snapshot(SnapshotId=snapshot_id)
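
The retention rule in `_find_expired_amis` can be hard to see through the boto3 plumbing. Here is a self-contained sketch of the same rule on toy data, with no AWS calls; the image names and dates below are hypothetical:

```python
# Toy data mirroring the retention rule above: AMIs are grouped by their
# "image-name" tag, only the newest AMI of a currently supported image is
# kept, and AMIs for unsupported images are expired entirely.
from collections import defaultdict
from dateutil import parser

amis = [
    {"CreationDate": "2023-11-01T00:00:00Z", "Tags": [{"Key": "image-name", "Value": "linux-x86_64"}]},
    {"CreationDate": "2023-12-01T00:00:00Z", "Tags": [{"Key": "image-name", "Value": "linux-x86_64"}]},
    {"CreationDate": "2023-10-01T00:00:00Z", "Tags": [{"Key": "image-name", "Value": "linux-legacy"}]},
]
current_images = ["linux-x86_64"]  # hypothetical supported image names

groups = defaultdict(list)
for ami in amis:
    for tag in ami["Tags"]:
        if tag["Key"] == "image-name":
            groups[tag["Value"]].append(ami)
            break

expired = []
for name, group in groups.items():
    newest_first = sorted(group, key=lambda a: parser.parse(a["CreationDate"]), reverse=True)
    expired.extend(newest_first[1:] if name in current_images else newest_first)

# expired now holds the 2023-11-01 linux-x86_64 AMI and the linux-legacy AMI.
```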
65 changes: 65 additions & 0 deletions gc/collectors/ecr.py
@@ -0,0 +1,65 @@
import boto3
from collectors import gc
from mypy_boto3_ecr_public.type_defs import ImageDetailTypeDef
from mypy_boto3_ecr_public.paginator import (
    DescribeImagesPaginator as ECRPublicDescribeImagesPaginator,
)


class ECRGarbageCollector(gc.GarbageCollector):
    def __init__(self, current_images: list[str], dry_run: bool):
        super().__init__("ECR Collector", dry_run)
        self.ecr_client = boto3.client("ecr-public", region_name="us-east-1")
        self.current_images = current_images
        self.repository_name = "kubevirt-images"

    def _run(self) -> None:
        ecr_images = self._get_ecr_images()
        expired_ecr_images = self._find_expired_ecr_images(ecr_images)
        if expired_ecr_images:
            self._delete_images(expired_ecr_images)
            return
        print("No expired ECR images found.")

    def _get_ecr_images(self) -> list[ImageDetailTypeDef]:
        images = []
        paginator: ECRPublicDescribeImagesPaginator = self.ecr_client.get_paginator(
            "describe_images"
        )
        for page in paginator.paginate(repositoryName=self.repository_name):
            page_images = page["imageDetails"]
            images.extend(page_images)
        return images

    def _find_expired_ecr_images(
        self, images: list[ImageDetailTypeDef]
    ) -> list[ImageDetailTypeDef]:
        expired_images = []

        for image in images:
            image_tags = image.get("imageTags")
            has_supported_tags = False
            if image_tags:
                for tag in image_tags:
                    if tag in self.current_images:
                        has_supported_tags = True
                        break

            # Remove images that don't have any tags or don't have any supported tags
            if not image_tags or not has_supported_tags:
                expired_images.append(image)
                continue
        return expired_images

    def _delete_images(self, images: list[ImageDetailTypeDef]) -> None:
        for img in images:
            tag_name = (
                img.get("imageTags") and ", ".join(img["imageTags"]) or "untagged"
            )
            self.log_removal("ECR Image", f"{img['imageDigest']} ({tag_name})")
        if self.dry_run:
            return
        self.ecr_client.batch_delete_image(
            repositoryName=self.repository_name,
            imageIds=[{"imageDigest": img["imageDigest"]} for img in images],
        )
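
The expiry rule in `_find_expired_ecr_images` reduces to a single predicate. A minimal sketch on toy data, with no AWS calls; the tag values below are hypothetical:

```python
# An ECR image is expired when it has no tags at all, or when none of its
# tags match a currently supported image name.
current_images = ["linux-x86_64"]  # hypothetical supported image names


def is_expired(image_tags: list[str] | None) -> bool:
    return not image_tags or not any(tag in current_images for tag in image_tags)


assert is_expired(None) is True              # untagged image
assert is_expired(["stale-build"]) is True   # only unsupported tags
assert is_expired(["linux-x86_64", "extra"]) is False  # at least one supported tag
```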
19 changes: 19 additions & 0 deletions gc/collectors/gc.py
@@ -0,0 +1,19 @@
from abc import ABC, abstractmethod


class GarbageCollector(ABC):
    def __init__(self, collector_name: str, dry_run: bool):
        self.dry_run = dry_run
        self.collector_name = collector_name
        self.removal_action = dry_run and "Would have deleted" or "Deleting"

    def log_removal(self, resource_name: str, resource_id: str) -> None:
        print(f"{self.removal_action} {resource_name}: {resource_id}")

    def run(self) -> None:
        print(f"Running {self.collector_name}: dry_run={self.dry_run}")
        self._run()

    @abstractmethod
    def _run(self) -> None:
        pass
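
To show how the base class is meant to be extended, here is a minimal sketch of a hypothetical additional collector (not part of this commit): implement `_run()`, call `log_removal()` for each resource, and skip the real deletion when `dry_run` is set.

```python
from collectors import gc


class ExampleGarbageCollector(gc.GarbageCollector):
    """Hypothetical collector used only to illustrate the base-class contract."""

    def __init__(self, dry_run: bool):
        super().__init__("Example Collector", dry_run)

    def _run(self) -> None:
        # Enumerate resources, log each removal, and only delete for real
        # when dry_run is disabled.
        for resource_id in ["resource-123"]:
            self.log_removal("Example Resource", resource_id)
            if not self.dry_run:
                pass  # a real collector would call the AWS deletion API here
```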
50 changes: 50 additions & 0 deletions gc/main.py
@@ -0,0 +1,50 @@
import argparse
import subprocess
import json
from collectors.ecr import ECRGarbageCollector
from collectors.amis import AMIGarbageCollector
from collectors.gc import GarbageCollector


def load_current_images() -> list[str]:
    """
    Loads the currently supported images from the matrix and returns them as a list.
    """
    result = subprocess.run(
        "ci/compute-matrix.sh", cwd="..", capture_output=True, check=True
    )
    matrix = json.loads(result.stdout.decode("utf-8"))["include"]
    images = []
    for entry in matrix:
        result = subprocess.run(
            "ci/compute-image-name.sh",
            cwd="..",
            capture_output=True,
            env=entry,
            check=True,
        )
        images.append(result.stdout.decode("utf-8").strip())
    return images


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Prune old ECR images and AMIs.")
    parser.add_argument(
        "--dry-run",
        dest="dry_run",
        type=str,
        default="true",
        help="an optional argument to run the script in dry-run mode (default: true)",
    )
    args = parser.parse_args()
    dry_run = args.dry_run != "false"
    current_images = load_current_images()
    if not current_images:
        print()
        print("No current images found. Something's not right.")
        print("Exiting to prevent all images from being deleted from AWS.")
        exit(1)
    for GC in [ECRGarbageCollector, AMIGarbageCollector]:
        gc: GarbageCollector = GC(current_images, dry_run)
        gc.run()
        print()
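
One detail worth noting about the `--dry-run` flag: because the value is compared as a string, only the literal `false` disables dry-run mode, and any other value keeps it enabled. Sketched in isolation:

```python
# Mirrors the `args.dry_run != "false"` check above (illustration only).
def parse_dry_run(value: str) -> bool:
    return value != "false"


assert parse_dry_run("true") is True
assert parse_dry_run("false") is False
assert parse_dry_run("False") is True  # note: the check is case-sensitive
```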
2 changes: 2 additions & 0 deletions gc/mypy.ini
@@ -0,0 +1,2 @@
[mypy]
strict = True
5 changes: 5 additions & 0 deletions gc/requirements.txt
@@ -0,0 +1,5 @@
boto3
boto3-stubs[boto3,ec2,ecr-public]
types-python-dateutil
black
mypy
7 changes: 7 additions & 0 deletions matrix.yaml
@@ -1,3 +1,10 @@
# This file represents the supported image versions at any given time. When an
# element is removed from this matrix, its resources will be removed from AWS
# the next time the garbage collection job runs.
#
# Therefore, entries should only be removed from the matrix when their
# corresponding image is no longer needed.

OS:
- linux
# - win
7 changes: 6 additions & 1 deletion scripts/helpers/github.sh
@@ -5,8 +5,13 @@ set -euo pipefail
 get_github_latest_release_tag() {
   local REPO_ORG=$1
   local RESULTS_PER_PAGE="100"
+  local CURL_HEADERS=()
 
-  json=$(curl -fsSL "https://api.github.com/repos/${REPO_ORG}/releases?per_page=${RESULTS_PER_PAGE}")
+  if [[ -n "${GH_TOKEN:-}" ]]; then
+    CURL_HEADERS+=("-H" "Authorization: Bearer ${GH_TOKEN}")
+  fi
+
+  json=$(curl "${CURL_HEADERS[@]}" -fsSL "https://api.github.com/repos/${REPO_ORG}/releases?per_page=${RESULTS_PER_PAGE}")
   tagName=$(echo "${json}" | jq -r '.[] | select((.prerelease==false) and (.assets | length > 0)).tag_name' | sort --unique --version-sort | grep -Ev ".*-[a-z]|beta" | tail -1)
 
   echo "${tagName}"