# CI/CD workflow — feat: new /v1/responses (#359)

name: CI/CD

on:
  push:
    branches: ["main"]
  pull_request:
    branches: ["**"] # Adjust branches as needed
  release:
    types: [published]

permissions:
  id-token: write # Required for OIDC
  contents: read # Required for checkout

jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: astral-sh/setup-uv@v4
with:
enable-cache: true
cache-dependency-glob: "**/pyproject.toml"
- name: Cache dependencies
uses: actions/cache@v3
with:
path: ${{ env.UV_CACHE_DIR }}
key: ${{ runner.os }}-uv-${{ hashFiles('**/pyproject.toml') }}
restore-keys: |
${{ runner.os }}-uv-
- name: Install dependencies
run: |
export ACLOCAL=aclocal
export AUTOMAKE=automake
uv sync
- name: Run Ruff format check
run: uv run ruff format --check
- name: Run Ruff linting
run: uv run ruff check --exclude packages/verifier/
- name: Create .env for tests
run: |
cp .env.ci .env
# Set dummy secrets for unit tests
sed -i 's/HF_TOKEN=.*/HF_TOKEN=dummy_token/' .env
sed -i 's/BRAVE_SEARCH_API=.*/BRAVE_SEARCH_API=dummy_api/' .env
sed -i 's/E2B_API_KEY=.*/BRAVEE2B_API_KEY_SEARCH_API=dummy_token/' .env
- name: pyright
run: uv run pyright
- name: Run unit tests
run: uv run pytest -v tests/unit
- name: Run integration tests
run: uv run pytest -v tests/integration
start-runner:
name: Start self-hosted EC2 runner
runs-on: ubuntu-24.04
needs: test
outputs:
label: ${{ steps.start-ec2-runner.outputs.label }}
ec2-instances-ids: ${{ steps.start-ec2-runner.outputs.ec2-instances-ids }}
steps:
- name: Configure AWS credentials
uses: aws-actions/[email protected]
with:
aws-access-key-id: ${{ secrets.GH_AWS_ACCESS_KEY }}
aws-secret-access-key: ${{ secrets.GH_AWS_SECRET_KEY }}
aws-region: "us-east-1"
- name: Start EC2 runner
id: start-ec2-runner
uses: NillionNetwork/[email protected]
with:
mode: start
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
runners-per-machine: 3
number-of-machines: 1
ec2-image-id: ami-0e70d84403fc045d7
ec2-instance-type: g6.xlarge
subnet-id: subnet-0bb357f46d1bc355c
security-group-id: sg-022a5cdcf57e9618b
key-name: us-east-1-github-runner-key
iam-role-name: github-runners-us-east-1-github-runner-ec2
aws-resource-tags: >
[
{"Key": "Name", "Value": "github-runner-${{ github.run_id }}-${{ github.run_number }}"},
{"Key": "GitHubRepository", "Value": "${{ github.repository }}"},
{"Key": "KeyName", "Value": "github-runners-key"},
{"Key": "Deployment", "Value": "github-runners"},
{"Key": "Type", "Value": "GithubRunner"},
{"Key": "User", "Value": "ec2-user"},
{"Key": "Environment", "Value": "us-east-1"}
]
build-images:
name: Build ${{ matrix.component }} image
needs: start-runner
runs-on: ${{ needs.start-runner.outputs.label }}
strategy:
matrix:
component: [vllm, attestation, api]
include:
- component: api
build_args: "--target nilai --platform linux/amd64"
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Build ${{ matrix.component }} image
run: |
echo "Building ${{ matrix.component }} image..."
docker build -t nillion/nilai-${{ matrix.component }}:latest -f docker/${{ matrix.component }}.Dockerfile ${{ matrix.build_args || '' }} .
echo "✅ ${{ matrix.component }} build completed successfully"
e2e-tests:
name: E2E Tests
needs: [start-runner, build-images]
runs-on: ${{ needs.start-runner.outputs.label }}
steps:
- name: Checkout
uses: actions/checkout@v2
- uses: astral-sh/setup-uv@v4
with:
enable-cache: true
cache-dependency-glob: "**/pyproject.toml"
- name: Install dependencies
run: |
apt-get update && apt-get install curl git pkg-config automake file python3.12-dev -y
export ACLOCAL=aclocal
export AUTOMAKE=automake
uv sync
- name: Create .env
run: |
cp .env.ci .env
# Copy secret into .env replacing the existing HF_TOKEN
sed -i 's/HF_TOKEN=.*/HF_TOKEN=${{ secrets.HF_TOKEN }}/' .env
sed -i 's/BRAVE_SEARCH_API=.*/BRAVE_SEARCH_API=${{ secrets.BRAVE_SEARCH_API }}/' .env
sed -i 's/NILDB_BUILDER_PRIVATE_KEY=.*/NILDB_BUILDER_PRIVATE_KEY=${{ secrets.NILDB_BUILDER_PRIVATE_KEY }}/' .env
sed -i 's/NILDB_COLLECTION=.*/NILDB_COLLECTION=${{ secrets.NILDB_COLLECTION }}/' .env
- name: Compose docker-compose.yml
run: python3 ./scripts/docker-composer.py --dev -f docker/compose/docker-compose.gpt-20b-gpu.ci.yml -o development-compose.yml
- name: GPU stack versions (non-fatal)
shell: bash
run: |
set +e # never fail this step
echo "::group::Host & kernel"
uname -a || true
echo "Kernel: $(uname -r 2>/dev/null || echo unknown)"
test -e /var/run/reboot-required && echo "Reboot flag: PRESENT" || echo "Reboot flag: none"
echo "::endgroup::"
echo "::group::NVIDIA driver"
if command -v nvidia-smi >/dev/null 2>&1; then
nvidia-smi || true
echo "Driver version (nvidia-smi): $(nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>/dev/null | head -n1 || echo unknown)"
echo "GPU(s):"; nvidia-smi -L || true
else
echo "nvidia-smi: not found"
fi
if [ -r /proc/driver/nvidia/version ]; then
echo "--- /proc/driver/nvidia/version ---"
cat /proc/driver/nvidia/version || true
else
echo "/proc/driver/nvidia/version: not present"
fi
command -v modinfo >/dev/null 2>&1 && { echo "--- modinfo nvidia (head) ---"; modinfo nvidia 2>/dev/null | head -n 20 || true; } || true
echo "::endgroup::"
echo "::group::DKMS status"
command -v dkms >/dev/null 2>&1 && dkms status | grep -i nvidia || echo "dkms or nvidia dkms info not present"
echo "::endgroup::"
echo "::group::CUDA toolkit/runtime"
if command -v nvcc >/dev/null 2>&1; then
nvcc --version || true
else
echo "nvcc: not found"
fi
echo "libcudart in ldconfig:"
ldconfig -p 2>/dev/null | grep -i libcudart || echo "libcudart not found in ldconfig cache"
echo "NCCL packages:"
dpkg -l 2>/dev/null | grep -iE '^ii\s+libnccl' || echo "NCCL not installed (Debian/Ubuntu dpkg check)"
echo "::endgroup::"
echo "::group::Container stack"
docker --version || echo "docker: not found"
docker info 2>/dev/null | grep -iE 'Runtimes|nvidia' || echo "docker info: no nvidia runtime line found"
containerd --version 2>/dev/null || echo "containerd: not found"
runc --version 2>/dev/null || echo "runc: not found"
echo "::endgroup::"
echo "::group::NVIDIA container runtime/toolkit"
# Legacy/runtime binaries
if command -v nvidia-container-runtime >/dev/null 2>&1; then
nvidia-container-runtime --version || nvidia-container-runtime -v || true
else
echo "nvidia-container-runtime: not found"
fi
# Toolkit binaries (newer distros)
if command -v nvidia-ctk >/dev/null 2>&1; then
nvidia-ctk --version || true
nvidia-ctk runtime configure --help >/dev/null 2>&1 || true
else
echo "nvidia-ctk: not found"
fi
if command -v nvidia-container-toolkit >/dev/null 2>&1; then
nvidia-container-toolkit --version || true
else
echo "nvidia-container-toolkit: not found"
fi
echo "libnvidia-container packages:"
dpkg -l 2>/dev/null | grep -iE '^ii\s+(libnvidia-container1|libnvidia-container-tools)\s' || echo "libnvidia-container packages not found (dpkg)"
# Show runtime config if present
if [ -f /etc/nvidia-container-runtime/config.toml ]; then
echo "--- /etc/nvidia-container-runtime/config.toml (head) ---"
sed -n '1,120p' /etc/nvidia-container-runtime/config.toml || true
else
echo "/etc/nvidia-container-runtime/config.toml: not present"
fi
echo "::endgroup::"
echo "::group::Apt logs (NVIDIA-related entries)"
for f in /var/log/apt/history.log /var/log/apt/term.log /var/log/unattended-upgrades/unattended-upgrades.log; do
if [[ -f "$f" ]]; then
echo "--- scanning $f"
grep -H -i -E 'nvidia|cuda|container-toolkit' "$f" || echo "no recent NVIDIA entries"
else
echo "missing: $f"
fi
done
echo "::endgroup::"
- name: Start Services
run: |
docker-compose -f development-compose.yml up -d
docker ps -a
- name: Wait for services to be healthy
run: bash scripts/wait_for_ci_services.sh
- name: Run E2E tests for NUC
run: |
set -e
export ENVIRONMENT=ci
export AUTH_STRATEGY=nuc
uv run pytest -v tests/e2e
- name: Run E2E tests for API Key
run: |
set -e
# Create a user with a rate limit of 1000 requests per minute, hour, and day
export AUTH_TOKEN=$(docker exec nilai-api uv run src/nilai_api/commands/add_user.py --name test1 --ratelimit-minute 1000 --ratelimit-hour 1000 --ratelimit-day 1000 | jq ".apikey" -r)
export ENVIRONMENT=ci
# Set the environment variable for the API key
export AUTH_STRATEGY=api_key
uv run pytest -v tests/e2e
- name: Stop Services
run: |
docker-compose -f development-compose.yml down -v
push-images:
name: Push ${{ matrix.component }} to ECR
needs: [start-runner, build-images, e2e-tests]
runs-on: ${{ needs.start-runner.outputs.label }}
if: (github.event_name == 'push' && github.ref == 'refs/heads/main') || github.event_name == 'release'
strategy:
matrix:
component: [vllm, attestation, api]
steps:
- name: Configure AWS credentials for ECR
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: "arn:aws:iam::054037142884:role/nilAI-github"
aws-region: "us-east-1"
- name: Login to Amazon ECR
id: login-ecr
uses: aws-actions/amazon-ecr-login@v2
with:
registry-type: public
- name: Set image tags
id: image-tags
run: |
IMAGE_TAG="${{ github.event_name == 'release' && github.ref_name || github.sha }}"
echo "image_tag=${IMAGE_TAG}" >> $GITHUB_OUTPUT
- name: Tag and push ${{ matrix.component }} to ECR
env:
ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
ECR_REGISTRY_ALIAS: k5d9x2g2
IMAGE_TAG: ${{ steps.image-tags.outputs.image_tag }}
run: |
echo "Tagging and pushing ${{ matrix.component }} image to ECR..."
# Tag for ECR
docker tag nillion/nilai-${{ matrix.component }}:latest ${ECR_REGISTRY}/${ECR_REGISTRY_ALIAS}/nilai-${{ matrix.component }}:${IMAGE_TAG}
# Push to ECR
docker push ${ECR_REGISTRY}/${ECR_REGISTRY_ALIAS}/nilai-${{ matrix.component }}:${IMAGE_TAG}
echo "## Pushed ${{ matrix.component }} Image" >> $GITHUB_STEP_SUMMARY
echo "- ${{ matrix.component }}: \`${ECR_REGISTRY}/${ECR_REGISTRY_ALIAS}/nilai-${{ matrix.component }}:${IMAGE_TAG}\`" >> $GITHUB_STEP_SUMMARY
stop-runner:
name: Stop self-hosted EC2 runner
needs: [start-runner, build-images, e2e-tests, push-images]
runs-on: ubuntu-24.04
if: ${{ always() }}
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.GH_AWS_ACCESS_KEY }}
aws-secret-access-key: ${{ secrets.GH_AWS_SECRET_KEY }}
aws-region: "us-east-1"
- name: Stop EC2 runner
uses: NillionNetwork/[email protected]
with:
mode: stop
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
label: ${{ needs.start-runner.outputs.label }}
ec2-instances-ids: ${{ needs.start-runner.outputs.ec2-instances-ids }}