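# Manually triggered GPU smoke test: build the aikit image, build a test
# model image for the selected backend, serve it with GPU access, and probe
# the OpenAI-compatible chat completions endpoint.
#
# Example trigger from the CLI (a sketch; assumes this file is named
# docker-test-gpu.yaml):
#
#   gh workflow run docker-test-gpu.yaml -f backend=llama-cuda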

name: docker-test-gpu

on:
  workflow_dispatch:
    inputs:
      backend:
        type: choice
        description: Backend type
        options:
          - llama-cuda
          - exllama
          - exllama2-gptq
          - exllama2-exl2

permissions: read-all
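
# The job assumes a self-hosted runner with an NVIDIA GPU and the NVIDIA
# Container Toolkit installed, so that `docker run --gpus all` works.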
jobs:
  test:
    runs-on: self-hosted
    timeout-minutes: 240
    steps:
      - uses: AutoModality/action-clean@d004b47eb728f2a83316fc70ba9a62ef97278013 # v1.1.0
      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
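      # Build the aikit image itself. The test aikitfiles below presumably
      # reference this local aikit:test tag as their BuildKit frontend (an
      # assumption about the test setup, e.g. via a `#syntax=` directive).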
      - name: build aikit
        run: |
          docker buildx build . -t aikit:test \
            --load --progress plain
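      # Build the model image for the chosen backend. aikit is a BuildKit
      # frontend, so the YAML aikitfile is passed directly via -f in place
      # of a Dockerfile.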
      - name: build test model
        run: |
          docker buildx build . -t testmodel:test \
            -f test/aikitfile-${{ github.event.inputs.backend }}.yaml \
            --load --progress plain
      - name: list images
        run: docker images
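      # Serve the model detached on port 8080; `--gpus all` requires the
      # NVIDIA Container Toolkit on the runner.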
      - name: run test model
        run: docker run --name testmodel -d --rm -p 8080:8080 --gpus all testmodel:test
      - name: install e2e dependencies
        run: make test-e2e-dependencies
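      # Smoke-test the OpenAI-compatible chat completions endpoint. Plain
      # curl succeeds even on an HTTP error status; a stricter variant (a
      # sketch, not part of the original workflow) could assert the response
      # actually contains choices:
      #
      #   curl -sf http://127.0.0.1:8080/v1/chat/completions \
      #     -H "Content-Type: application/json" \
      #     -d '{"model": "llama-2-7b-chat", "messages": [{"role": "user", "content": "hi"}]}' \
      #     | grep -q '"choices"'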
      - name: run test
        run: |
          curl http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
            "model": "llama-2-7b-chat",
            "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
          }'
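      # The container was started with --rm, so it is removed (and its logs
      # are lost) as soon as it stops: collect the logs first, then stop.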
      - name: save logs
        if: always()
        run: docker logs testmodel > /tmp/docker-${{ github.event.inputs.backend }}.log
      - run: docker stop testmodel
        if: always()
      - name: publish test artifacts
        if: always()
        uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0
        with:
          name: test-${{ github.event.inputs.backend }}
          path: |
            /tmp/*.log