Skip to content

[Triton] FA v3 API #610

[Triton] FA v3 API

[Triton] FA v3 API #610

# Workflow-level settings for the vLLM benchmark pipeline: triggers,
# run de-duplication, and the environment shared by every job.
name: vLLM Benchmark

# Runs on pushes and pull requests targeting main, and on manual dispatch.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  workflow_dispatch:

# One active run per workflow/ref pair; a newer run cancels the older one.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

env:
  VLLM_BRANCH: "main"
  VLLM_REPOSITORY_URL: "https://github.com/vllm-project/vllm"
  BASE_IMAGE: rocm/vllm-dev:nightly
  # Clone URL of the PR head repository; falls back to the upstream aiter
  # repository when the event carries no pull_request payload.
  GITHUB_REPO_URL: ${{ github.event.pull_request.head.repo.clone_url || 'https://github.com/ROCm/aiter.git' }}
  # Commit to build and benchmark: the PR head commit, else the pushed
  # commit. github.sha is the last fallback so that workflow_dispatch runs
  # (which have neither event field) do not end up with an empty SHA and
  # therefore an invalid image tag.
  GITHUB_COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.event.head_commit.id || github.sha }}
jobs:
build_vllm_image:
  # Builds an image on top of BASE_IMAGE in which the preinstalled aiter is
  # replaced by the commit under test, then pushes it as
  # rocm/aiter-ci:<commit sha>.
  # Skipped for pull requests coming from forks (presumably because the
  # registry secret is not available to them -- confirm).
  if: ${{ !github.event.pull_request.head.repo.fork }}
  runs-on: aiter-k8s-build
  steps:
    - name: Checkout aiter repo
      uses: actions/checkout@v4

    - name: Sync submodules
      run: |
        set -e
        git submodule sync
        git submodule update --init --recursive --depth 1 --jobs 4

    # The password is handed to docker on stdin instead of with -p, so it
    # never appears in the process argument list.
    - name: Docker login
      env:
        DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
      run: |
        printf '%s' "$DOCKER_PASSWORD" | docker login -u rocmshared --password-stdin

    - name: Download the vLLM base image
      run: |
        docker pull ${{ env.BASE_IMAGE }}

    # The heredoc delimiter is unquoted, so the shell turns each "\\" into a
    # single "\" (a Dockerfile line continuation) when writing the file.
    # The "|| true" keeps the build going when "pip show aiter" fails.
    - name: Generate Dockerfile
      run: |
        cat <<EOF > Dockerfile.mod
        FROM ${{ env.BASE_IMAGE }}
        RUN echo "=== Aiter version BEFORE uninstall ===" && pip show aiter || true
        RUN pip uninstall -y aiter
        RUN pip install --upgrade "pybind11>=3.0.1"
        RUN pip show pybind11
        RUN git clone ${{ env.GITHUB_REPO_URL }} /aiter && \\
            cd /aiter && \\
            git checkout ${{ env.GITHUB_COMMIT_SHA }} && \\
            git submodule sync && git submodule update --init --recursive && \\
            python3 setup.py develop
        RUN echo "=== Aiter version AFTER installation ===" && pip show aiter || true
        EOF

    - name: Show Dockerfile
      run: cat Dockerfile.mod

    - name: Build Docker image
      run: |
        IMAGE_TAG=rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}
        docker build -t "$IMAGE_TAG" -f Dockerfile.mod .

    - name: Push Docker image
      run: |
        IMAGE_TAG=rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}
        docker push "$IMAGE_TAG"

    - name: Success message
      run: |
        echo "Successfully prepared image: rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}"
vllm_benchmark:
  # Runs "vllm bench latency" inside the image produced by build_vllm_image,
  # once per model / KV-cache dtype combination in the matrix.
  # Skipped for pull requests coming from forks.
  if: ${{ !github.event.pull_request.head.repo.fork }}
  runs-on: aiter-8gpu-runner
  needs: build_vllm_image
  strategy:
    # Let the remaining matrix entries finish even if one of them fails.
    fail-fast: false
    matrix:
      model:
        - 'mistralai/Mixtral-8x7B-Instruct-v0.1'
        - 'deepseek-ai/DeepSeek-R1'
      kv_cache_dtype:
        - 'default_kvcache'
        - 'fp8_kvcache'
      # DeepSeek-R1 is benchmarked with the default KV cache only.
      exclude:
        - model: 'deepseek-ai/DeepSeek-R1'
          kv_cache_dtype: 'fp8_kvcache'
  steps:
    - name: Checkout aiter repo
      uses: actions/checkout@v4

    - name: Checkout vLLM repo with retry
      uses: nick-fields/retry@v2
      with:
        timeout_minutes: 20
        max_attempts: 3
        command: |
          git clone --depth=1 --branch ${{ env.VLLM_BRANCH }} ${{ env.VLLM_REPOSITORY_URL }}

    # The password is handed to docker on stdin instead of with -p, so it
    # never appears in the process argument list.
    - name: Docker login
      env:
        DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
      run: |
        printf '%s' "$DOCKER_PASSWORD" | docker login -u rocmshared --password-stdin

    - name: Download the vLLM image
      run: |
        docker pull rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}

    - name: Clean up Rocm processes
      run: |
        ./.github/scripts/clean_up_rocm.sh

    # HF_TOKEN is exported through the step environment and forwarded with a
    # bare "-e HF_TOKEN", so the secret is not part of the command line that
    # "set -x" traces. Extra CLI flags are collected in a bash array so that
    # they expand to separate arguments without relying on word splitting.
    - name: Run benchmarks
      env:
        HF_TOKEN: ${{ secrets.HF_TOKEN_TEST }}
      run: |
        set -x -o pipefail
        echo "Starting benchmark for model: ${{ matrix.model }} with kv_cache_dtype: ${{ matrix.kv_cache_dtype }}"
        logFile="result_$(echo '${{ matrix.model }}' | sed 's/\//_/g')_kv_${{ matrix.kv_cache_dtype }}.log"
        extraArgs=()
        if [[ "${{ matrix.model }}" == *DeepSeek* ]]; then
          extraArgs+=(--block-size 1)
        fi
        if [[ "${{ matrix.kv_cache_dtype }}" == "fp8_kvcache" ]]; then
          extraArgs+=(--kv-cache-dtype fp8)
        fi
        docker run --rm --device=/dev/kfd --device=/dev/dri --group-add video \
          --ulimit core=0:0 --ulimit memlock=-1:-1 --ulimit stack=67108864 --cap-add=SYS_PTRACE \
          --network=host --security-opt seccomp=unconfined --shm-size=16G \
          -e HF_TOKEN -e VLLM_ROCM_USE_AITER=1 \
          rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }} python -m vllm.entrypoints.cli.main bench latency \
          --model "${{ matrix.model }}" \
          --batch-size 123 --input-len 456 --output-len 78 \
          --num-iters-warmup 3 --num-iters 10 \
          -tp 8 --load-format dummy "${extraArgs[@]}" |& tee "${logFile}"
        # Print only the third field of the "Avg latency:" line; with
        # pipefail the step fails when no such line was logged.
        grep "Avg latency:" "${logFile}" | awk '{print $3}'

    # Runs even when an earlier step failed.
    - name: Clean up Rocm processes
      if: always()
      run: |
        ./.github/scripts/clean_up_rocm.sh
skip-job:
  # Counterpart of the fork guard on the other jobs: this job runs only when
  # the pull request head repository is a fork, and just prints the reason
  # the real jobs were skipped.
  if: github.event.pull_request.head.repo.fork
  runs-on: ubuntu-latest
  steps:
    - name: Skip reason
      run: |
        echo "It's a fork repository, skipping tests."