CI: Move triton and sglang from mi325 to mi355 #612
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Display name of this workflow.
name: vLLM Benchmark

# Runs on pushes and pull requests that target main, and on manual dispatch.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  workflow_dispatch:

# One active run per workflow/ref pair: a newer run cancels the one still in
# progress for the same ref.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
# Workflow-wide settings shared by every job below.
env:
  # vLLM branch and repository cloned by the benchmark job.
  VLLM_BRANCH: "main"
  VLLM_REPOSITORY_URL: "https://github.com/vllm-project/vllm"
  # Image the CI image is built FROM.
  BASE_IMAGE: rocm/vllm-dev:nightly
  # Repository to clone aiter from: the pull request's head repository when
  # the event is a pull request, otherwise the upstream aiter repository.
  GITHUB_REPO_URL: ${{ github.event.pull_request.head.repo.clone_url || 'https://github.com/ROCm/aiter.git' }}
  # Commit under test; also used as the tag of the CI image.
  # Resolution order: pull request head, pushed head commit, then github.sha.
  # The last fallback covers workflow_dispatch, where the first two are empty
  # and the image tag / `git checkout` argument would otherwise be blank.
  GITHUB_COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.event.head_commit.id || github.sha }}
jobs:
  # Builds an image on top of BASE_IMAGE in which aiter is reinstalled from
  # the commit under test, then pushes it as rocm/aiter-ci:<commit sha>.
  # Skipped for pull requests that come from forks.
  build_vllm_image:
    if: ${{ !github.event.pull_request.head.repo.fork }}
    runs-on: aiter-k8s-build
    steps:
      - name: Checkout aiter repo
        uses: actions/checkout@v4

      - name: Sync submodules
        run: |
          set -e
          git submodule sync
          git submodule update --init --recursive --depth 1 --jobs 4

      # The password is handed over via stdin so it never appears on the
      # command line (process list, shell trace) and needs no shell quoting.
      - name: Docker login
        env:
          DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
        run: |
          printf '%s' "${DOCKER_PASSWORD}" | docker login -u rocmshared --password-stdin

      - name: Download the vLLM base image
        run: |
          docker pull ${{ env.BASE_IMAGE }}

      # The heredoc delimiter is unquoted, so the shell collapses each `\\`
      # to a single `\` (a Dockerfile line continuation) in Dockerfile.mod.
      - name: Generate Dockerfile
        run: |
          cat <<EOF > Dockerfile.mod
          FROM ${{ env.BASE_IMAGE }}
          RUN echo "=== Aiter version BEFORE uninstall ===" && pip show aiter || true
          RUN pip uninstall -y aiter
          RUN pip install --upgrade "pybind11>=3.0.1"
          RUN pip show pybind11
          RUN git clone ${{ env.GITHUB_REPO_URL }} /aiter && \\
              cd /aiter && \\
              git checkout ${{ env.GITHUB_COMMIT_SHA }} && \\
              git submodule sync && git submodule update --init --recursive && \\
              python3 setup.py develop
          RUN echo "=== Aiter version AFTER installation ===" && pip show aiter || true
          EOF

      - name: Show Dockerfile
        run: cat Dockerfile.mod

      - name: Build Docker image
        run: |
          IMAGE_TAG=rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}
          docker build -t "${IMAGE_TAG}" -f Dockerfile.mod .

      - name: Push Docker image
        run: |
          IMAGE_TAG=rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}
          docker push "${IMAGE_TAG}"

      - name: Success message
        run: |
          echo "Successfully prepared image: rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}"

  # Runs `vllm bench latency` inside the image built above, once per
  # (model, kv_cache_dtype) combination of the matrix.
  # Skipped for pull requests that come from forks.
  vllm_benchmark:
    if: ${{ !github.event.pull_request.head.repo.fork }}
    runs-on: aiter-8gpu-runner
    needs: build_vllm_image
    strategy:
      # Let the remaining combinations finish even if one of them fails.
      fail-fast: false
      matrix:
        model:
          - 'mistralai/Mixtral-8x7B-Instruct-v0.1'
          - 'deepseek-ai/DeepSeek-R1'
        kv_cache_dtype:
          - 'default_kvcache'
          - 'fp8_kvcache'
        # DeepSeek-R1 is not benchmarked with the fp8 KV cache.
        exclude:
          - model: 'deepseek-ai/DeepSeek-R1'
            kv_cache_dtype: 'fp8_kvcache'
    steps:
      - name: Checkout aiter repo
        uses: actions/checkout@v4

      # A previous attempt that was killed on timeout can leave a partial
      # clone behind; remove it so a retry does not fail with "destination
      # path already exists". The target directory is spelled out so the
      # cleanup and the clone always refer to the same path.
      - name: Checkout vLLM repo with retry
        uses: nick-fields/retry@v2
        with:
          timeout_minutes: 20
          max_attempts: 3
          command: |
            rm -rf vllm
            git clone --depth=1 --branch ${{ env.VLLM_BRANCH }} ${{ env.VLLM_REPOSITORY_URL }} vllm

      # Same stdin-based login as in the build job.
      - name: Docker login
        env:
          DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
        run: |
          printf '%s' "${DOCKER_PASSWORD}" | docker login -u rocmshared --password-stdin

      - name: Download the vLLM image
        run: |
          docker pull rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}

      - name: Clean up Rocm processes
        run: |
          ./.github/scripts/clean_up_rocm.sh

      # Matrix values and the HF token reach the script through environment
      # variables instead of being pasted into the shell text. `-e HF_TOKEN`
      # forwards the variable into the container by name, so the token value
      # is not part of the command line echoed by `set -x`.
      - name: Run benchmarks
        env:
          MODEL: ${{ matrix.model }}
          KV_CACHE_DTYPE: ${{ matrix.kv_cache_dtype }}
          HF_TOKEN: ${{ secrets.HF_TOKEN_TEST }}
          IMAGE_TAG: rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}
        run: |
          set -x -o pipefail
          echo "Starting benchmark for model: ${MODEL} with kv_cache_dtype: ${KV_CACHE_DTYPE}"
          logFile="result_${MODEL//\//_}_kv_${KV_CACHE_DTYPE}.log"
          extraArgs=()
          if [[ "${MODEL}" == *DeepSeek* ]]; then
            extraArgs+=(--block-size 1)
          fi
          if [[ "${KV_CACHE_DTYPE}" == "fp8_kvcache" ]]; then
            extraArgs+=(--kv-cache-dtype fp8)
          fi
          docker run --rm --device=/dev/kfd --device=/dev/dri --group-add video \
            --ulimit core=0:0 --ulimit memlock=-1:-1 --ulimit stack=67108864 --cap-add=SYS_PTRACE \
            --network=host --security-opt seccomp=unconfined --shm-size=16G \
            -e HF_TOKEN -e VLLM_ROCM_USE_AITER=1 \
            "${IMAGE_TAG}" python -m vllm.entrypoints.cli.main bench latency \
            --model "${MODEL}" \
            --batch-size 123 --input-len 456 --output-len 78 \
            --num-iters-warmup 3 --num-iters 10 \
            -tp 8 --load-format dummy "${extraArgs[@]}" |& tee "${logFile}"
          grep "Avg latency:" "${logFile}" | awk '{print $3}'

      # Runs even when the benchmark step failed.
      - name: Clean up Rocm processes
        if: always()
        run: |
          ./.github/scripts/clean_up_rocm.sh

  # Placeholder job for pull requests from forks, where the two jobs above
  # are skipped.
  skip-job:
    if: ${{ github.event.pull_request.head.repo.fork }}
    runs-on: ubuntu-latest
    steps:
      - name: Skip reason
        run: echo "It's a fork repository, skipping tests."