Skip to content

CI: Move triton and sglang from mi325 to mi355 #1917

CI: Move triton and sglang from mi325 to mi355

CI: Move triton and sglang from mi325 to mi355 #1917

# Downstream integration check: builds the sglang ROCm image against the aiter
# revision under test and runs sglang's accuracy tests in it (see `jobs`).
name: Sglang Downstream Test

# Runs on pushes to `main`, on pull requests targeting `main`, and on manual
# dispatch from the Actions UI.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main] # Triggers on PRs targeting `main`
  workflow_dispatch:

# One active run per workflow + ref: a newer run for the same ref cancels the
# run that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
jobs:
  # Builds sglang's ROCm Docker image with aiter pinned to the commit under
  # test, starts a container from it, and runs sglang's accuracy tests through
  # sglang's own CI helper scripts.
  sglang:
    name: sglang integration
    runs-on: aiter-mi355-1gpu
    env:
      # sglang tag that is cloned and passed to the image build.
      SGL_BRANCH: v0.5.2
      # NOTE(review): the runner label says mi355, but the image is built for
      # gfx942. MI355-class GPUs are normally targeted as gfx950 - confirm this
      # is intentional. The sed patches below address specific lines of
      # docker/Dockerfile.rocm, so re-check them if this value changes.
      GPU_ARCH: gfx942
    steps:
      - name: Checkout aiter repo
        uses: actions/checkout@v4

      - name: Clone sglang repo
        run: |
          git clone -b "${SGL_BRANCH}" https://github.com/sgl-project/sglang.git

      # TODO: Currently, we use PAT, it should be replaced by repo token
      # NOTE(review): the request below carries no credentials, so it is an
      # anonymous call despite the step name - confirm what this step is for.
      - name: Make authenticated API request
        run: |
          curl -sSfL https://api.github.com/repos/casey/just/releases/latest

      # Inserts a RUN instruction at line 79 of docker/Dockerfile.rocm that
      # rewrites the apt sources from http to https and installs
      # ca-certificates.
      # NOTE(review): the inserted instruction also disables apt's TLS peer
      # verification inside the image (99insecure). That weakens package
      # integrity guarantees; remove it once the underlying connectivity issue
      # is resolved.
      - name: Workaround to change the connection of unbuntu from http to https
        run: |
          cd sglang
          sed -i "79iRUN sed -i 's|http://|https://|g' /etc/apt/sources.list.d/* && sed -i 's|http://|https://|g' /etc/apt/sources.list && apt update -o Acquire::https::Verify-Peer=false && apt install -o Acquire::https::Verify-Peer=false ca-certificates -y && echo 'Acquire::https::Verify-Peer \"false\";' > /etc/apt/apt.conf.d/99insecure" docker/Dockerfile.rocm

      - name: Build sglang_aiter_test image
        env:
          # PR head repository when triggered by a pull request, otherwise the
          # upstream aiter repository.
          GITHUB_REPO_URL: ${{ github.event.pull_request.head.repo.clone_url || 'https://github.com/ROCm/aiter.git' }}
          # PR head commit, else the pushed head commit, else github.sha.
          # The last fallback covers workflow_dispatch, whose event payload has
          # neither pull_request nor head_commit; without it AITER_COMMIT would
          # be passed to the build as an empty string.
          GITHUB_COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.event.head_commit.id || github.sha }}
        run: |
          cd sglang
          # Turn the hard-coded ENV AITER_COMMIT into an ARG-backed ENV so the
          # --build-arg below can override the pinned aiter version.
          sed -i '/ENV AITER_COMMIT="v0.1.4"/c\ARG AITER_COMMIT="v0.1.4"\nENV AITER_COMMIT=${AITER_COMMIT}' docker/Dockerfile.rocm
          docker build --no-cache \
            --build-arg SGL_BRANCH="${SGL_BRANCH}" \
            --build-arg GPU_ARCH="${GPU_ARCH}" \
            --build-arg AITER_REPO="${GITHUB_REPO_URL}" \
            --build-arg AITER_COMMIT="${GITHUB_COMMIT_SHA}" \
            -t sglang_aiter_test:ci -f docker/Dockerfile.rocm .

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          echo "Clean up containers..."
          # Stop and remove any container left over from a previous run.
          docker ps -aq -f name=sglang_aiter_test | xargs -r docker stop | xargs -r docker rm
          # Use the device flags published in /etc/podinfo when that file
          # exists; otherwise expose /dev/dri.
          if [ -f "/etc/podinfo/gha-render-devices" ]; then
            DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices)
          else
            DEVICE_FLAG="--device /dev/dri"
          fi
          echo "Starting container: sglang_aiter_test:ci"
          # $DEVICE_FLAG is left unquoted on purpose: it holds one or more
          # complete docker arguments that must be word-split.
          docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
            -v "${GITHUB_WORKSPACE:-$PWD}/sglang:/sglang-checkout" \
            --ipc=host --group-add video \
            --shm-size 32g \
            --cap-add=SYS_PTRACE \
            -e HF_TOKEN="${HF_TOKEN:-}" \
            --security-opt seccomp=unconfined \
            -w /sglang-checkout \
            --name sglang_aiter_test \
            sglang_aiter_test:ci

      - name: Install wget
        run: sudo apt-get update && sudo apt-get install -y wget

      - name: Install dependencies
        run: |
          cd sglang
          # Replace sglang's default name (ci_sglang) with the name of the
          # container started above before running the script.
          sed -i 's/ci_sglang/sglang_aiter_test/g' scripts/ci/amd_ci_install_dependency.sh
          bash scripts/ci/amd_ci_install_dependency.sh

      - name: Evaluate Accuracy
        timeout-minutes: 60
        run: |
          cd sglang
          # Same container-name substitution as in the dependency step.
          sed -i 's/ci_sglang/sglang_aiter_test/g' scripts/ci/amd_ci_exec.sh
          # Informational only: record the installed aiter package; never fail
          # the step on this command.
          bash scripts/ci/amd_ci_exec.sh pip show aiter || true
          bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_eval_accuracy_large.py
          bash scripts/ci/amd_ci_exec.sh python3 test_eval_fp8_accuracy.py
          bash scripts/ci/amd_ci_exec.sh python3 models/test_qwen_models.py

      # TODO: Clean up because some dependencies are installed under root user
      # which can't be removed by runner, these dependencies should be
      # installed as a non-root user
      - name: Clean Up
        if: always()
        run: |
          docker exec -u root sglang_aiter_test bash -c "rm -rf /sglang-checkout/sgl-kernel; rm -rf /sglang-checkout/python"