Refactor ModelInputForSpyre dataclass #462
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
# CI workflow: runs the vllm-spyre test suites on CPU, against both the
# pinned (lockfile) vLLM and vLLM main, across several pytest marker suites.
name: Test

on:
  # Don't use pull_request.paths filter since this workflow is required for
  # all pull requests on main irrespective of file type or location
  pull_request:
    branches:
      - main
  push:
    branches:
      - main
    paths:
      - "tests/**/*.py"
      - "vllm_spyre/**/*.py"
      - pyproject.toml
      - .github/workflows/test-spyre.yml
  workflow_dispatch:

env:
  FORCE_COLOR: "1"
  VLLM_CPU_DISABLE_AVX512: "true"
  VLLM_TARGET_DEVICE: "empty"
  VLLM_PLUGINS: "spyre"
  VLLM_SPYRE_TEST_MODEL_DIR: "${{ github.workspace }}/models"
  HF_HUB_CACHE: "${{ github.workspace }}/.cache/huggingface/hub"

# Cancel in-flight runs for the same branch/PR to save runner time.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  test:
    timeout-minutes: 20
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: ["ubuntu-latest"]
        python_version: ["3.12"]
        # Test against both the lockfile-pinned vLLM ("default") and
        # vLLM's main branch, to catch upstream breakage early.
        vllm_version:
          - name: "default"
            repo: ""
          - name: "vLLM:main"
            repo: "git+https://github.com/vllm-project/vllm --branch main"
        test_suite:
          - name: "V0-e2e"
            markers: "v0 and cpu and e2e"
            flags: "--timeout=300"
          - name: "V1-e2e"
            markers: "v1 and cpu and e2e"
            flags: "--timeout=300 --forked"
          - name: "V1-worker"
            markers: "v1 and not e2e"
            flags: "--timeout=300"
          - name: "utils"
            markers: "utils"
            flags: "--timeout=300"
    name: "${{ matrix.test_suite.name }} (${{ matrix.vllm_version.name }})"

    steps:
      - name: "Checkout"
        uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: "Install PyTorch"
        run: |
          pip install torch=="2.5.1+cpu" --index-url https://download.pytorch.org/whl/cpu

      - name: "Install uv"
        uses: astral-sh/setup-uv@v5
        with:
          version: "latest"
          python-version: ${{ matrix.python_version }}
          enable-cache: true
          ignore-nothing-to-cache: true
          cache-dependency-glob: |
            pyproject.toml

      # Only runs for the "vLLM:main" matrix entry (repo is non-empty).
      - name: "Set vLLM version"
        if: matrix.vllm_version.repo
        run: |
          uv add ${{ matrix.vllm_version.repo }}

      - name: "Install vLLM with Spyre plugin"
        run: |
          uv venv .venv --system-site-packages
          source .venv/bin/activate
          # Syncs both the runtime and dev deps, based on the lockfile contents
          uv sync --frozen
          # Builds and installs the vllm_spyre wheel into .venv
          # This needs to be done after `uv sync`, or the wheel install will be
          # overwritten.
          uv pip install -v .

      - name: "Restore HF models cache"
        uses: actions/cache/restore@v4
        with:
          path: ${{ env.HF_HUB_CACHE }}
          key: ${{ runner.os }}-hub-cache-${{ hashFiles('cached_models.txt') }}
          restore-keys: |
            ${{ runner.os }}-hub-cache

      - name: "Download HF models"
        run: |
          mkdir -p "${VLLM_SPYRE_TEST_MODEL_DIR}"
          # We are caching HF models (HF_HUB_CACHE) for reliability rather than speed, since HF downloads are flaky for concurrent jobs.
          # Be careful when adding models to the cache here, as the GHA cache is limited to 10 GB.
          # If a new model is added here, hashFiles('cached_models.txt') should create a new hash key. The previous cache blob can then
          # be removed by an admin or can be left to expire after 7 days.
          download_jackfram_llama() {
              python -c "from transformers import pipeline; pipeline('text-generation', model='JackFram/llama-160m')"
              VARIANT=$(ls "${HF_HUB_CACHE}/models--JackFram--llama-160m/snapshots/")
              ln -s "${HF_HUB_CACHE}/models--JackFram--llama-160m/snapshots/${VARIANT}" "${VLLM_SPYRE_TEST_MODEL_DIR}/llama-194m"
          }

          download_roberta_large() {
              python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-roberta-large-v1')"
              VARIANT=$(ls "${HF_HUB_CACHE}/models--sentence-transformers--all-roberta-large-v1/snapshots/")
              ln -s "${HF_HUB_CACHE}/models--sentence-transformers--all-roberta-large-v1/snapshots/${VARIANT}" "${VLLM_SPYRE_TEST_MODEL_DIR}/all-roberta-large-v1"
          }

          download_jackfram_llama &
          download_roberta_large &
          wait

          ls "${VLLM_SPYRE_TEST_MODEL_DIR}" > cached_models.txt

      # Save the cache from only one matrix job on push/dispatch events, to
      # avoid concurrent jobs racing to write the same cache key.
      - name: "Save HF models cache"
        if: ( github.event_name != 'pull_request' && strategy.job-index == 0 )
        uses: actions/cache/save@v4
        with:
          path: ${{ env.HF_HUB_CACHE }}
          key: ${{ runner.os }}-hub-cache-${{ hashFiles('cached_models.txt') }}

      - name: "Run tests"
        env:
          # Quoted: env values are strings; avoids implicit int typing.
          MASTER_PORT: "12355"
          MASTER_ADDR: localhost
          DISTRIBUTED_STRATEGY_IGNORE_MODULES: WordEmbedding
        run: |
          # Delete the source code so we can ensure we're testing the installed
          # wheel
          rm -fr vllm_spyre
          # We activate .venv manually and run pytest directly instead of using
          # `uv run`, to avoid having `uv run` re-sync any dependencies or
          # re-install the vllm_sypre package from source
          source .venv/bin/activate
          python3 -m pytest ${{ matrix.test_suite.flags }} \
              tests -v -m "${{ matrix.test_suite.markers }}"