Skip to content

Refactor ModelInputForSpyre dataclass #462

Refactor ModelInputForSpyre dataclass

Refactor ModelInputForSpyre dataclass #462

Workflow file for this run

name: Test

# Trigger rules: run on every PR (no paths filter — this check is required on
# all PRs to main), on pushes to main that touch test/source/config files, and
# on manual dispatch.
on:
  # Don't use pull_request.paths filter since this workflow is required for
  # all pull requests on main irrespective of file type or location
  pull_request:
    branches:
      - main
  push:
    branches:
      - main
    paths:
      - "tests/**/*.py"
      - "vllm_spyre/**/*.py"
      - pyproject.toml
      - .github/workflows/test-spyre.yml
  workflow_dispatch:

# All env values are quoted strings so the YAML parser never retypes them
# (e.g. "1" staying a string rather than an int).
env:
  FORCE_COLOR: "1"
  VLLM_CPU_DISABLE_AVX512: "true"
  VLLM_TARGET_DEVICE: "empty"
  VLLM_PLUGINS: "spyre"
  VLLM_SPYRE_TEST_MODEL_DIR: "${{ github.workspace }}/models"
  HF_HUB_CACHE: "${{ github.workspace }}/.cache/huggingface/hub"

# Cancel superseded runs for the same PR branch; fall back to run_id so
# pushes to main (no head_ref) never cancel each other.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
jobs:
  test:
    timeout-minutes: 20
    runs-on: ${{ matrix.os }}
    strategy:
      # Let remaining matrix combinations finish even if one suite fails.
      fail-fast: false
      matrix:
        os: ["ubuntu-latest"]
        # Quoted so "3.12" stays a string (an unquoted 3.12 would parse as a
        # float and could be mangled by tooling).
        python_version: ["3.12"]
        # Test against both the pinned/lockfile vLLM and vLLM main.
        vllm_version:
          - name: "default"
            repo: ""
          - name: "vLLM:main"
            repo: "git+https://github.com/vllm-project/vllm --branch main"
        # Pytest marker expressions selecting each suite, plus per-suite flags.
        test_suite:
          - name: "V0-e2e"
            markers: "v0 and cpu and e2e"
            flags: "--timeout=300"
          - name: "V1-e2e"
            markers: "v1 and cpu and e2e"
            flags: "--timeout=300 --forked"
          - name: "V1-worker"
            markers: "v1 and not e2e"
            flags: "--timeout=300"
          - name: "utils"
            markers: "utils"
            flags: "--timeout=300"
    name: "${{ matrix.test_suite.name }} (${{ matrix.vllm_version.name }})"
    steps:
      - name: "Checkout"
        uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: "Install PyTorch"
        run: |
          pip install torch=="2.5.1+cpu" --index-url https://download.pytorch.org/whl/cpu

      - name: "Install uv"
        uses: astral-sh/setup-uv@v5
        with:
          version: "latest"
          python-version: ${{ matrix.python_version }}
          enable-cache: true
          ignore-nothing-to-cache: true
          cache-dependency-glob: |
            pyproject.toml

      - name: "Set vLLM version"
        # Only override vLLM when the matrix entry supplies a repo spec;
        # the "default" entry (empty string) keeps the lockfile version.
        if: matrix.vllm_version.repo
        run: |
          uv add ${{ matrix.vllm_version.repo }}

      - name: "Install vLLM with Spyre plugin"
        run: |
          uv venv .venv --system-site-packages
          source .venv/bin/activate
          # Syncs both the runtime and dev deps, based on the lockfile contents
          uv sync --frozen
          # Builds and installs the vllm_spyre wheel into .venv
          # This needs to be done after `uv sync`, or the wheel install will be
          # overwritten.
          uv pip install -v .

      - name: "Restore HF models cache"
        uses: actions/cache/restore@v4
        with:
          path: ${{ env.HF_HUB_CACHE }}
          key: ${{ runner.os }}-hub-cache-${{ hashFiles('cached_models.txt') }}
          restore-keys: |
            ${{ runner.os }}-hub-cache

      - name: "Download HF models"
        run: |
          mkdir -p "${VLLM_SPYRE_TEST_MODEL_DIR}"
          # We are caching HF models (HF_HUB_CACHE) for reliability rather than speed, since HF downloads are flaky for concurrent jobs.
          # Be careful when adding models to the cache here, as the GHA cache is limited to 10 GB.
          # If a new model is added here, hashFiles('cached_models.txt') should create a new hash key. The previous cache blob can then
          # be removed by an admin or can be left to expire after 7 days.
          download_jackfram_llama() {
              python -c "from transformers import pipeline; pipeline('text-generation', model='JackFram/llama-160m')"
              VARIANT=$(ls "${HF_HUB_CACHE}/models--JackFram--llama-160m/snapshots/")
              ln -s "${HF_HUB_CACHE}/models--JackFram--llama-160m/snapshots/${VARIANT}" "${VLLM_SPYRE_TEST_MODEL_DIR}/llama-194m"
          }
          download_roberta_large() {
              python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-roberta-large-v1')"
              VARIANT=$(ls "${HF_HUB_CACHE}/models--sentence-transformers--all-roberta-large-v1/snapshots/")
              ln -s "${HF_HUB_CACHE}/models--sentence-transformers--all-roberta-large-v1/snapshots/${VARIANT}" "${VLLM_SPYRE_TEST_MODEL_DIR}/all-roberta-large-v1"
          }
          # Download both models in parallel, then record the cache contents so
          # hashFiles('cached_models.txt') keys the cache on the model set.
          download_jackfram_llama &
          download_roberta_large &
          wait
          ls "${VLLM_SPYRE_TEST_MODEL_DIR}" > cached_models.txt

      - name: "Save HF models cache"
        # Save only from non-PR runs, and only once per matrix (job-index 0),
        # to avoid concurrent jobs racing to write the same cache key.
        if: ( github.event_name != 'pull_request' && strategy.job-index == 0 )
        uses: actions/cache/save@v4
        with:
          path: ${{ env.HF_HUB_CACHE }}
          key: ${{ runner.os }}-hub-cache-${{ hashFiles('cached_models.txt') }}

      - name: "Run tests"
        env:
          # Quoted so the port stays a string instead of a YAML integer.
          MASTER_PORT: "12355"
          MASTER_ADDR: localhost
          DISTRIBUTED_STRATEGY_IGNORE_MODULES: WordEmbedding
        run: |
          # Delete the source code so we can ensure we're testing the installed
          # wheel
          rm -fr vllm_spyre
          # We activate .venv manually and run pytest directly instead of using
          # `uv run`, to avoid having `uv run` re-sync any dependencies or
          # re-install the vllm_spyre package from source
          source .venv/bin/activate
          python3 -m pytest ${{ matrix.test_suite.flags }} \
            tests -v -m "${{ matrix.test_suite.markers }}"