Add e2e test for train API #48
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# End-to-end test workflow for the train API.
# Creates a Kind cluster, deploys the training operator into it, builds and
# loads the storage-initializer and trainer images, then runs the pytest
# e2e suite located under sdk/python/test/e2e-train-api.
name: E2E Test with train API

on:
  - pull_request

# One active run per workflow + ref; a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  e2e-test:
    runs-on: ubuntu-latest
    strategy:
      # Let the remaining matrix combinations finish even if one fails.
      fail-fast: false
      matrix:
        kubernetes-version: ["v1.28.7"]
        # Quoted so that "3.10" is not parsed as the float 3.1.
        python-version: ["3.9", "3.10", "3.11"]
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Local action from this repository; must run after Checkout.
      - name: Free-Up Disk Space
        uses: ./.github/workflows/free-up-disk-space

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      # NOTE(review): the action reference was garbled in the retrieved copy
      # of this file; v1.10.0 is a reconstruction - confirm the pinned
      # version against the repository before merging.
      - name: Create k8s Kind Cluster
        uses: helm/kind-action@v1.10.0
        with:
          node_image: kindest/node:${{ matrix.kubernetes-version }}
          cluster_name: training-operator-cluster
          kubectl_version: ${{ matrix.kubernetes-version }}

      - name: Build training-operator
        run: |
          ./scripts/gha/build-image.sh
        env:
          TRAINING_CI_IMAGE: kubeflowtraining/training-operator:test

      # KIND_CLUSTER matches the cluster_name given to the Kind step above.
      - name: Deploy training operator
        run: |
          ./scripts/gha/setup-training-operator.sh
        env:
          KIND_CLUSTER: training-operator-cluster
          TRAINING_CI_IMAGE: kubeflowtraining/training-operator:test
          GANG_SCHEDULER_NAME: "none"
          KUBERNETES_VERSION: ${{ matrix.kubernetes-version }}

      - name: Build storage initializer and trainer
        run: |
          ./scripts/gha/setup-storage-initializer-and-trainer.sh
        env:
          STORAGE_INITIALIZER_CI_IMAGE: kubeflowtraining/storage-initializer:test
          TRAINER_CI_IMAGE: kubeflowtraining/trainer:test

      - name: Check disk space
        run: df -h

      # Load the image into the Kind cluster, then remove the local copy
      # (presumably to reclaim runner disk space - see the df -h checks).
      - name: Load storage initializer
        run: |
          kind load docker-image ${{ env.STORAGE_INITIALIZER_CI_IMAGE }} --name ${{ env.KIND_CLUSTER }}
          docker rmi ${{ env.STORAGE_INITIALIZER_CI_IMAGE }}
        env:
          KIND_CLUSTER: training-operator-cluster
          STORAGE_INITIALIZER_CI_IMAGE: kubeflowtraining/storage-initializer:test

      - name: Check disk space
        run: df -h

      # Same load-then-remove sequence for the trainer image.
      - name: Load trainer
        run: |
          kind load docker-image ${{ env.TRAINER_CI_IMAGE }} --name ${{ env.KIND_CLUSTER }}
          docker rmi ${{ env.TRAINER_CI_IMAGE }}
        env:
          KIND_CLUSTER: training-operator-cluster
          TRAINER_CI_IMAGE: kubeflowtraining/trainer:test

      - name: Check disk space
        run: df -h

      # Installs the SDK in editable mode with the huggingface extra and runs
      # the e2e test module; the env vars name the images built earlier.
      - name: Run tests
        run: |
          pip install pytest
          python3 -m pip install -e sdk/python[huggingface]
          pytest -s sdk/python/test/e2e-train-api/test_e2e_train_api.py --log-cli-level=debug
        env:
          STORAGE_INITIALIZER_IMAGE: kubeflowtraining/storage-initializer:test
          TRAINER_TRANSFORMER_IMAGE_DEFAULT: kubeflowtraining/trainer:test