---
# E2E test workflow for the train API ("Add e2e test for train API", run #57).
#
# On every pull request this workflow:
#   1. creates a Kind cluster for each Python version in the matrix,
#   2. builds and deploys the training-operator image,
#   3. builds the trainer and storage-initializer images and loads them
#      into the Kind cluster,
#   4. installs the Python SDK with the `huggingface` extra and runs the
#      train API e2e test with pytest.
#
# NOTE(review): the viewer this file was copied from reported that the file
# contains bidirectional Unicode text. No such characters are reproduced
# here; compare against the repository copy if that matters.
name: E2E Test with train API

on:
  - pull_request

# Only one run per workflow + ref is kept; a newer run cancels the older one.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  e2e-test:
    runs-on: ubuntu-latest
    strategy:
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false
      matrix:
        kubernetes-version: ["v1.28.7"]
        # Quoted so that "3.10" is not parsed as the float 3.1.
        python-version: ["3.9", "3.10", "3.11"]
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Local action from this repository; must run after checkout.
      - name: Free-Up Disk Space
        uses: ./.github/workflows/free-up-disk-space

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      # NOTE(review): the action tag was obscured in the copy this file was
      # reconstructed from ("helm/[email protected]"); v1.10.0 is an
      # assumption - confirm against the repository before merging.
      - name: Create k8s Kind Cluster
        uses: helm/kind-action@v1.10.0
        with:
          node_image: kindest/node:${{ matrix.kubernetes-version }}
          cluster_name: training-operator-cluster
          kubectl_version: ${{ matrix.kubernetes-version }}

      - name: Build training-operator
        run: |
          ./scripts/gha/build-image.sh
        env:
          TRAINING_CI_IMAGE: kubeflowtraining/training-operator:test

      # Each of the following steps ends by pruning Docker data and printing
      # Docker/disk usage, so that disk consumption is visible in the log.
      - name: Deploy training operator
        run: |
          ./scripts/gha/setup-training-operator.sh
          docker system prune -a -f
          docker system df
          df -h
        env:
          KIND_CLUSTER: training-operator-cluster
          TRAINING_CI_IMAGE: kubeflowtraining/training-operator:test
          GANG_SCHEDULER_NAME: "none"
          KUBERNETES_VERSION: ${{ matrix.kubernetes-version }}

      - name: Build trainer
        run: |
          ./scripts/gha/build-trainer.sh
          docker builder prune -a -f
          docker system df
          df -h
        env:
          TRAINER_CI_IMAGE: kubeflowtraining/trainer:test

      # The image and cluster names are read from the step's environment
      # variables by the shell rather than being templated into the script.
      - name: Load trainer
        run: |
          kind load docker-image "${TRAINER_CI_IMAGE}" --name "${KIND_CLUSTER}"
          docker system prune -a -f
          docker system df
          df -h
        env:
          KIND_CLUSTER: training-operator-cluster
          TRAINER_CI_IMAGE: kubeflowtraining/trainer:test

      - name: Build storage initializer
        run: |
          ./scripts/gha/build-storage-initializer.sh
          docker builder prune -a -f
          docker system df
          df -h
        env:
          STORAGE_INITIALIZER_CI_IMAGE: kubeflowtraining/storage-initializer:test
          TRAINER_CI_IMAGE: kubeflowtraining/trainer:test

      - name: Load storage initializer
        run: |
          kind load docker-image "${STORAGE_INITIALIZER_CI_IMAGE}" --name "${KIND_CLUSTER}"
          docker system prune -a -f
          docker system df
          df -h
        env:
          KIND_CLUSTER: training-operator-cluster
          STORAGE_INITIALIZER_CI_IMAGE: kubeflowtraining/storage-initializer:test

      # The extras spec is quoted so the shell never treats "[...]" as a glob.
      - name: Run tests
        run: |
          pip install pytest
          python3 -m pip install -e "sdk/python[huggingface]"
          pytest -s sdk/python/test/e2e-train-api/test_e2e_train_api.py --log-cli-level=debug
        env:
          STORAGE_INITIALIZER_IMAGE: kubeflowtraining/storage-initializer:test
          TRAINER_TRANSFORMER_IMAGE_DEFAULT: kubeflowtraining/trainer:test