# (Gaudi3) Non-regression tests #148
# Viewer notice: this file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow identity, triggers and concurrency policy.
name: (Gaudi3) Non-regression tests

on:
  # Allow manual runs from the Actions tab.
  workflow_dispatch:
  schedule:
    # 21:00 UTC every day, i.e. 11pm CEST (summer) / 10pm CET (winter).
    - cron: '0 21 * * *'

# All runs of this workflow share a single concurrency group keyed on the
# workflow name.
concurrency:
  group: ${{ github.workflow }}

# Test jobs.
#
# Every job runs inside the same Gaudi PyTorch container image on a runner
# group (1-card or 8-card). The jobs are chained through `needs`, so they
# start one after another, and the shared `if` expression lets a job start
# even when an earlier job failed, unless the run was cancelled.
#
# `--env HABANA_VISIBLE_DEVICES` / `--env HABANA_VISIBLE_MODULES` are given
# without a value in the container options; the first step of each job echoes
# both variables next to the `hl-smi` output.
#
# NOTE(review): the nesting of `env` under `container` was reconstructed from
# a flattened copy of this file; confirm it against the repository version.
jobs:
  # Entry point of the chain: example diff checks on a 1-card runner.
  example-diff:
    name: Test examples differences
    runs-on:
      group: itac-bm-emr-gaudi3-dell-1gaudi
    container:
      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.7.1:latest
      options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
      env:
        OMPI_MCA_btl_vader_single_copy_mechanism: none
    steps:
      - name: HL-SMI
        run: |
          hl-smi
          echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
          echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
      - name: Checkout
        uses: actions/checkout@v4
      - name: Run tests
        run: /bin/bash tests/ci/example_diff_tests.sh

  # Diffusers slow tests on an 8-card runner.
  stable-diffusion:
    name: Test Stable Diffusion
    if: ${{ !cancelled() && (success() || failure()) }}
    needs:
      - example-diff  # run the job when the previous test job is done
    runs-on:
      group: itac-bm-emr-gaudi3-dell-8gaudi
    container:
      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.7.1:latest
      options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
      env:
        OMPI_MCA_btl_vader_single_copy_mechanism: none
    steps:
      - name: HL-SMI
        run: |
          hl-smi
          echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
          echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
      - name: Checkout
        uses: actions/checkout@v4
      - name: Run tests
        run: /bin/bash tests/ci/slow_tests_diffusers.sh ${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}

  # DeepSpeed slow tests on an 8-card runner.
  deepspeed:
    name: Test DeepSpeed models
    if: ${{ !cancelled() && (success() || failure()) }}
    needs:
      - stable-diffusion  # run the job when the previous test job is done
    runs-on:
      group: itac-bm-emr-gaudi3-dell-8gaudi
    container:
      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.7.1:latest
      options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
      env:
        OMPI_MCA_btl_vader_single_copy_mechanism: none
    steps:
      - name: HL-SMI
        run: |
          hl-smi
          echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
          echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
      - name: Checkout
        uses: actions/checkout@v4
      - name: Run tests
        run: /bin/bash tests/ci/slow_tests_deepspeed.sh ${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}

  # FSDP slow tests (make target) on an 8-card runner.
  fsdp:
    name: Test FSDP models
    if: ${{ !cancelled() && (success() || failure()) }}
    needs:
      - deepspeed  # run the job when the previous test job is done
    runs-on:
      group: itac-bm-emr-gaudi3-dell-8gaudi
    container:
      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.7.1:latest
      options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
      env:
        OMPI_MCA_btl_vader_single_copy_mechanism: none
    steps:
      - name: HL-SMI
        run: |
          hl-smi
          echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
          echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
      - name: Checkout
        uses: actions/checkout@v4
      - name: Run tests
        run: make slow_tests_fsdp TOKEN=${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}

  # Multi-card slow tests on an 8-card runner.
  multi-card:
    name: Test multi-card models
    if: ${{ !cancelled() && (success() || failure()) }}
    needs:
      - fsdp  # run the job when the previous test job is done
    runs-on:
      group: itac-bm-emr-gaudi3-dell-8gaudi
    container:
      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.7.1:latest
      options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
      env:
        OMPI_MCA_btl_vader_single_copy_mechanism: none
    steps:
      - name: HL-SMI
        run: |
          hl-smi
          echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
          echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
      - name: Checkout
        uses: actions/checkout@v4
      - name: Run tests
        run: /bin/bash tests/ci/slow_tests_8x.sh ${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}

  # Single-card slow tests on a 1-card runner.
  single-card:
    name: Test single-card models
    if: ${{ !cancelled() && (success() || failure()) }}
    needs:
      - deepspeed
      - multi-card  # run the job when the previous test jobs are done
    runs-on:
      group: itac-bm-emr-gaudi3-dell-1gaudi
    container:
      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.7.1:latest
      options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
      env:
        OMPI_MCA_btl_vader_single_copy_mechanism: none
    steps:
      - name: HL-SMI
        run: |
          hl-smi
          echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
          echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
      - name: Checkout
        uses: actions/checkout@v4
      - name: Run tests
        run: /bin/bash tests/ci/slow_tests_1x.sh ${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}

  # Text-generation example slow tests (make target) on an 8-card runner.
  text-generation:
    name: Test text-generation example
    if: ${{ !cancelled() && (success() || failure()) }}
    needs:
      - deepspeed
      - multi-card
      - single-card  # run the job when the previous test jobs are done
    runs-on:
      group: itac-bm-emr-gaudi3-dell-8gaudi
    container:
      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.7.1:latest
      options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
      env:
        OMPI_MCA_btl_vader_single_copy_mechanism: none
    steps:
      - name: HL-SMI
        run: |
          hl-smi
          echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
          echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
      - name: Checkout
        uses: actions/checkout@v4
      - name: Run tests
        run: make slow_tests_text_generation_example TOKEN=${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}

  # TRL integration slow tests on a 1-card runner.
  trl:
    name: Test TRL integration
    if: ${{ !cancelled() && (success() || failure()) }}
    needs:
      - text-generation
    runs-on:
      group: itac-bm-emr-gaudi3-dell-1gaudi
    container:
      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.7.1:latest
      options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
      env:
        OMPI_MCA_btl_vader_single_copy_mechanism: none
    steps:
      - name: HL-SMI
        run: |
          hl-smi
          echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
          echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
      - name: Checkout
        uses: actions/checkout@v4
      - name: Run tests
        run: /bin/bash tests/ci/slow_tests_trl.sh

  # Sentence Transformers integration on a 1-card runner. This is the only job
  # whose container options also set `--workdir=/root/workspace`. It first
  # runs this repository's make target, then checks out
  # UKPLab/sentence-transformers and runs a selection of its tests.
  sentence-transformers:
    name: Test Sentence Transformers integration
    if: ${{ !cancelled() && (success() || failure()) }}
    needs:
      - trl
    runs-on:
      group: itac-bm-emr-gaudi3-dell-1gaudi
    container:
      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.7.1:latest
      options: --workdir=/root/workspace --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
      env:
        OMPI_MCA_btl_vader_single_copy_mechanism: none
    steps:
      - name: HL-SMI
        run: |
          hl-smi
          echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
          echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
      - name: Checkout Optimum Habana
        uses: actions/checkout@v4
      - name: Install optimum-habana and run tests
        run: make slow_tests_sentence_transformers
      - name: Checkout Sentence Transformers
        uses: actions/checkout@v4
        with:
          repository: 'UKPLab/sentence-transformers'
      - name: Install sentence-transformers and run tests
        run: pip install . && cd tests && pytest test_cmnrl.py test_multi_process.py test_compute_embeddings.py test_model_card_data.py test_util.py