Skip to content

(Gaudi3) Non-regression tests #146

(Gaudi3) Non-regression tests

(Gaudi3) Non-regression tests #146

# Non-regression test workflow: started manually or by the nightly schedule.
name: (Gaudi3) Non-regression tests

on:
  # Allows starting a run by hand.
  workflow_dispatch:
  schedule:
    # GitHub evaluates cron expressions in UTC: 21:00 UTC is 11pm CEST in
    # summer and 10pm CET in winter.
    - cron: '0 21 * * *'

# Every run of this workflow shares one concurrency group, so a new run waits
# for the one in progress; no `cancel-in-progress` is set, so nothing running
# is cancelled.
concurrency:
  group: ${{ github.workflow }}
jobs:
  # Runs tests/ci/example_diff_tests.sh inside the Gaudi PyTorch container on a
  # runner from the itac-bm-emr-gaudi3-dell-1gaudi group. First job of the
  # chain: it has no `needs`.
  example-diff:
    name: Test examples differences
    runs-on:
      group: itac-bm-emr-gaudi3-dell-1gaudi
    container:
      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.7.1:latest
      # `--env NAME` without a value forwards the host's value of NAME.
      options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
      # NOTE(review): the flattened source does not show whether `env` sits
      # under `container` or directly under the job; confirm the placement.
      env:
        OMPI_MCA_btl_vader_single_copy_mechanism: none
    steps:
      # Print hl-smi output and the HABANA_VISIBLE_* values to the job log.
      - name: HL-SMI
        run: |
          hl-smi
          echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
          echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
      - name: Checkout
        uses: actions/checkout@v4
      - name: Run tests
        run: /bin/bash tests/ci/example_diff_tests.sh
# --- job: stable-diffusion (child of the top-level `jobs:` mapping) ---
  # Runs tests/ci/slow_tests_diffusers.sh inside the Gaudi PyTorch container on
  # a runner from the itac-bm-emr-gaudi3-dell-8gaudi group.
  stable-diffusion:
    name: Test Stable Diffusion
    # Start once the needed job has finished, whether it passed or failed,
    # but not when the run was cancelled.
    if: ${{ !cancelled() && (success() || failure()) }}
    needs:
      - example-diff  # run the job when the previous test job is done
    runs-on:
      group: itac-bm-emr-gaudi3-dell-8gaudi
    container:
      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.7.1:latest
      # `--env NAME` without a value forwards the host's value of NAME.
      options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
      # NOTE(review): the flattened source does not show whether `env` sits
      # under `container` or directly under the job; confirm the placement.
      env:
        OMPI_MCA_btl_vader_single_copy_mechanism: none
    steps:
      # Print hl-smi output and the HABANA_VISIBLE_* values to the job log.
      - name: HL-SMI
        run: |
          hl-smi
          echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
          echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
      - name: Checkout
        uses: actions/checkout@v4
      - name: Run tests
        # The secret reaches the shell as an environment variable and is
        # passed as a quoted argument, instead of being templated into the
        # command text before the shell parses it.
        env:
          CI_HUB_TOKEN: ${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}
        run: /bin/bash tests/ci/slow_tests_diffusers.sh "$CI_HUB_TOKEN"
# --- job: deepspeed (child of the top-level `jobs:` mapping) ---
  # Runs tests/ci/slow_tests_deepspeed.sh inside the Gaudi PyTorch container on
  # a runner from the itac-bm-emr-gaudi3-dell-8gaudi group.
  deepspeed:
    name: Test DeepSpeed models
    # Start once the needed job has finished, whether it passed or failed,
    # but not when the run was cancelled.
    if: ${{ !cancelled() && (success() || failure()) }}
    needs:
      - stable-diffusion  # run the job when the previous test job is done
    runs-on:
      group: itac-bm-emr-gaudi3-dell-8gaudi
    container:
      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.7.1:latest
      # `--env NAME` without a value forwards the host's value of NAME.
      options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
      # NOTE(review): the flattened source does not show whether `env` sits
      # under `container` or directly under the job; confirm the placement.
      env:
        OMPI_MCA_btl_vader_single_copy_mechanism: none
    steps:
      # Print hl-smi output and the HABANA_VISIBLE_* values to the job log.
      - name: HL-SMI
        run: |
          hl-smi
          echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
          echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
      - name: Checkout
        uses: actions/checkout@v4
      - name: Run tests
        # The secret reaches the shell as an environment variable and is
        # passed as a quoted argument, instead of being templated into the
        # command text before the shell parses it.
        env:
          CI_HUB_TOKEN: ${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}
        run: /bin/bash tests/ci/slow_tests_deepspeed.sh "$CI_HUB_TOKEN"
# --- job: fsdp (child of the top-level `jobs:` mapping) ---
  # Runs the `slow_tests_fsdp` make target inside the Gaudi PyTorch container
  # on a runner from the itac-bm-emr-gaudi3-dell-8gaudi group.
  fsdp:
    name: Test FSDP models
    # Start once the needed job has finished, whether it passed or failed,
    # but not when the run was cancelled.
    if: ${{ !cancelled() && (success() || failure()) }}
    needs:
      - deepspeed  # run the job when the previous test job is done
    runs-on:
      group: itac-bm-emr-gaudi3-dell-8gaudi
    container:
      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.7.1:latest
      # `--env NAME` without a value forwards the host's value of NAME.
      options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
      # NOTE(review): the flattened source does not show whether `env` sits
      # under `container` or directly under the job; confirm the placement.
      env:
        OMPI_MCA_btl_vader_single_copy_mechanism: none
    steps:
      # Print hl-smi output and the HABANA_VISIBLE_* values to the job log.
      - name: HL-SMI
        run: |
          hl-smi
          echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
          echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
      - name: Checkout
        uses: actions/checkout@v4
      - name: Run tests
        # The secret reaches the shell as an environment variable and is
        # handed to make as a quoted TOKEN=... assignment, instead of being
        # templated into the command text before the shell parses it.
        env:
          CI_HUB_TOKEN: ${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}
        run: make slow_tests_fsdp TOKEN="$CI_HUB_TOKEN"
# --- job: multi-card (child of the top-level `jobs:` mapping) ---
  # Runs tests/ci/slow_tests_8x.sh inside the Gaudi PyTorch container on a
  # runner from the itac-bm-emr-gaudi3-dell-8gaudi group.
  multi-card:
    name: Test multi-card models
    # Start once the needed job has finished, whether it passed or failed,
    # but not when the run was cancelled.
    if: ${{ !cancelled() && (success() || failure()) }}
    needs:
      - fsdp  # run the job when the previous test job is done
    runs-on:
      group: itac-bm-emr-gaudi3-dell-8gaudi
    container:
      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.7.1:latest
      # `--env NAME` without a value forwards the host's value of NAME.
      options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
      # NOTE(review): the flattened source does not show whether `env` sits
      # under `container` or directly under the job; confirm the placement.
      env:
        OMPI_MCA_btl_vader_single_copy_mechanism: none
    steps:
      # Print hl-smi output and the HABANA_VISIBLE_* values to the job log.
      - name: HL-SMI
        run: |
          hl-smi
          echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
          echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
      - name: Checkout
        uses: actions/checkout@v4
      - name: Run tests
        # The secret reaches the shell as an environment variable and is
        # passed as a quoted argument, instead of being templated into the
        # command text before the shell parses it.
        env:
          CI_HUB_TOKEN: ${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}
        run: /bin/bash tests/ci/slow_tests_8x.sh "$CI_HUB_TOKEN"
# --- job: single-card (child of the top-level `jobs:` mapping) ---
  # Runs tests/ci/slow_tests_1x.sh inside the Gaudi PyTorch container on a
  # runner from the itac-bm-emr-gaudi3-dell-1gaudi group.
  single-card:
    name: Test single-card models
    # Start once the needed jobs have finished, whether they passed or failed,
    # but not when the run was cancelled.
    if: ${{ !cancelled() && (success() || failure()) }}
    needs:
      - deepspeed
      - multi-card  # run the job when the previous test jobs are done
    runs-on:
      group: itac-bm-emr-gaudi3-dell-1gaudi
    container:
      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.7.1:latest
      # `--env NAME` without a value forwards the host's value of NAME.
      options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
      # NOTE(review): the flattened source does not show whether `env` sits
      # under `container` or directly under the job; confirm the placement.
      env:
        OMPI_MCA_btl_vader_single_copy_mechanism: none
    steps:
      # Print hl-smi output and the HABANA_VISIBLE_* values to the job log.
      - name: HL-SMI
        run: |
          hl-smi
          echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
          echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
      - name: Checkout
        uses: actions/checkout@v4
      - name: Run tests
        # The secret reaches the shell as an environment variable and is
        # passed as a quoted argument, instead of being templated into the
        # command text before the shell parses it.
        env:
          CI_HUB_TOKEN: ${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}
        run: /bin/bash tests/ci/slow_tests_1x.sh "$CI_HUB_TOKEN"
# --- job: text-generation (child of the top-level `jobs:` mapping) ---
  # Runs the `slow_tests_text_generation_example` make target inside the Gaudi
  # PyTorch container on a runner from the itac-bm-emr-gaudi3-dell-8gaudi group.
  text-generation:
    name: Test text-generation example
    # Start once the needed jobs have finished, whether they passed or failed,
    # but not when the run was cancelled.
    if: ${{ !cancelled() && (success() || failure()) }}
    needs:
      - deepspeed
      - multi-card
      - single-card  # run the job when the previous test jobs are done
    runs-on:
      group: itac-bm-emr-gaudi3-dell-8gaudi
    container:
      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.7.1:latest
      # `--env NAME` without a value forwards the host's value of NAME.
      options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
      # NOTE(review): the flattened source does not show whether `env` sits
      # under `container` or directly under the job; confirm the placement.
      env:
        OMPI_MCA_btl_vader_single_copy_mechanism: none
    steps:
      # Print hl-smi output and the HABANA_VISIBLE_* values to the job log.
      - name: HL-SMI
        run: |
          hl-smi
          echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
          echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
      - name: Checkout
        uses: actions/checkout@v4
      - name: Run tests
        # The secret reaches the shell as an environment variable and is
        # handed to make as a quoted TOKEN=... assignment, instead of being
        # templated into the command text before the shell parses it.
        env:
          CI_HUB_TOKEN: ${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}
        run: make slow_tests_text_generation_example TOKEN="$CI_HUB_TOKEN"
# --- job: trl (child of the top-level `jobs:` mapping) ---
  # Runs tests/ci/slow_tests_trl.sh inside the Gaudi PyTorch container on a
  # runner from the itac-bm-emr-gaudi3-dell-1gaudi group.
  trl:
    name: Test TRL integration
    # Start once the needed job has finished, whether it passed or failed,
    # but not when the run was cancelled.
    if: ${{ !cancelled() && (success() || failure()) }}
    needs:
      - text-generation
    runs-on:
      group: itac-bm-emr-gaudi3-dell-1gaudi
    container:
      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.7.1:latest
      # `--env NAME` without a value forwards the host's value of NAME.
      options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
      # NOTE(review): the flattened source does not show whether `env` sits
      # under `container` or directly under the job; confirm the placement.
      env:
        OMPI_MCA_btl_vader_single_copy_mechanism: none
    steps:
      # Print hl-smi output and the HABANA_VISIBLE_* values to the job log.
      - name: HL-SMI
        run: |
          hl-smi
          echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
          echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
      - name: Checkout
        uses: actions/checkout@v4
      - name: Run tests
        run: /bin/bash tests/ci/slow_tests_trl.sh
# --- job: sentence-transformers (child of the top-level `jobs:` mapping) ---
  # Two phases inside the Gaudi PyTorch container, on a runner from the
  # itac-bm-emr-gaudi3-dell-1gaudi group: first the repository's own
  # `slow_tests_sentence_transformers` make target, then a selection of test
  # files from the UKPLab/sentence-transformers repository.
  sentence-transformers:
    name: Test Sentence Transformers integration
    # Start once the needed job has finished, whether it passed or failed,
    # but not when the run was cancelled.
    if: ${{ !cancelled() && (success() || failure()) }}
    needs:
      - trl
    runs-on:
      group: itac-bm-emr-gaudi3-dell-1gaudi
    container:
      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.7.1:latest
      # Unlike the other jobs, this one also sets the container working
      # directory. `--env NAME` without a value forwards the host's value.
      options: --workdir=/root/workspace --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
      # NOTE(review): the flattened source does not show whether `env` sits
      # under `container` or directly under the job; confirm the placement.
      env:
        OMPI_MCA_btl_vader_single_copy_mechanism: none
    steps:
      # Print hl-smi output and the HABANA_VISIBLE_* values to the job log.
      - name: HL-SMI
        run: |
          hl-smi
          echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
          echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
      - name: Checkout Optimum Habana
        uses: actions/checkout@v4
      - name: Install optimum-habana and run tests
        run: make slow_tests_sentence_transformers
      # No `path:` is given, so this checkout targets the same default
      # workspace directory as the first one.
      - name: Checkout Sentence Transformers
        uses: actions/checkout@v4
        with:
          repository: 'UKPLab/sentence-transformers'
      - name: Install sentence-transformers and run tests
        run: pip install . && cd tests && pytest test_cmnrl.py test_multi_process.py test_compute_embeddings.py test_model_card_data.py test_util.py