Skip to content

Fix docker GPU build for gptqmodel (#3018) #7234

Fix docker GPU build for gptqmodel (#3018)

Fix docker GPU build for gptqmodel (#3018) #7234

Workflow file for this run

# Workflow name as shown in the Actions UI.
name: tests

# Run on pushes to main and on pull requests; changes that only touch docs/ are skipped.
on:
  push:
    branches:
      - main
    paths-ignore:
      - "docs/**"
  pull_request:
    paths-ignore:
      - "docs/**"

# Relative Hugging Face cache location; the model-cache steps of the jobs reference it.
env:
  HF_HOME: .cache/huggingface

# Grant no token permissions at the workflow level.
permissions: {}
jobs:
  # Quality gate: the test matrix below only starts once this job has succeeded.
  check_code_quality:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          persist-credentials: false
      - name: Set up Python
        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
        with:
          python-version: "3.11"
          cache: "pip"
          cache-dependency-path: "setup.py"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          # Quoted so the shell never treats the extras spec as a glob pattern.
          pip install ".[dev]"
      - name: Check quality
        run: |
          make quality

  # Test suite, run for every combination of Python version and operating system.
  tests:
    needs: check_code_quality
    strategy:
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false
      matrix:
        python-version: ["3.10", "3.11", "3.12", "3.13"]
        os: ["ubuntu-latest", "windows-latest"]
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          persist-credentials: false
      - name: Make space for cache + models
        # Ubuntu runners have less free space, which is problematic since the model
        # cache + dependencies fill up the disk, leaving no space for execution.
        # So we remove some of the stuff we don't need (Java, .NET, etc.)
        #
        # Idea: https://dev.to/mathio/squeezing-disk-space-from-github-actions-runners-an-engineers-guide-3pjg
        if: matrix.os != 'windows-latest'
        run: |
          df -h
          # Remove Java (JDKs)
          sudo rm -rf /usr/lib/jvm
          # Remove .NET SDKs
          sudo rm -rf /usr/share/dotnet
          # Remove Swift toolchain
          sudo rm -rf /usr/share/swift
          # Remove Haskell (GHC)
          sudo rm -rf /usr/local/.ghcup
          # Remove Julia
          sudo rm -rf /usr/local/julia*
          # Remove Android SDKs
          sudo rm -rf /usr/local/lib/android
          # Remove Chromium (optional if not using for browser tests)
          sudo rm -rf /usr/local/share/chromium
          # Remove Microsoft/Edge and Google Chrome builds
          sudo rm -rf /opt/microsoft /opt/google
          # Remove Azure CLI
          sudo rm -rf /opt/az
          # Remove PowerShell
          sudo rm -rf /usr/local/share/powershell
          # Remove CodeQL and other toolcaches
          sudo rm -rf /opt/hostedtoolcache
          df -h
      - name: Model cache
        uses: actions/cache/restore@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
        with:
          # Avoid caching HF_HOME/modules and Python cache files to prevent interoperability
          # issues and potential cache poisoning. We also avoid lock files to prevent runs
          # avoiding re-download because they see a lock file.
          path: |
            ${{ env.HF_HOME }}/hub/**
            !${{ env.HF_HOME }}/**/*.pyc
          # The run-specific key is paired with the prefix restore key below, so the
          # restore can fall back to a cache saved by another run.
          key: model-cache-${{ github.run_id }}
          restore-keys: model-cache-
          enableCrossOsArchive: true
      - name: Dump cache content
        # TODO: remove this step after 2025-02-15
        if: matrix.os != 'windows-latest'
        run: |
          # Hash every cached file so the later diff step can show what changed.
          # HF_HOME is exported by the workflow-level `env`, so read it from the
          # environment instead of interpolating an expression into the script.
          SHASUM=sha256sum
          if command -v shasum > /dev/null 2>&1; then SHASUM=shasum; fi
          find "$HF_HOME/hub" -type f -exec "$SHASUM" {} \; > cache_content_initial || true
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
        with:
          python-version: ${{ matrix.python-version }}
          cache: "pip"
          cache-dependency-path: "setup.py"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install setuptools
          # cpu version of pytorch
          pip install -e ".[test]"
      - name: Test with pytest
        shell: bash
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          TRANSFORMERS_IS_CI: "1"
          CI: "1"
        run: |
          make test
      - name: Clean up pytest temporary directories
        # Clean up all pytest temporary directories that are kept due to retention since space
        # is a scarce resource on the runners and tasks like model cache creation (further below)
        # fail if there's not enough space available. This is a separate `always()` step so the
        # cleanup also happens when the tests fail; the cache update below runs in that case too.
        if: always()
        shell: bash
        run: |
          rm -rf "/tmp/pytest-of-$(id -u -n)" || true
      - name: Dump cache content and diff
        # This is just debug info so that we can monitor if the model cache diverges substantially
        # over time and what the diverging model is.
        # TODO: remove after 2025-02-15
        if: matrix.os != 'windows-latest'
        run: |
          SHASUM=sha256sum
          if command -v shasum > /dev/null 2>&1; then SHASUM=shasum; fi
          find "$HF_HOME/hub" -type f -exec "$SHASUM" {} \; > cache_content_after || true
          diff -udp cache_content_initial cache_content_after || true
      - name: Delete old model cache entries
        # Run under bash on every OS, like the pytest step, so that `|| true` is interpreted
        # the same way on the Windows and Ubuntu runners.
        shell: bash
        run: |
          # make sure that cache cleaning doesn't break the pipeline
          python scripts/ci_clean_cache.py -d || true
      - name: Update model cache
        uses: actions/cache/save@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
        # Only let one runner (preferably the one that covers most tests) update the model cache
        # after *every* run. This way we make sure that our cache is never outdated and we don't
        # have to keep track of hashes.
        if: always() && matrix.os == 'ubuntu-latest' && matrix.python-version == '3.10'
        with:
          path: |
            ${{ env.HF_HOME }}/hub/**
            !${{ env.HF_HOME }}/**/*.pyc
          key: model-cache-${{ github.run_id }}