huggingface
diff --git a/.github/workflows/build_docker_images.yml b/.github/workflows/build_docker_images.yml
Lines changed: 6 additions & 6 deletions
diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
Lines changed: 5 additions & 0 deletions
diff --git a/.github/workflows/test-docker-build.yml b/.github/workflows/test-docker-build.yml
Lines changed: 4 additions & 4 deletions
diff --git a/.github/workflows/tests-main.yml b/.github/workflows/tests-main.yml
Lines changed: 44 additions & 0 deletions
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
Lines changed: 50 additions & 0 deletions
diff --git a/Makefile b/Makefile
Lines changed: 7 additions & 0 deletions
diff --git a/docker/peft-gpu/Dockerfile b/docker/peft-gpu/Dockerfile
Lines changed: 6 additions & 6 deletions
diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml
Lines changed: 4 additions & 0 deletions
@@ -22,19 +22,19 @@ jobs:
       group: aws-general-8-plus
     steps:
       - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2  # v3.10.0
+        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f  # v3.12.0
       - name: Check out code
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8  # v5.0.0
         with:
           persist-credentials: false
       - name: Login to DockerHub
-        uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772  # v3.4.0
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef  # v3.6.0
         with:
           username: ${{ secrets.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_PASSWORD }}
 
       - name: Build and Push CPU
-        uses: docker/build-push-action@14487ce63c7a62a4a324b0bfb37086795e31c6c1  # v6.16.0
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83  # v6.18.0
         with:
           context: ./docker/peft-cpu
           push: true
@@ -55,19 +55,19 @@ jobs:
       group: aws-general-8-plus
     steps:
       - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2  # v3.10.0
+        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f  # v3.12.0
       - name: Check out code
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8  # v5.0.0
         with:
           persist-credentials: false
       - name: Login to DockerHub
-        uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772  # v3.4.0
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef  # v3.6.0
         with:
           username: ${{ secrets.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_PASSWORD }}
 
       - name: Build and Push GPU
-        uses: docker/build-push-action@14487ce63c7a62a4a324b0bfb37086795e31c6c1  # v6.16.0
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83  # v6.18.0
         with:
           context: ./docker/peft-gpu
           push: true
 
@@ -108,6 +108,11 @@ jobs:
           source activate peft
           make tests_core_multi_gpu
 
+      - name: Run training on multi GPU
+        run: |
+          source activate peft
+          make tests_training
+
       - name: Generate Report
         if: always()
         run: |
 
@@ -29,9 +29,9 @@ jobs:
         if: steps.changed-files.outputs.any_changed == 'true'
         id: set-matrix
         env:
-          ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
+          CHANGED_FILES: "${{ steps.changed-files.outputs.all_changed_files }}"  # escaped quotes (\") are unescaped by the sed below
         run: |
-          echo "matrix=${ALL_CHANGED_FILES}" >> $GITHUB_OUTPUT
+          echo "matrix=$(printf '%s\n' "${CHANGED_FILES}" | sed -e 's/\\\"/\"/g')" >> "$GITHUB_OUTPUT"
   build_modified_files:
     needs: get_changed_files
     name: Build Docker images on modified files
@@ -53,13 +53,13 @@ jobs:
           sudo du -sh /usr/local/lib/
           sudo du -sh /usr/share/
       - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2  # v3.10.0
+        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f  # v3.12.0
       - name: Check out code
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8  # v5.0.0
         with:
           persist-credentials: false
       - name: Build Docker image
-        uses: docker/build-push-action@14487ce63c7a62a4a324b0bfb37086795e31c6c1  # v6.16.0
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83  # v6.18.0
         with:
           file: ${{ matrix.docker-file }}
           context: .
 
@@ -15,6 +15,50 @@ jobs:
       - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8  # v5.0.0
         with:
           persist-credentials: false
+      - name: Make space for cache + models
+        # Ubuntu runners have less free disk space, which is problematic since the
+        # model cache + dependencies fill up the disk, leaving no space for execution.
+        # So we remove some of the stuff we don't need (Java, .NET, etc.).
+        #
+        # Idea: https://dev.to/mathio/squeezing-disk-space-from-github-actions-runners-an-engineers-guide-3pjg
+        if: matrix.os != 'windows-latest'
+        run: |
+          df -h
+
+          # Remove Java (JDKs)
+          sudo rm -rf /usr/lib/jvm
+
+          # Remove .NET SDKs
+          sudo rm -rf /usr/share/dotnet
+
+          # Remove Swift toolchain
+          sudo rm -rf /usr/share/swift
+
+          # Remove Haskell (GHC)
+          sudo rm -rf /usr/local/.ghcup
+
+          # Remove Julia
+          sudo rm -rf /usr/local/julia*
+
+          # Remove Android SDKs
+          sudo rm -rf /usr/local/lib/android
+
+          # Remove Chromium (optional if not using for browser tests)
+          sudo rm -rf /usr/local/share/chromium
+
+          # Remove Microsoft/Edge and Google Chrome builds
+          sudo rm -rf /opt/microsoft /opt/google
+
+          # Remove Azure CLI
+          sudo rm -rf /opt/az
+
+          # Remove PowerShell
+          sudo rm -rf /usr/local/share/powershell
+
+          # Remove CodeQL and other toolcaches
+          sudo rm -rf /opt/hostedtoolcache
+
+          df -h
       - name: Set up Python 3.11
         uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c  # v6.0.0
         with:
 
@@ -47,6 +47,50 @@ jobs:
       - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8  # v5.0.0
         with:
           persist-credentials: false
+      - name: Make space for cache + models
+        # Ubuntu runners have less free disk space, which is problematic since the
+        # model cache + dependencies fill up the disk, leaving no space for execution.
+        # So we remove some of the stuff we don't need (Java, .NET, etc.).
+        #
+        # Idea: https://dev.to/mathio/squeezing-disk-space-from-github-actions-runners-an-engineers-guide-3pjg
+        if: matrix.os != 'windows-latest'
+        run: |
+          df -h
+
+          # Remove Java (JDKs)
+          sudo rm -rf /usr/lib/jvm
+
+          # Remove .NET SDKs
+          sudo rm -rf /usr/share/dotnet
+
+          # Remove Swift toolchain
+          sudo rm -rf /usr/share/swift
+
+          # Remove Haskell (GHC)
+          sudo rm -rf /usr/local/.ghcup
+
+          # Remove Julia
+          sudo rm -rf /usr/local/julia*
+
+          # Remove Android SDKs
+          sudo rm -rf /usr/local/lib/android
+
+          # Remove Chromium (optional if not using for browser tests)
+          sudo rm -rf /usr/local/share/chromium
+
+          # Remove Microsoft/Edge and Google Chrome builds
+          sudo rm -rf /opt/microsoft /opt/google
+
+          # Remove Azure CLI
+          sudo rm -rf /opt/az
+
+          # Remove PowerShell
+          sudo rm -rf /usr/local/share/powershell
+
+          # Remove CodeQL and other toolcaches
+          sudo rm -rf /opt/hostedtoolcache
+
+          df -h
       - name: Model cache
         uses: actions/cache/restore@0400d5f644dc74513175e3cd8d07132dd4860809  # v4.2.4
         with:
@@ -59,6 +103,7 @@ jobs:
           key: model-cache-${{ github.run_id }}
           restore-keys: model-cache-
           enableCrossOsArchive: true
+
       - name: Dump cache content
         # TODO: remove this step after 2025-02-15
         if: matrix.os != 'windows-latest'
@@ -83,8 +128,13 @@ jobs:
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
           TRANSFORMERS_IS_CI: 1
+          CI: 1
         run: |
           make test
+          # clean up all pytest temporary directories that are kept due to retention since space
+          # is a scarce resource on the runners and tasks like model cache creation (further below)
+          # fail if there's not enough space available.
+          (rm -r "/tmp/pytest-of-$(id -u -n)" || true)
       - name: Dump cache content and diff
         # This is just debug info so that we can monitor if the model cache diverges substantially
         # over time and what the diverging model is.
 
@@ -61,3 +61,10 @@ tests_regression:
 
 tests_torch_compile:
 	python -m pytest tests/test_torch_compile.py $(if $(IS_GITHUB_CI),--report-log "compile_tests.log",)
+
+tests_training:
+	accelerate launch --config_file tests/training/deepspeed_config.yaml tests/training/training.py $(if $(IS_GITHUB_CI),--report-log "training_deepspeed.log",)
+	accelerate launch --config_file tests/training/deepspeed_config.yaml tests/training/training.py --quant 4bit $(if $(IS_GITHUB_CI),--report-log "training_deepspeed_4bit.log",)
+	accelerate launch --config_file tests/training/deepspeed_config.yaml tests/training/training.py --quant 8bit $(if $(IS_GITHUB_CI),--report-log "training_deepspeed_8bit.log",)
+	accelerate launch --config_file tests/training/fsdp_config.yaml tests/training/training.py $(if $(IS_GITHUB_CI),--report-log "training_fsdp.log",)
+	accelerate launch --config_file tests/training/fsdp_config.yaml tests/training/training.py --quant 4bit $(if $(IS_GITHUB_CI),--report-log "training_fsdp_4bit.log",)
@@ -8,7 +8,7 @@ ENV PYTHON_VERSION=3.11
 # Install apt libs - copied from https://github.com/huggingface/accelerate/blob/main/docker/accelerate-gpu/Dockerfile
 # Install audio-related libraries
 RUN apt-get update && \
-    apt-get install -y curl git wget software-properties-common git-lfs ffmpeg libsndfile1-dev && \
+    apt-get install -y curl git wget git-lfs ffmpeg libsndfile1-dev && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists*
 
@@ -26,7 +26,7 @@ RUN chsh -s /bin/bash
 SHELL ["/bin/bash", "-c"]
 
 # Stage 2
-FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 AS build-image
+FROM nvidia/cuda:12.8.1-devel-ubuntu22.04 AS build-image
 COPY --from=compile-image /opt/conda /opt/conda
 ENV PATH /opt/conda/bin:$PATH
 
@@ -39,12 +39,12 @@ RUN apt-get update && \
 RUN chsh -s /bin/bash
 SHELL ["/bin/bash", "-c"]
 
-RUN conda run -n peft pip install --no-cache-dir bitsandbytes optimum auto-gptq
+RUN conda run -n peft pip install --no-cache-dir bitsandbytes optimum
+
+# GPTQModel's build doesn't find torch with build isolation, so install it without
+RUN conda run -n peft pip install --no-cache-dir --no-build-isolation gptqmodel
 
 RUN \
-    # Add autoawq for quantization testing
-    conda run -n peft pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.7.post2/autoawq-0.2.7.post2-py3-none-any.whl && \
-    conda run -n peft pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ_kernels/releases/download/v0.0.9/autoawq_kernels-0.0.9-cp311-cp311-linux_x86_64.whl && \
     # Add eetq for quantization testing; needs to run without build isolation since the setup
     # script directly imports torch from the environment which would fail with isolation.
     conda run -n peft pip install --no-build-isolation git+https://github.com/NetEase-FuXi/EETQ.git
 
@@ -108,6 +108,8 @@
       title: P-tuning
     - local: package_reference/prefix_tuning
       title: Prefix tuning
+    - local: package_reference/cartridges
+      title: Cartridges
     - local: package_reference/prompt_tuning
       title: Prompt tuning
     - local: package_reference/layernorm_tuning
@@ -153,5 +155,7 @@
       title: Hotswapping adapters
     - local: package_reference/functional
       title: Functions for PEFT integration
+    - local: package_reference/lora_conversion
+      title: Converting non-LoRA adapters to LoRA
     title: Utilities
   title: API reference