# feat: use content defined chunking #5549
# Workflow file for this run
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# CI workflow: a lint/format gate followed by pytest jobs that run on several
# OS / Python / dependency-version combinations.
name: CI

on:
  pull_request:
    branches:
      - main
  push:
    # No branch filter is active for pushes; the previous filter is kept
    # below, disabled.
    # branches:
    #   - main
    #   - ci-*

env:
  # Quoted so the value is an explicit string rather than a YAML integer.
  HF_ALLOW_CODE_EVAL: "1"

jobs:
  # Lint and format gate; every test job below declares `needs` on this one.
  check_code_quality:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.9"
      - name: Install dependencies
        # pyarrow is taken from the scientific-python nightly wheel index
        # because a development build (>=21.0.0.dev) is requested.
        # The extras spec is quoted so the shell cannot treat `[quality]`
        # as a glob pattern.
        run: |
          python -m pip install --upgrade pip
          pip install --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple "pyarrow>=21.0.0.dev"
          pip install ".[quality]"
      - name: Check quality
        run: |
          ruff check tests src benchmarks utils setup.py  # linter
          ruff format --check tests src benchmarks utils setup.py  # formatter

  # Python 3.9 test matrix: unit and integration markers, two operating
  # systems, latest and minimum dependency versions.
  test:
    needs: check_code_quality
    strategy:
      matrix:
        test: ['unit', 'integration']
        os: [ubuntu-latest, windows-latest]
        deps_versions: [deps-latest, deps-minimum]
    # Integration entries are allowed to fail without failing the workflow.
    continue-on-error: ${{ matrix.test == 'integration' }}
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Set up Python 3.9
        uses: actions/setup-python@v5
        with:
          python-version: "3.9"
      - name: Upgrade pip
        run: python -m pip install --upgrade pip
      - name: Pin setuptools-scm
        # NOTE(review): the echoed message mentions Python 3.7 while this job
        # sets up 3.9 - confirm whether the pin is still required.
        if: ${{ matrix.os == 'ubuntu-latest' }}
        run: echo "installing pinned version of setuptools-scm to fix seqeval installation on 3.7" && pip install "setuptools-scm==6.4.2"
      - name: Install uv
        run: pip install --upgrade uv
      - name: Install dependencies
        run: |
          uv pip install --system --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple "pyarrow>=21.0.0.dev"
          uv pip install --system "datasets[tests] @ ."
      - name: Install dependencies (latest versions)
        if: ${{ matrix.deps_versions == 'deps-latest' }}
        run: uv pip install --system --upgrade huggingface-hub "dill<0.3.9"
      - name: Install dependencies (minimum versions)
        if: ${{ matrix.deps_versions != 'deps-latest' }}
        run: uv pip install --system huggingface-hub==0.24.7 transformers dill==0.3.1.1
      - name: Test with pytest
        run: |
          python -m pytest -rfExX -m ${{ matrix.test }} -n 2 --dist loadfile -sv ./tests/

  # Python 3.11 unit tests with the `tests` extra.
  test_py311:
    needs: check_code_quality
    strategy:
      matrix:
        test: ['unit']
        os: [ubuntu-latest, windows-latest]
        deps_versions: [deps-latest]
    continue-on-error: false
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Set up Python 3.11
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Upgrade pip
        run: python -m pip install --upgrade pip
      - name: Install uv
        run: pip install --upgrade uv
      - name: Install dependencies
        run: |
          uv pip install --system --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple "pyarrow>=21.0.0.dev"
          uv pip install --system "datasets[tests] @ ."
      - name: Test with pytest
        run: |
          python -m pytest -rfExX -m ${{ matrix.test }} -n 2 --dist loadfile -sv ./tests/

  # Same as test_py311, but installs the `tests_numpy2` extra instead of
  # `tests`.
  test_py311_numpy2:
    needs: check_code_quality
    strategy:
      matrix:
        test: ['unit']
        os: [ubuntu-latest, windows-latest]
        deps_versions: [deps-latest]
    continue-on-error: false
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Set up Python 3.11
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Upgrade pip
        run: python -m pip install --upgrade pip
      - name: Install uv
        run: pip install --upgrade uv
      - name: Install dependencies
        run: |
          uv pip install --system --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple "pyarrow>=21.0.0.dev"
          uv pip install --system "datasets[tests_numpy2] @ ."
      - name: Test with pytest
        run: |
          python -m pytest -rfExX -m ${{ matrix.test }} -n 2 --dist loadfile -sv ./tests/