Skip to content

Commit cdd6936

Browse files
committed
update workflows
1 parent 417ea30 commit cdd6936

File tree

4 files changed

+49
-52
lines changed

4 files changed

+49
-52
lines changed

.github/workflows/new_tasks.yml

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
name: Scan for changed tasks
1717
steps:
1818
- name: checkout
19-
uses: actions/checkout@v4
19+
uses: actions/checkout@v6
2020
with:
2121
fetch-depth: 2 # OR "2" -> To retrieve the preceding commit.
2222

@@ -25,7 +25,7 @@ jobs:
2525
# and prepends the filter name to the standard output names.
2626
- name: Check task folders
2727
id: changed-tasks
28-
uses: tj-actions/changed-files@v46.0.5
28+
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62
2929
with:
3030
# tasks checks the tasks folder and api checks the api folder for changes
3131
files_yaml: |
@@ -44,28 +44,24 @@ jobs:
4444
echo "One or more test file(s) has changed."
4545
echo "List of all the files that have changed: ${{ steps.changed-tasks.outputs.tasks_all_modified_files }}"
4646
47-
- name: Set up Python 3.10
47+
- name: Install uv
4848
if: steps.changed-tasks.outputs.tasks_any_modified == 'true' || steps.changed-tasks.outputs.api_any_modified == 'true'
49-
uses: actions/setup-python@v5
49+
uses: astral-sh/setup-uv@v7
5050
with:
51-
python-version: '3.10'
52-
cache: 'pip'
53-
cache-dependency-path: pyproject.toml
51+
enable-cache: true
52+
python-version: "3.10"
53+
activate-environment: true
5454
- name: Install dependencies
5555
if: steps.changed-tasks.outputs.tasks_any_modified == 'true' || steps.changed-tasks.outputs.api_any_modified == 'true'
5656
run: |
57-
python -m pip install --upgrade pip
58-
pip install -e '.[dev,ifeval,unitxt,math,longbench]' --extra-index-url https://download.pytorch.org/whl/cpu
59-
# Install optional git dependencies
60-
# pip install bleurt@https://github.com/google-research/bleurt/archive/b610120347ef22b494b6d69b4316e303f5932516.zip#egg=bleurt
61-
# if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
57+
uv pip install -e '.[dev,ifeval,unitxt,math,longbench,hf]' --extra-index-url https://download.pytorch.org/whl/cpu
6258
- name: Test with pytest
6359
# if new tasks are added, run tests on them
6460
if: steps.changed-tasks.outputs.tasks_any_modified == 'true'
65-
run: python -m pytest tests/test_tasks.py -s -vv
61+
run: pytest -x -s -vv tests/test_tasks.py
6662
# if api is modified, run tests on it
6763
- name: Test more tasks with pytest
6864
env:
6965
API: true
7066
if: steps.changed-tasks.outputs.api_any_modified == 'true'
71-
run: python -m pytest tests/test_tasks.py -s -vv
67+
run: pytest -x -s -vv -n=auto tests/test_tasks.py

.github/workflows/unit_tests.yml

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,15 @@ jobs:
2121

2222
steps:
2323
- name: Checkout Code
24-
uses: actions/checkout@v4
25-
- name: Set up Python 3.10
26-
uses: actions/setup-python@v5
24+
uses: actions/checkout@v6
25+
- name: Install uv
26+
uses: astral-sh/setup-uv@v7
2727
with:
28-
python-version: '3.10'
29-
cache: pip
30-
cache-dependency-path: pyproject.toml
28+
enable-cache: true
29+
python-version: "3.10"
30+
activate-environment: true
31+
- name: Install pip
32+
run: uv pip install pip
3133
- name: Pre-Commit
3234
env:
3335
SKIP: "no-commit-to-branch,mypy"
@@ -43,13 +45,13 @@ jobs:
4345
timeout-minutes: 30
4446
steps:
4547
- name: Checkout Code
46-
uses: actions/checkout@v4
47-
- name: Set up Python ${{ matrix.python-version }}
48-
uses: actions/setup-python@v5
48+
uses: actions/checkout@v6
49+
- name: Install uv
50+
uses: astral-sh/setup-uv@v7
4951
with:
52+
enable-cache: true
5053
python-version: ${{ matrix.python-version }}
51-
cache: pip
52-
cache-dependency-path: pyproject.toml
54+
activate-environment: true
5355

5456
# Cache HuggingFace cache directory for CPU tests
5557
- name: Cache HuggingFace cache (CPU tests)
@@ -63,17 +65,16 @@ jobs:
6365
6466
- name: Install dependencies
6567
run: |
66-
python -m pip install --upgrade pip
67-
pip install -e '.[dev,unitxt]' --extra-index-url https://download.pytorch.org/whl/cpu
68-
pip install hf_xet
68+
uv pip install -e '.[dev,unitxt,hf]' --extra-index-url https://download.pytorch.org/whl/cpu
69+
uv pip install hf_xet
6970
7071
- name: Test with pytest
71-
run: python -m pytest --showlocals -s -vv -n=auto --ignore=tests/models/test_openvino.py --ignore=tests/models/test_hf_steered.py
72-
continue-on-error: true # Continue workflow even if tests fail
72+
run: pytest -x --showlocals -s -vv -n=auto --ignore=tests/models/test_openvino.py --ignore=tests/models/test_hf_steered.py
7373

7474
# Save test artifacts
7575
- name: Archive test artifacts
76-
uses: actions/upload-artifact@v4
76+
if: always() # Upload artifacts even if tests fail
77+
uses: actions/upload-artifact@v5
7778
with:
7879
name: output_testcpu${{ matrix.python-version }}
7980
path: |

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ repos:
2727
- id: mixed-line-ending
2828
args: [ --fix=lf ]
2929
- repo: https://github.com/astral-sh/ruff-pre-commit
30-
rev: v0.13.2
30+
rev: v0.14.6
3131
hooks:
3232
# Run the linter.
3333
- id: ruff-check
@@ -46,7 +46,7 @@ repos:
4646
4747
args: [ --check-filenames, --check-hidden, --ignore-words=ignore.txt ]
4848
- repo: https://github.com/jackdewinter/pymarkdown
49-
rev: v0.9.32
49+
rev: v0.9.33
5050
hooks:
5151
- id: pymarkdown
5252
exclude: ^(lm_eval/tasks/.*|docs/footguns\.md)$

tests/models/test_bos_handling.py

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -46,16 +46,16 @@ def pythia_tokenizer():
4646

4747

4848
@pytest.fixture(scope="module")
49-
def gemma_tokenizer():
49+
def olmo_tokenizer():
5050
"""
51-
Load gemma-2-2b-it tokenizer for testing.
51+
Load OLMo-3-7B-Instruct tokenizer for testing.
5252
5353
Properties:
54-
- BOS token: '<bos>' (ID: 2)
54+
- BOS token: '<|endoftext|>' (ID: 100257)
5555
- DOES add BOS by default (add_bos_token=True in tokenizer)
5656
- Used to test tokenizers that add BOS by default
5757
"""
58-
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b-it")
58+
tokenizer = AutoTokenizer.from_pretrained("allenai/OLMo-3-7B-Instruct")
5959
# Set pad token to avoid padding errors in batch encoding
6060
tokenizer.pad_token = tokenizer.eos_token
6161
tokenizer.padding_side = "right"
@@ -150,14 +150,14 @@ def test_both_none_returns_empty(self):
150150
class TestDefaultsToNone:
151151
"""Test that add_bos_token defaults to None, allowing tokenizer defaults."""
152152

153-
@pytest.mark.parametrize("tokenizer_name", ["pythia_tokenizer", "gemma_tokenizer"])
153+
@pytest.mark.parametrize("tokenizer_name", ["pythia_tokenizer", "olmo_tokenizer"])
154154
def test_huggingface_none_uses_tokenizer_default(self, tokenizer_name, request):
155155
"""
156156
HF: When add_bos_token=None, should respect tokenizer's default.
157157
158158
Tests both tokenizer types:
159159
- Pythia: Doesn't add BOS by default
160-
- Gemma: DOES add BOS by default
160+
- OLMo: DOES add BOS by default
161161
"""
162162
tokenizer = request.getfixturevalue(tokenizer_name)
163163
mock_hflm = create_hf_mock(tokenizer, add_bos_token=None)
@@ -166,14 +166,14 @@ def test_huggingface_none_uses_tokenizer_default(self, tokenizer_name, request):
166166
expected = tokenizer.encode("Hello")
167167
assert result == expected
168168

169-
@pytest.mark.parametrize("tokenizer_name", ["pythia_tokenizer", "gemma_tokenizer"])
169+
@pytest.mark.parametrize("tokenizer_name", ["pythia_tokenizer", "olmo_tokenizer"])
170170
def test_vllm_none_uses_tokenizer_default(self, tokenizer_name, request):
171171
"""
172172
vLLM: When add_bos_token=None, should respect tokenizer's default.
173173
174174
Tests both tokenizer types:
175175
- Pythia: Doesn't add BOS by default
176-
- Gemma: DOES add BOS by default
176+
- OLMo: DOES add BOS by default
177177
"""
178178
tokenizer = request.getfixturevalue(tokenizer_name)
179179
mock_vllm = create_vllm_mock(tokenizer, add_bos_token=None)
@@ -191,7 +191,7 @@ def test_vllm_none_uses_tokenizer_default(self, tokenizer_name, request):
191191
class TestNoDuplicateBos:
192192
"""Test that BOS tokens are never duplicated when already present."""
193193

194-
@pytest.mark.parametrize("tokenizer_name", ["pythia_tokenizer", "gemma_tokenizer"])
194+
@pytest.mark.parametrize("tokenizer_name", ["pythia_tokenizer", "olmo_tokenizer"])
195195
def test_huggingface_detects_bos_in_single_string(self, tokenizer_name, request):
196196
"""HF: Should detect BOS prefix and avoid duplication."""
197197
tokenizer = request.getfixturevalue(tokenizer_name)
@@ -215,7 +215,7 @@ def test_huggingface_detects_bos_in_single_string(self, tokenizer_name, request)
215215
# Should avoid duplication (fewer or equal tokens)
216216
assert input_ids.shape[1] <= without_detection.shape[1]
217217

218-
@pytest.mark.parametrize("tokenizer_name", ["pythia_tokenizer", "gemma_tokenizer"])
218+
@pytest.mark.parametrize("tokenizer_name", ["pythia_tokenizer", "olmo_tokenizer"])
219219
def test_huggingface_adds_bos_when_missing(self, tokenizer_name, request):
220220
"""HF: Should add BOS when string doesn't have it (using add_special_tokens=True)"""
221221
tokenizer = request.getfixturevalue(tokenizer_name)
@@ -228,13 +228,13 @@ def test_huggingface_adds_bos_when_missing(self, tokenizer_name, request):
228228

229229
assert input_ids.tolist() == expected.tolist()
230230

231-
@pytest.mark.parametrize("tokenizer_name", ["pythia_tokenizer", "gemma_tokenizer"])
231+
@pytest.mark.parametrize("tokenizer_name", ["pythia_tokenizer", "olmo_tokenizer"])
232232
def test_huggingface_follows_tokenizer_default(self, tokenizer_name, request):
233233
"""
234234
HF: When add_bos_token is not set (None), follows tokenizer default.
235235
236236
- Pythia: Doesn't add BOS by default
237-
- Gemma: DOES add BOS by default
237+
- OLMo: DOES add BOS by default
238238
"""
239239
tokenizer = request.getfixturevalue(tokenizer_name)
240240
mock_hflm = create_hf_mock(tokenizer, add_bos_token=None)
@@ -244,7 +244,7 @@ def test_huggingface_follows_tokenizer_default(self, tokenizer_name, request):
244244

245245
assert input_ids.tolist() == expected.tolist()
246246

247-
@pytest.mark.parametrize("tokenizer_name", ["pythia_tokenizer", "gemma_tokenizer"])
247+
@pytest.mark.parametrize("tokenizer_name", ["pythia_tokenizer", "olmo_tokenizer"])
248248
@pytest.mark.parametrize("add_bos_token", [None, True])
249249
def test_vllm_handles_mixed_batch(self, tokenizer_name, add_bos_token, request):
250250
"""
@@ -284,7 +284,7 @@ def test_vllm_handles_mixed_batch(self, tokenizer_name, add_bos_token, request):
284284
for i, exp in enumerate(expected):
285285
assert result[i] == exp
286286

287-
@pytest.mark.parametrize("tokenizer_name", ["pythia_tokenizer", "gemma_tokenizer"])
287+
@pytest.mark.parametrize("tokenizer_name", ["pythia_tokenizer", "olmo_tokenizer"])
288288
@pytest.mark.parametrize("add_bos_token", [None, True])
289289
def test_vllm_preserves_order_in_mixed_batch(
290290
self, tokenizer_name, add_bos_token, request
@@ -328,7 +328,7 @@ def test_vllm_preserves_order_in_mixed_batch(
328328
class TestChatTemplateCompatibility:
329329
"""Test that chat templates (which add BOS) work without duplication."""
330330

331-
@pytest.mark.parametrize("tokenizer_name", ["pythia_tokenizer", "gemma_tokenizer"])
331+
@pytest.mark.parametrize("tokenizer_name", ["pythia_tokenizer", "olmo_tokenizer"])
332332
def test_huggingface_chat_template_no_duplicate_bos(self, tokenizer_name, request):
333333
"""
334334
HF: Chat template adds BOS, tokenizer should not add another.
@@ -352,7 +352,7 @@ def test_huggingface_chat_template_no_duplicate_bos(self, tokenizer_name, reques
352352

353353
assert torch.equal(input_ids, expected)
354354

355-
@pytest.mark.parametrize("tokenizer_name", ["pythia_tokenizer", "gemma_tokenizer"])
355+
@pytest.mark.parametrize("tokenizer_name", ["pythia_tokenizer", "olmo_tokenizer"])
356356
@pytest.mark.parametrize("add_bos_token", [None, True])
357357
def test_vllm_mixed_chat_batch(self, tokenizer_name, add_bos_token, request):
358358
"""
@@ -424,7 +424,7 @@ def test_huggingface_seq2seq_skips_causal_bos_logic(self, pythia_tokenizer):
424424
class TestLoglikelihoodBosHandling:
425425
"""Test BOS handling in loglikelihood method."""
426426

427-
@pytest.mark.parametrize("tokenizer_name", ["pythia_tokenizer", "gemma_tokenizer"])
427+
@pytest.mark.parametrize("tokenizer_name", ["pythia_tokenizer", "olmo_tokenizer"])
428428
@pytest.mark.parametrize("add_bos_token", [None, True])
429429
def test_empty_context_continuation_with_bos(
430430
self, tokenizer_name, add_bos_token, request
@@ -479,7 +479,7 @@ def capture_and_return(reqs, disable_tqdm=False):
479479
)
480480
assert continuation_enc == continuation_without_bos[1:] # Skip the BOS token
481481

482-
@pytest.mark.parametrize("tokenizer_name", ["pythia_tokenizer", "gemma_tokenizer"])
482+
@pytest.mark.parametrize("tokenizer_name", ["pythia_tokenizer", "olmo_tokenizer"])
483483
@pytest.mark.parametrize("add_bos_token", [None, True])
484484
def test_empty_context_continuation_without_bos(
485485
self, tokenizer_name, add_bos_token, request
@@ -523,7 +523,7 @@ def capture_and_return(reqs, disable_tqdm=False):
523523
expected_continuation = tokenizer.encode(continuation, add_special_tokens=False)
524524
assert continuation_enc == expected_continuation
525525

526-
@pytest.mark.parametrize("tokenizer_name", ["pythia_tokenizer", "gemma_tokenizer"])
526+
@pytest.mark.parametrize("tokenizer_name", ["pythia_tokenizer", "olmo_tokenizer"])
527527
@pytest.mark.parametrize("add_bos_token", [None, True])
528528
def test_context_with_bos_prefix(self, tokenizer_name, add_bos_token, request):
529529
"""When context starts with BOS (e.g., from chat template), should not duplicate BOS."""

0 commit comments

Comments
 (0)