Skip to content

Commit b87077d

Browse files
Conchylicultor authored and The gemma Authors committed
Add tests for the examples
PiperOrigin-RevId: 752272527
1 parent d1c6f21 commit b87077d

File tree

4 files changed

+94
-6
lines changed

4 files changed

+94
-6
lines changed

gemma/gm/testing/__init__.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,13 @@
1616

1717
# pylint: disable=g-importing-member
1818

19-
from gemma.gm.testing._dummy_model import DummyGemma
20-
from gemma.gm.testing._dummy_tokenizer import DummyTokenizer
19+
20+
from etils import epy as _epy
21+
22+
# pylint: disable=g-import-not-at-top,g-importing-member
23+
24+
with _epy.lazy_api_imports(globals()):
25+
26+
from gemma.gm.testing._dummy_model import DummyGemma
27+
from gemma.gm.testing._dummy_tokenizer import DummyTokenizer
28+
from gemma.gm.testing._fixtures import use_hermetic_tokenizer

gemma/gm/testing/_fixtures.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Copyright 2024 DeepMind Technologies Limited.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Test fixtures."""
16+
17+
import contextlib # pylint: disable=unused-import
18+
from unittest import mock
19+
20+
from etils import epath
21+
from gemma.gm.text import _tokenizer
22+
import pytest # pytype: disable=import-error
23+
24+
25+
@pytest.fixture(autouse=True, scope='module')
def use_hermetic_tokenizer():
  """Point tests at the local tokenizer model, to avoid TFHub calls.

  Builds a wrapper around `Gemma3Tokenizer.__init__` that discards the
  caller-supplied `path` and substitutes the `tokenizer_gemma3.model` file
  resolved from the `gemma` package resources.

  NOTE(review): in this version the wrapper is constructed but never
  installed -- the only context manager entered is
  `contextlib.nullcontext()`, so the fixture simply yields. Confirm whether
  the patching step is expected to be active here.

  Yields:
    Nothing; control is handed to the tests of the module.
  """
  hermetic_path = (
      epath.resource_path('gemma') / 'testdata/tokenizer_gemma3.model'
  )
  original_init = _tokenizer.Gemma3Tokenizer.__init__

  # `Gemma3Tokenizer.path` cannot be mocked directly: dataclasses also store
  # the value as the `__init__` default, hence `__init__` itself is wrapped.
  def new_init(self, path=None, **kwargs):
    del path  # Any path given by the caller is ignored.
    original_init(self, hermetic_path, **kwargs)

  with contextlib.nullcontext():
    yield

gemma/gm/tests/examples_test.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Copyright 2024 DeepMind Technologies Limited.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from gemma import gm
16+
from gemma.examples import seq2seq
17+
from kauldron import konfig
18+
import tensorflow_datasets as tfds
19+
20+
# Activate the fixture
21+
use_hermetic_tokenizer = gm.testing.use_hermetic_tokenizer
22+
23+
24+
def test_examples():
  """Resolve the seq2seq example config using a dummy model and mocked data."""
  config = seq2seq.get_config()

  # Swap the model of the example for the dummy one while modules are mocked.
  with konfig.mock_modules():
    config.model = gm.testing.DummyGemma(tokens='batch.input')

  # Test-only overrides.
  config.train_ds.num_workers = 0  # Disable multi-processing in tests.
  config.workdir = '/tmp/gemma_test'

  resolved_trainer = konfig.resolve(config)

  # Accessing `context_specs` resolves the training step, including the
  # metrics, losses,...
  with tfds.testing.mock_data(num_examples=10):
    _ = resolved_trainer.context_specs

gemma/gm/text/_tokenizer_test.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,14 @@
1414

1515
import pickle
1616

17-
from etils import epath
1817
from gemma import gm
1918

19+
# Activate the fixture
20+
use_hermetic_tokenizer = gm.testing.use_hermetic_tokenizer
2021

21-
def test_pickle():
2222

23-
tokenizer = gm.text.Gemma3Tokenizer(
24-
)
23+
def test_pickle():
  """Check that a tokenizer can be pickled after it has been used once."""
  tok = gm.text.Gemma3Tokenizer()

  # Encoding once triggers the lazy-loading of the tokenizer.
  tok.encode('Hello world!')

  pickle.dumps(tok)

0 commit comments

Comments
 (0)