diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index 25d837ccec0..30f7b5a68fb 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -40,7 +40,9 @@ from unittest import mock from unittest.mock import patch +import huggingface_hub.utils import urllib3 +from huggingface_hub import delete_repo from transformers import logging as transformers_logging @@ -1570,6 +1572,38 @@ def LoggingLevel(level): transformers_logging.set_verbosity(orig_level) +class TemporaryHubRepo: + """Create a temporary Hub repository and return its `RepoUrl` object. This is similar to + `tempfile.TemporaryDirectory` and can be used as a context manager. For example: + + with TemporaryHubRepo(token=self._token) as temp_repo: + ... + + Upon exiting the context, the repository and everything contained in it are removed. + + Example: + + ```python + with TemporaryHubRepo(token=self._token) as temp_repo: + model.push_to_hub(temp_repo.repo_id, token=self._token) + ``` + """ + + def __init__(self, namespace: Optional[str] = None, token: Optional[str] = None) -> None: + self.token = token + with tempfile.TemporaryDirectory() as tmp_dir: + repo_id = Path(tmp_dir).name + if namespace is not None: + repo_id = f"{namespace}/{repo_id}" + self.repo_url = huggingface_hub.create_repo(repo_id, token=self.token) + + def __enter__(self): + return self.repo_url + + def __exit__(self, exc, value, tb): + delete_repo(repo_id=self.repo_url.repo_id, token=self.token, missing_ok=True) + + @contextlib.contextmanager # adapted from https://stackoverflow.com/a/64789046/9201239 def ExtendSysPath(path: Union[str, os.PathLike]) -> Iterator[None]: diff --git a/tests/generation/test_configuration_utils.py b/tests/generation/test_configuration_utils.py index f4bd551bd7a..24fea85a900 100644 --- a/tests/generation/test_configuration_utils.py +++ b/tests/generation/test_configuration_utils.py @@ -18,9 +18,8 @@ import tempfile import unittest import warnings -from 
pathlib import Path -from huggingface_hub import HfFolder, create_pull_request, create_repo, delete_repo +from huggingface_hub import HfFolder, create_pull_request from parameterized import parameterized from transformers import AutoConfig, GenerationConfig, WatermarkingConfig, is_torch_available @@ -57,7 +56,7 @@ UnbatchedClassifierFreeGuidanceLogitsProcessor, WatermarkLogitsProcessor, ) -from transformers.testing_utils import TOKEN, USER, is_staging_test, torch_device +from transformers.testing_utils import TOKEN, TemporaryHubRepo, is_staging_test, torch_device class GenerationConfigTest(unittest.TestCase): @@ -679,114 +678,82 @@ def setUpClass(cls): cls._token = TOKEN HfFolder.save_token(TOKEN) - @staticmethod - def _try_delete_repo(repo_id, token): - try: - # Reset repo - delete_repo(repo_id=repo_id, token=token) - except: # noqa E722 - pass - def test_push_to_hub(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"{USER}/test-generation-config-{Path(tmp_dir).name}" - config = GenerationConfig( - do_sample=True, - temperature=0.7, - length_penalty=1.0, - ) - config.push_to_hub(tmp_repo, token=self._token) - - new_config = GenerationConfig.from_pretrained(tmp_repo) - for k, v in config.to_dict().items(): - if k != "transformers_version": - self.assertEqual(v, getattr(new_config, k)) - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + with TemporaryHubRepo(token=self._token) as tmp_repo: + config = GenerationConfig( + do_sample=True, + temperature=0.7, + length_penalty=1.0, + ) + config.push_to_hub(tmp_repo.repo_id, token=self._token) + + new_config = GenerationConfig.from_pretrained(tmp_repo.repo_id) + for k, v in config.to_dict().items(): + if k != "transformers_version": + self.assertEqual(v, getattr(new_config, k)) def test_push_to_hub_via_save_pretrained(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"{USER}/test-generation-config-{Path(tmp_dir).name}" - config = GenerationConfig( - do_sample=True, - temperature=0.7, - length_penalty=1.0, - ) - # Push to hub via save_pretrained - config.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token) - - new_config = GenerationConfig.from_pretrained(tmp_repo) - for k, v in config.to_dict().items(): - if k != "transformers_version": - self.assertEqual(v, getattr(new_config, k)) - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + with TemporaryHubRepo(token=self._token) as tmp_repo: + config = GenerationConfig( + do_sample=True, + temperature=0.7, + length_penalty=1.0, + ) + # Push to hub via save_pretrained + with tempfile.TemporaryDirectory() as tmp_dir: + config.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token) + + new_config = GenerationConfig.from_pretrained(tmp_repo.repo_id) + for k, v in config.to_dict().items(): + if k != "transformers_version": + self.assertEqual(v, getattr(new_config, k)) def test_push_to_hub_in_organization(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"valid_org/test-generation-config-org-{Path(tmp_dir).name}" - config = GenerationConfig( - do_sample=True, - temperature=0.7, - length_penalty=1.0, - ) - config.push_to_hub(tmp_repo, token=self._token) - - new_config = GenerationConfig.from_pretrained(tmp_repo) - for k, v in config.to_dict().items(): - if k != "transformers_version": - self.assertEqual(v, getattr(new_config, k)) - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo: + config = GenerationConfig( + do_sample=True, + temperature=0.7, + length_penalty=1.0, + ) + config.push_to_hub(tmp_repo.repo_id, token=self._token) + + new_config = GenerationConfig.from_pretrained(tmp_repo.repo_id) + for k, v in config.to_dict().items(): + if k != "transformers_version": + self.assertEqual(v, getattr(new_config, k)) def test_push_to_hub_in_organization_via_save_pretrained(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"valid_org/test-generation-config-org-{Path(tmp_dir).name}" - config = GenerationConfig( - do_sample=True, - temperature=0.7, - length_penalty=1.0, - ) - # Push to hub via save_pretrained - config.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token) - - new_config = GenerationConfig.from_pretrained(tmp_repo) - for k, v in config.to_dict().items(): - if k != "transformers_version": - self.assertEqual(v, getattr(new_config, k)) - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo: + config = GenerationConfig( + do_sample=True, + temperature=0.7, + length_penalty=1.0, + ) + # Push to hub via save_pretrained + with tempfile.TemporaryDirectory() as tmp_dir: + config.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token) + + new_config = GenerationConfig.from_pretrained(tmp_repo.repo_id) + for k, v in config.to_dict().items(): + if k != "transformers_version": + self.assertEqual(v, getattr(new_config, k)) def test_push_to_hub_on_pr_revision(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - # create a repo and a PR - repo_id = f"{USER}/test-generation-config-{Path(tmp_dir).name}" - create_repo(repo_id=repo_id, token=self._token) - pr = create_pull_request(repo_id=repo_id, title="Test PR", token=self._token) - revision = f"refs/pr/{pr.num}" - - # push to PR ref - config = GenerationConfig( - do_sample=True, - temperature=0.7, - length_penalty=1.0, - ) - config.push_to_hub(repo_id, token=self._token, revision=revision) - - # load from PR ref - new_config = GenerationConfig.from_pretrained(repo_id, revision=revision) - for k, v in config.to_dict().items(): - if k != "transformers_version": - self.assertEqual(v, getattr(new_config, k)) - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=repo_id, token=self._token) + with TemporaryHubRepo(token=self._token) as tmp_repo: + # create a PR + pr = create_pull_request(repo_id=tmp_repo.repo_id, title="Test PR", token=self._token) + revision = f"refs/pr/{pr.num}" + + # push to PR ref + config = GenerationConfig( + do_sample=True, + temperature=0.7, + length_penalty=1.0, + ) + config.push_to_hub(tmp_repo.repo_id, token=self._token, revision=revision) + + # load from PR ref + new_config = GenerationConfig.from_pretrained(tmp_repo.repo_id, revision=revision) + for k, v in config.to_dict().items(): + if k != "transformers_version": + self.assertEqual(v, getattr(new_config, k)) diff --git a/tests/models/auto/test_processor_auto.py b/tests/models/auto/test_processor_auto.py index e46d57701f0..fd361f160f2 100644 --- a/tests/models/auto/test_processor_auto.py +++ b/tests/models/auto/test_processor_auto.py @@ -21,7 +21,7 @@ from pathlib import Path from shutil import copyfile -from huggingface_hub import HfFolder, Repository, create_repo, delete_repo +from huggingface_hub import HfFolder, Repository import transformers from transformers import ( @@ -39,7 +39,7 @@ Wav2Vec2FeatureExtractor, Wav2Vec2Processor, ) -from transformers.testing_utils import TOKEN, USER, get_tests_dir, is_staging_test +from transformers.testing_utils import TOKEN, TemporaryHubRepo, get_tests_dir, is_staging_test from transformers.tokenization_utils import TOKENIZER_CONFIG_FILE from transformers.utils import FEATURE_EXTRACTOR_NAME, PROCESSOR_NAME, is_tokenizers_available @@ -372,72 +372,52 @@ def setUpClass(cls): cls._token = TOKEN HfFolder.save_token(TOKEN) - @staticmethod - def _try_delete_repo(repo_id, token): - try: - # Reset repo - delete_repo(repo_id=repo_id, token=token) - except: # noqa E722 - pass - def test_push_to_hub_via_save_pretrained(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"{USER}/test-processor-{Path(tmp_dir).name}" - processor = 
Wav2Vec2Processor.from_pretrained(SAMPLE_PROCESSOR_CONFIG_DIR) - # Push to hub via save_pretrained - processor.save_pretrained(tmp_repo, repo_id=tmp_repo, push_to_hub=True, token=self._token) - - new_processor = Wav2Vec2Processor.from_pretrained(tmp_repo) - for k, v in processor.feature_extractor.__dict__.items(): - self.assertEqual(v, getattr(new_processor.feature_extractor, k)) - self.assertDictEqual(new_processor.tokenizer.get_vocab(), processor.tokenizer.get_vocab()) - finally: - # Always (try to) delete the repo. - self._try_delete_repo(repo_id=tmp_repo, token=self._token) + with TemporaryHubRepo(token=self._token) as tmp_repo: + processor = Wav2Vec2Processor.from_pretrained(SAMPLE_PROCESSOR_CONFIG_DIR) + # Push to hub via save_pretrained + with tempfile.TemporaryDirectory() as tmp_dir: + processor.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token) - def test_push_to_hub_in_organization_via_save_pretrained(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"valid_org/test-processor-org-{Path(tmp_dir).name}" - processor = Wav2Vec2Processor.from_pretrained(SAMPLE_PROCESSOR_CONFIG_DIR) + new_processor = Wav2Vec2Processor.from_pretrained(tmp_repo.repo_id) + for k, v in processor.feature_extractor.__dict__.items(): + self.assertEqual(v, getattr(new_processor.feature_extractor, k)) + self.assertDictEqual(new_processor.tokenizer.get_vocab(), processor.tokenizer.get_vocab()) - # Push to hub via save_pretrained + def test_push_to_hub_in_organization_via_save_pretrained(self): + with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo: + processor = Wav2Vec2Processor.from_pretrained(SAMPLE_PROCESSOR_CONFIG_DIR) + # Push to hub via save_pretrained + with tempfile.TemporaryDirectory() as tmp_dir: processor.save_pretrained( tmp_dir, - repo_id=tmp_repo, + repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token, ) - new_processor = Wav2Vec2Processor.from_pretrained(tmp_repo) - for k, v in 
processor.feature_extractor.__dict__.items(): - self.assertEqual(v, getattr(new_processor.feature_extractor, k)) - self.assertDictEqual(new_processor.tokenizer.get_vocab(), processor.tokenizer.get_vocab()) - finally: - # Always (try to) delete the repo. - self._try_delete_repo(repo_id=tmp_repo, token=self._token) + new_processor = Wav2Vec2Processor.from_pretrained(tmp_repo.repo_id) + for k, v in processor.feature_extractor.__dict__.items(): + self.assertEqual(v, getattr(new_processor.feature_extractor, k)) + self.assertDictEqual(new_processor.tokenizer.get_vocab(), processor.tokenizer.get_vocab()) def test_push_to_hub_dynamic_processor(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"{USER}/test-dynamic-processor-{Path(tmp_dir).name}" + with TemporaryHubRepo(token=self._token) as tmp_repo: + CustomFeatureExtractor.register_for_auto_class() + CustomTokenizer.register_for_auto_class() + CustomProcessor.register_for_auto_class() - CustomFeatureExtractor.register_for_auto_class() - CustomTokenizer.register_for_auto_class() - CustomProcessor.register_for_auto_class() - - feature_extractor = CustomFeatureExtractor.from_pretrained(SAMPLE_PROCESSOR_CONFIG_DIR) + feature_extractor = CustomFeatureExtractor.from_pretrained(SAMPLE_PROCESSOR_CONFIG_DIR) - with tempfile.TemporaryDirectory() as tmp_dir: - vocab_file = os.path.join(tmp_dir, "vocab.txt") - with open(vocab_file, "w", encoding="utf-8") as vocab_writer: - vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens])) - tokenizer = CustomTokenizer(vocab_file) + with tempfile.TemporaryDirectory() as tmp_dir: + vocab_file = os.path.join(tmp_dir, "vocab.txt") + with open(vocab_file, "w", encoding="utf-8") as vocab_writer: + vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens])) + tokenizer = CustomTokenizer(vocab_file) - processor = CustomProcessor(feature_extractor, tokenizer) + processor = CustomProcessor(feature_extractor, tokenizer) - create_repo(tmp_repo, 
token=self._token) + with tempfile.TemporaryDirectory() as tmp_dir: repo = Repository(tmp_dir, clone_from=tmp_repo, token=self._token) processor.save_pretrained(tmp_dir) @@ -468,10 +448,6 @@ def test_push_to_hub_dynamic_processor(self): repo.push_to_hub() - new_processor = AutoProcessor.from_pretrained(tmp_repo, trust_remote_code=True) + new_processor = AutoProcessor.from_pretrained(tmp_repo.repo_id, trust_remote_code=True) # Can't make an isinstance check because the new_processor is from the CustomProcessor class of a dynamic module self.assertEqual(new_processor.__class__.__name__, "CustomProcessor") - - finally: - # Always (try to) delete the repo. - self._try_delete_repo(repo_id=tmp_repo, token=self._token) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 5658372fa71..f7b4a8637bf 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -32,10 +32,9 @@ from unittest.mock import Mock, patch import numpy as np -from huggingface_hub import HfFolder, ModelCard, create_branch, delete_repo, list_repo_commits, list_repo_files +from huggingface_hub import HfFolder, ModelCard, create_branch, list_repo_commits, list_repo_files from packaging import version from parameterized import parameterized -from requests.exceptions import HTTPError from transformers import ( AutoFeatureExtractor, @@ -59,6 +58,7 @@ USER, CaptureLogger, LoggingLevel, + TemporaryHubRepo, TestCasePlus, backend_device_count, execute_subprocess_async, @@ -4152,64 +4152,49 @@ def setUpClass(cls): cls._token = TOKEN HfFolder.save_token(TOKEN) - @classmethod - def tearDownClass(cls): - for model in [ - "test-trainer", - "test-trainer-epoch", - "test-trainer-step", - "test-trainer-tensorboard", - "test-trainer-tags", - ]: - try: - delete_repo(token=cls._token, repo_id=model) - except HTTPError: - pass - - try: - delete_repo(token=cls._token, repo_id="valid_org/test-trainer-org") - except HTTPError: - pass - def test_push_to_hub(self): - with 
tempfile.TemporaryDirectory() as tmp_dir: - trainer = get_regression_trainer( - output_dir=os.path.join(tmp_dir, "test-trainer"), - push_to_hub=True, - hub_token=self._token, - ) - url = trainer.push_to_hub() + with TemporaryHubRepo(token=self._token) as tmp_repo: + output_dir_name = tmp_repo.repo_name + with tempfile.TemporaryDirectory() as tmp_dir: + trainer = get_regression_trainer( + output_dir=os.path.join(tmp_dir, output_dir_name), + push_to_hub=True, + hub_token=self._token, + ) + url = trainer.push_to_hub() # Extract repo_name from the url re_search = re.search(ENDPOINT_STAGING + r"/([^/]+/[^/]+)/", url) self.assertTrue(re_search is not None) repo_name = re_search.groups()[0] - self.assertEqual(repo_name, f"{USER}/test-trainer") + self.assertEqual(repo_name, f"{USER}/{output_dir_name}") model = RegressionPreTrainedModel.from_pretrained(repo_name) self.assertEqual(model.a.item(), trainer.model.a.item()) self.assertEqual(model.b.item(), trainer.model.b.item()) def test_push_to_hub_in_organization(self): - with tempfile.TemporaryDirectory() as tmp_dir: - trainer = get_regression_trainer(output_dir=tmp_dir) - trainer.save_model() - trainer = get_regression_trainer( - output_dir=os.path.join(tmp_dir, "test-trainer-org"), - push_to_hub=True, - hub_model_id="valid_org/test-trainer-org", - hub_token=self._token, - ) - url = trainer.push_to_hub() + with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo: + with tempfile.TemporaryDirectory() as tmp_dir: + trainer = get_regression_trainer(output_dir=tmp_dir) + trainer.save_model() + output_dir_name = tmp_repo.repo_name + trainer = get_regression_trainer( + output_dir=os.path.join(tmp_dir, output_dir_name), + push_to_hub=True, + hub_model_id=f"valid_org/{output_dir_name}", + hub_token=self._token, + ) + url = trainer.push_to_hub() # Extract repo_name from the url re_search = re.search(ENDPOINT_STAGING + r"/([^/]+/[^/]+)/", url) self.assertTrue(re_search is not None) repo_name = re_search.groups()[0] 
- self.assertEqual(repo_name, "valid_org/test-trainer-org") + self.assertEqual(repo_name, f"valid_org/{output_dir_name}") - model = RegressionPreTrainedModel.from_pretrained("valid_org/test-trainer-org") + model = RegressionPreTrainedModel.from_pretrained(f"valid_org/{output_dir_name}") self.assertEqual(model.a.item(), trainer.model.a.item()) self.assertEqual(model.b.item(), trainer.model.b.item()) @@ -4226,120 +4211,130 @@ def get_commit_history(self, repo): return [commit.strip() for commit in commits] def test_push_to_hub_with_saves_each_epoch(self): - with tempfile.TemporaryDirectory() as tmp_dir: - with self.assertLogs(level="WARNING") as logs: - trainer = get_regression_trainer( - output_dir=os.path.join(tmp_dir, "test-trainer-epoch"), - push_to_hub=True, - hub_token=self._token, - # To avoid any flakiness if the training goes faster than the uploads. - hub_always_push=True, - save_strategy="epoch", - ) - trainer.train() + with TemporaryHubRepo(token=self._token) as tmp_repo: + with tempfile.TemporaryDirectory() as tmp_dir: + with self.assertLogs(level="WARNING") as logs: + output_dir_name = tmp_repo.repo_name + trainer = get_regression_trainer( + output_dir=os.path.join(tmp_dir, output_dir_name), + push_to_hub=True, + hub_token=self._token, + # To avoid any flakiness if the training goes faster than the uploads. + hub_always_push=True, + save_strategy="epoch", + ) + trainer.train() - commits = list_repo_commits(f"{USER}/test-trainer-epoch", token=self._token) - commits = [c.title for c in commits] - self.assertIn("initial commit", commits) - self.assertIn("Training in progress, epoch 1", commits) - self.assertIn("Training in progress, epoch 2", commits) - # Epochs 3 and 4 are not guaranteed to be present (empty commits) - self.assertTrue(any("Skipping to prevent empty commit." 
in record.message for record in logs.records)) + commits = list_repo_commits(f"{USER}/{output_dir_name}", token=self._token) + commits = [c.title for c in commits] + self.assertIn("initial commit", commits) + self.assertIn("Training in progress, epoch 1", commits) + self.assertIn("Training in progress, epoch 2", commits) + # Epochs 3 and 4 are not guaranteed to be present (empty commits) + self.assertTrue(any("Skipping to prevent empty commit." in record.message for record in logs.records)) def test_push_to_hub_with_saves_each_n_steps(self): num_gpus = max(1, backend_device_count(torch_device)) if num_gpus > 2: self.skipTest(reason="More than 2 GPUs available") - with tempfile.TemporaryDirectory() as tmp_dir: - with self.assertLogs(level="WARNING") as logs: - trainer = get_regression_trainer( - output_dir=os.path.join(tmp_dir, "test-trainer-step"), - push_to_hub=True, - hub_token=self._token, - # To avoid any flakiness if the training goes faster than the uploads. - hub_always_push=True, - save_strategy="steps", - save_steps=5, - ) - trainer.train() + with TemporaryHubRepo(token=self._token) as tmp_repo: + with tempfile.TemporaryDirectory() as tmp_dir: + with self.assertLogs(level="WARNING") as logs: + output_dir_name = tmp_repo.repo_name + trainer = get_regression_trainer( + output_dir=os.path.join(tmp_dir, output_dir_name), + push_to_hub=True, + hub_token=self._token, + # To avoid any flakiness if the training goes faster than the uploads. 
+ hub_always_push=True, + save_strategy="steps", + save_steps=5, + ) + trainer.train() - commits = list_repo_commits(f"{USER}/test-trainer-step", token=self._token) - commits = [c.title for c in commits] - self.assertIn("initial commit", commits) + commits = list_repo_commits(f"{USER}/{output_dir_name}", token=self._token) + commits = [c.title for c in commits] + self.assertIn("initial commit", commits) - # Some commits are skipped if nothing has changed - # We expect 1 commit per 5 epochs + 1 commit at the end - nb_empty_commits = len( - [record for record in logs.records if "Skipping to prevent empty commit." in record.message] - ) - nb_epoch_commits = len([commit for commit in commits if "Training in progress, step" in commit]) + # Some commits are skipped if nothing has changed + # We expect 1 commit per 5 epochs + 1 commit at the end + nb_empty_commits = len( + [record for record in logs.records if "Skipping to prevent empty commit." in record.message] + ) + nb_epoch_commits = len([commit for commit in commits if "Training in progress, step" in commit]) - # max_steps depend on the number of available GPUs - max_steps = math.ceil(trainer.args.num_train_epochs * len(trainer.get_train_dataloader())) - nb_expected_commits = len(range(5, max_steps, 5)) + # max_steps depend on the number of available GPUs + max_steps = math.ceil(trainer.args.num_train_epochs * len(trainer.get_train_dataloader())) + nb_expected_commits = len(range(5, max_steps, 5)) - # '>=' since final commit might be an empty commit as well (not deterministic) - self.assertGreaterEqual(nb_empty_commits + nb_epoch_commits, nb_expected_commits) + # '>=' since final commit might be an empty commit as well (not deterministic) + self.assertGreaterEqual(nb_empty_commits + nb_epoch_commits, nb_expected_commits) @require_tensorboard def test_push_to_hub_with_tensorboard_logs(self): - with tempfile.TemporaryDirectory() as tmp_dir: - trainer = get_regression_trainer( - output_dir=os.path.join(tmp_dir, 
"test-trainer-tensorboard"), - hub_token=self._token, - save_strategy="epoch", - report_to=["tensorboard"], - keep_report_to=True, - ) - trainer.train() - # Push the runs via `push_to_hub()` - trainer.push_to_hub() + with TemporaryHubRepo(token=self._token) as tmp_repo: + with tempfile.TemporaryDirectory() as tmp_dir: + output_dir_name = tmp_repo.repo_name + trainer = get_regression_trainer( + output_dir=os.path.join(tmp_dir, output_dir_name), + hub_token=self._token, + save_strategy="epoch", + report_to=["tensorboard"], + keep_report_to=True, + ) + trainer.train() + # Push the runs via `push_to_hub()` + trainer.push_to_hub() - files = list_repo_files(f"{USER}/test-trainer-tensorboard", token=self._token) - found_log = False - for f in files: - if len(f.split("runs")) > 1 and "events.out.tfevents" in f: - found_log = True + files = list_repo_files(f"{USER}/{output_dir_name}", token=self._token) + found_log = False + for f in files: + if len(f.split("runs")) > 1 and "events.out.tfevents" in f: + found_log = True - assert found_log is True, "No tensorboard log found in repo" + assert found_log is True, "No tensorboard log found in repo" def test_push_to_hub_tags(self): # Checks if `trainer.push_to_hub()` works correctly by adding the desired # tag without having to pass `tags` in `push_to_hub` # see: - with tempfile.TemporaryDirectory() as tmp_dir: - trainer = get_regression_trainer( - output_dir=os.path.join(tmp_dir, "test-trainer-tags"), - push_to_hub=True, - hub_token=self._token, - ) + with TemporaryHubRepo(token=self._token) as tmp_repo: + with tempfile.TemporaryDirectory() as tmp_dir: + output_dir_name = tmp_repo.repo_name + trainer = get_regression_trainer( + output_dir=os.path.join(tmp_dir, output_dir_name), + push_to_hub=True, + hub_token=self._token, + ) - trainer.model.add_model_tags(["test-trainer-tags"]) + trainer.model.add_model_tags(["test-trainer-tags"]) - url = trainer.push_to_hub() + url = trainer.push_to_hub() # Extract repo_name from the url 
re_search = re.search(ENDPOINT_STAGING + r"/([^/]+/[^/]+)/", url) self.assertTrue(re_search is not None) repo_name = re_search.groups()[0] - self.assertEqual(repo_name, f"{USER}/test-trainer-tags") + self.assertEqual(repo_name, f"{USER}/{output_dir_name}") model_card = ModelCard.load(repo_name) self.assertTrue("test-trainer-tags" in model_card.data.tags) def test_push_to_hub_with_revision(self): # Checks if `trainer.push_to_hub()` works correctly by adding revision - with tempfile.TemporaryDirectory() as tmp_dir: - trainer = get_regression_trainer( - output_dir=os.path.join(tmp_dir, "test-trainer-revision"), - push_to_hub=True, - hub_token=self._token, - ) - branch = "v1.0" - create_branch(repo_id=trainer.hub_model_id, branch=branch, token=self._token, exist_ok=True) - url = trainer.push_to_hub(revision=branch) + with TemporaryHubRepo(token=self._token) as tmp_repo: + with tempfile.TemporaryDirectory() as tmp_dir: + output_dir_name = tmp_repo.repo_name + trainer = get_regression_trainer( + output_dir=os.path.join(tmp_dir, output_dir_name), + push_to_hub=True, + hub_token=self._token, + ) + branch = "v1.0" + create_branch(repo_id=trainer.hub_model_id, branch=branch, token=self._token, exist_ok=True) + url = trainer.push_to_hub(revision=branch) # Extract branch from the url re_search = re.search(r"tree/([^/]+)/", url) diff --git a/tests/utils/test_configuration_utils.py b/tests/utils/test_configuration_utils.py index 35a651d0e59..a6408928f61 100644 --- a/tests/utils/test_configuration_utils.py +++ b/tests/utils/test_configuration_utils.py @@ -22,12 +22,12 @@ import warnings from pathlib import Path -from huggingface_hub import HfFolder, delete_repo +from huggingface_hub import HfFolder from requests.exceptions import HTTPError from transformers import AutoConfig, BertConfig, GPT2Config from transformers.configuration_utils import PretrainedConfig -from transformers.testing_utils import TOKEN, USER, is_staging_test +from transformers.testing_utils import TOKEN, 
TemporaryHubRepo, is_staging_test sys.path.append(str(Path(__file__).parent.parent.parent / "utils")) @@ -98,106 +98,72 @@ def setUpClass(cls): cls._token = TOKEN HfFolder.save_token(TOKEN) - @staticmethod - def _try_delete_repo(repo_id, token): - try: - # Reset repo - delete_repo(repo_id=repo_id, token=token) - except: # noqa E722 - pass - def test_push_to_hub(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"{USER}/test-config-{Path(tmp_dir).name}" - - config = BertConfig( - vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 - ) - config.push_to_hub(tmp_repo, token=self._token) - - new_config = BertConfig.from_pretrained(tmp_repo) - for k, v in config.to_dict().items(): - if k != "transformers_version": - self.assertEqual(v, getattr(new_config, k)) - finally: - # Always (try to) delete the repo. - self._try_delete_repo(repo_id=tmp_repo, token=self._token) + with TemporaryHubRepo(token=self._token) as tmp_repo: + config = BertConfig( + vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 + ) + config.push_to_hub(tmp_repo.repo_id, token=self._token) + + new_config = BertConfig.from_pretrained(tmp_repo.repo_id) + for k, v in config.to_dict().items(): + if k != "transformers_version": + self.assertEqual(v, getattr(new_config, k)) def test_push_to_hub_via_save_pretrained(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"{USER}/test-config-{Path(tmp_dir).name}" - - config = BertConfig( - vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 - ) - # Push to hub via save_pretrained - config.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token) - - new_config = BertConfig.from_pretrained(tmp_repo) - for k, v in config.to_dict().items(): - if k != "transformers_version": - self.assertEqual(v, getattr(new_config, k)) - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + with TemporaryHubRepo(token=self._token) as tmp_repo: + config = BertConfig( + vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 + ) + # Push to hub via save_pretrained + with tempfile.TemporaryDirectory() as tmp_dir: + config.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token) + + new_config = BertConfig.from_pretrained(tmp_repo.repo_id) + for k, v in config.to_dict().items(): + if k != "transformers_version": + self.assertEqual(v, getattr(new_config, k)) def test_push_to_hub_in_organization(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"valid_org/test-config-org-{Path(tmp_dir).name}" - config = BertConfig( - vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 - ) - config.push_to_hub(tmp_repo, token=self._token) - - new_config = BertConfig.from_pretrained(tmp_repo) - for k, v in config.to_dict().items(): - if k != "transformers_version": - self.assertEqual(v, getattr(new_config, k)) - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo: + config = BertConfig( + vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 + ) + config.push_to_hub(tmp_repo.repo_id, token=self._token) + + new_config = BertConfig.from_pretrained(tmp_repo.repo_id) + for k, v in config.to_dict().items(): + if k != "transformers_version": + self.assertEqual(v, getattr(new_config, k)) def test_push_to_hub_in_organization_via_save_pretrained(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"valid_org/test-config-org-{Path(tmp_dir).name}" - config = BertConfig( - vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 - ) - # Push to hub via save_pretrained - config.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token) - - new_config = BertConfig.from_pretrained(tmp_repo) - for k, v in config.to_dict().items(): - if k != "transformers_version": - self.assertEqual(v, getattr(new_config, k)) - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo: + config = BertConfig( + vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 + ) + # Push to hub via save_pretrained + with tempfile.TemporaryDirectory() as tmp_dir: + config.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token) - def test_push_to_hub_dynamic_config(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"{USER}/test-dynamic-config-{Path(tmp_dir).name}" + new_config = BertConfig.from_pretrained(tmp_repo.repo_id) + for k, v in config.to_dict().items(): + if k != "transformers_version": + self.assertEqual(v, getattr(new_config, k)) - CustomConfig.register_for_auto_class() - config = CustomConfig(attribute=42) + def test_push_to_hub_dynamic_config(self): + with TemporaryHubRepo(token=self._token) as tmp_repo: + CustomConfig.register_for_auto_class() + config = CustomConfig(attribute=42) - config.push_to_hub(tmp_repo, token=self._token) + config.push_to_hub(tmp_repo.repo_id, token=self._token) - # This has added the proper auto_map field to the config - self.assertDictEqual(config.auto_map, {"AutoConfig": "custom_configuration.CustomConfig"}) + # This has added the proper auto_map field to the config + self.assertDictEqual(config.auto_map, {"AutoConfig": "custom_configuration.CustomConfig"}) - new_config = AutoConfig.from_pretrained(tmp_repo, trust_remote_code=True) - # Can't make an isinstance check because the new_config is from the FakeConfig class of a dynamic module - self.assertEqual(new_config.__class__.__name__, "CustomConfig") - self.assertEqual(new_config.attribute, 42) - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + new_config = AutoConfig.from_pretrained(tmp_repo.repo_id, trust_remote_code=True) + # Can't make an isinstance check because the new_config is from the FakeConfig class of a dynamic module + self.assertEqual(new_config.__class__.__name__, "CustomConfig") + self.assertEqual(new_config.attribute, 42) class ConfigTestUtils(unittest.TestCase): diff --git a/tests/utils/test_feature_extraction_utils.py b/tests/utils/test_feature_extraction_utils.py index 0d4e4cfb486..06121dab5d3 100644 --- a/tests/utils/test_feature_extraction_utils.py +++ b/tests/utils/test_feature_extraction_utils.py @@ -20,11 +20,11 @@ import unittest.mock as mock from pathlib import Path -from huggingface_hub import HfFolder, delete_repo +from huggingface_hub import HfFolder from requests.exceptions import HTTPError from transformers import AutoFeatureExtractor, Wav2Vec2FeatureExtractor -from transformers.testing_utils import TOKEN, USER, get_tests_dir, is_staging_test +from transformers.testing_utils import TOKEN, TemporaryHubRepo, get_tests_dir, is_staging_test sys.path.append(str(Path(__file__).parent.parent.parent / "utils")) @@ -60,91 +60,63 @@ def setUpClass(cls): cls._token = TOKEN HfFolder.save_token(TOKEN) - @staticmethod - def _try_delete_repo(repo_id, token): - try: - # Reset repo - delete_repo(repo_id=repo_id, token=token) - except: # noqa E722 - pass - def test_push_to_hub(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"{USER}/test-feature-extractor-{Path(tmp_dir).name}" - - feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR) - feature_extractor.push_to_hub(tmp_repo, token=self._token) + with TemporaryHubRepo(token=self._token) as tmp_repo: + feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR) + feature_extractor.push_to_hub(tmp_repo.repo_id, token=self._token) - new_feature_extractor = 
Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo) - for k, v in feature_extractor.__dict__.items(): - self.assertEqual(v, getattr(new_feature_extractor, k)) - finally: - # Always (try to) delete the repo. - self._try_delete_repo(repo_id=tmp_repo, token=self._token) + new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo.repo_id) + for k, v in feature_extractor.__dict__.items(): + self.assertEqual(v, getattr(new_feature_extractor, k)) def test_push_to_hub_via_save_pretrained(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"{USER}/test-feature-extractor-{Path(tmp_dir).name}" - feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR) - # Push to hub via save_pretrained - feature_extractor.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token) - - new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo) - for k, v in feature_extractor.__dict__.items(): - self.assertEqual(v, getattr(new_feature_extractor, k)) - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + with TemporaryHubRepo(token=self._token) as tmp_repo: + feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR) + # Push to hub via save_pretrained + with tempfile.TemporaryDirectory() as tmp_dir: + feature_extractor.save_pretrained( + tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token + ) + + new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo.repo_id) + for k, v in feature_extractor.__dict__.items(): + self.assertEqual(v, getattr(new_feature_extractor, k)) def test_push_to_hub_in_organization(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"valid_org/test-feature-extractor-{Path(tmp_dir).name}" - feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR) - feature_extractor.push_to_hub(tmp_repo, token=self._token) - - new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo) - for k, v in feature_extractor.__dict__.items(): - self.assertEqual(v, getattr(new_feature_extractor, k)) - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo: + feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR) + feature_extractor.push_to_hub(tmp_repo.repo_id, token=self._token) + + new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo.repo_id) + for k, v in feature_extractor.__dict__.items(): + self.assertEqual(v, getattr(new_feature_extractor, k)) def test_push_to_hub_in_organization_via_save_pretrained(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"valid_org/test-feature-extractor-{Path(tmp_dir).name}" - feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR) - # Push to hub via save_pretrained - feature_extractor.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token) - - new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo) - for k, v in feature_extractor.__dict__.items(): - self.assertEqual(v, getattr(new_feature_extractor, k)) - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo: + feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR) + # Push to hub via save_pretrained + with tempfile.TemporaryDirectory() as tmp_dir: + feature_extractor.save_pretrained( + tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token + ) + + new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo.repo_id) + for k, v in feature_extractor.__dict__.items(): + self.assertEqual(v, getattr(new_feature_extractor, k)) def test_push_to_hub_dynamic_feature_extractor(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"{USER}/test-dynamic-feature-extractor-{Path(tmp_dir).name}" - CustomFeatureExtractor.register_for_auto_class() - feature_extractor = CustomFeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR) - - feature_extractor.push_to_hub(tmp_repo, token=self._token) - - # This has added the proper auto_map field to the config - self.assertDictEqual( - feature_extractor.auto_map, - {"AutoFeatureExtractor": "custom_feature_extraction.CustomFeatureExtractor"}, - ) + with TemporaryHubRepo(token=self._token) as tmp_repo: + CustomFeatureExtractor.register_for_auto_class() + feature_extractor = CustomFeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR) + + feature_extractor.push_to_hub(tmp_repo.repo_id, token=self._token) + + # This has added the proper auto_map field to the config + self.assertDictEqual( + feature_extractor.auto_map, + {"AutoFeatureExtractor": "custom_feature_extraction.CustomFeatureExtractor"}, + ) - new_feature_extractor = AutoFeatureExtractor.from_pretrained(tmp_repo, trust_remote_code=True) - # Can't make an isinstance check because the new_feature_extractor is from the CustomFeatureExtractor class of a dynamic module - self.assertEqual(new_feature_extractor.__class__.__name__, 
"CustomFeatureExtractor") - finally: - # Always (try to) delete the repo. - self._try_delete_repo(repo_id=tmp_repo, token=self._token) + new_feature_extractor = AutoFeatureExtractor.from_pretrained(tmp_repo.repo_id, trust_remote_code=True) + # Can't make an isinstance check because the new_feature_extractor is from the CustomFeatureExtractor class of a dynamic module + self.assertEqual(new_feature_extractor.__class__.__name__, "CustomFeatureExtractor") diff --git a/tests/utils/test_image_processing_utils.py b/tests/utils/test_image_processing_utils.py index c64dd94ec34..2e70d78978f 100644 --- a/tests/utils/test_image_processing_utils.py +++ b/tests/utils/test_image_processing_utils.py @@ -19,12 +19,12 @@ import unittest.mock as mock from pathlib import Path -from huggingface_hub import HfFolder, delete_repo +from huggingface_hub import HfFolder from requests.exceptions import HTTPError from transformers import AutoImageProcessor, ViTImageProcessor from transformers.image_processing_utils import get_size_dict -from transformers.testing_utils import TOKEN, USER, get_tests_dir, is_staging_test +from transformers.testing_utils import TOKEN, TemporaryHubRepo, get_tests_dir, is_staging_test sys.path.append(str(Path(__file__).parent.parent.parent / "utils")) @@ -71,93 +71,62 @@ def setUpClass(cls): cls._token = TOKEN HfFolder.save_token(TOKEN) - @staticmethod - def _try_delete_repo(repo_id, token): - try: - # Reset repo - delete_repo(repo_id=repo_id, token=token) - except: # noqa E722 - pass - def test_push_to_hub(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"{USER}/test-image-processor-{Path(tmp_dir).name}" - image_processor = ViTImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR) - image_processor.push_to_hub(tmp_repo, token=self._token) - - new_image_processor = ViTImageProcessor.from_pretrained(tmp_repo) - for k, v in image_processor.__dict__.items(): - self.assertEqual(v, getattr(new_image_processor, k)) - finally: - # 
Always (try to) delete the repo. - self._try_delete_repo(repo_id=tmp_repo, token=self._token) + with TemporaryHubRepo(token=self._token) as tmp_repo: + image_processor = ViTImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR) + image_processor.push_to_hub(tmp_repo.repo_id, token=self._token) + + new_image_processor = ViTImageProcessor.from_pretrained(tmp_repo.repo_id) + for k, v in image_processor.__dict__.items(): + self.assertEqual(v, getattr(new_image_processor, k)) def test_push_to_hub_via_save_pretrained(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"{USER}/test-image-processor-{Path(tmp_dir).name}" - image_processor = ViTImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR) - # Push to hub via save_pretrained - image_processor.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token) - - new_image_processor = ViTImageProcessor.from_pretrained(tmp_repo) - for k, v in image_processor.__dict__.items(): - self.assertEqual(v, getattr(new_image_processor, k)) - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + with TemporaryHubRepo(token=self._token) as tmp_repo: + image_processor = ViTImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR) + # Push to hub via save_pretrained + with tempfile.TemporaryDirectory() as tmp_dir: + image_processor.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token) + + new_image_processor = ViTImageProcessor.from_pretrained(tmp_repo.repo_id) + for k, v in image_processor.__dict__.items(): + self.assertEqual(v, getattr(new_image_processor, k)) def test_push_to_hub_in_organization(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"valid_org/test-image-processor-{Path(tmp_dir).name}" - image_processor = ViTImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR) - image_processor.push_to_hub(tmp_repo, token=self._token) - - new_image_processor = ViTImageProcessor.from_pretrained(tmp_repo) - for k, v in image_processor.__dict__.items(): - self.assertEqual(v, getattr(new_image_processor, k)) - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo: + image_processor = ViTImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR) + image_processor.push_to_hub(tmp_repo.repo_id, token=self._token) + + new_image_processor = ViTImageProcessor.from_pretrained(tmp_repo.repo_id) + for k, v in image_processor.__dict__.items(): + self.assertEqual(v, getattr(new_image_processor, k)) def test_push_to_hub_in_organization_via_save_pretrained(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"valid_org/test-image-processor-{Path(tmp_dir).name}" - image_processor = ViTImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR) - # Push to hub via save_pretrained - image_processor.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token) - - new_image_processor = ViTImageProcessor.from_pretrained(tmp_repo) - for k, v in image_processor.__dict__.items(): - self.assertEqual(v, getattr(new_image_processor, k)) - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo: + image_processor = ViTImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR) + # Push to hub via save_pretrained + with tempfile.TemporaryDirectory() as tmp_dir: + image_processor.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token) + + new_image_processor = ViTImageProcessor.from_pretrained(tmp_repo.repo_id) + for k, v in image_processor.__dict__.items(): + self.assertEqual(v, getattr(new_image_processor, k)) def test_push_to_hub_dynamic_image_processor(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"{USER}/test-dynamic-image-processor-{Path(tmp_dir).name}" - CustomImageProcessor.register_for_auto_class() - image_processor = CustomImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR) - - image_processor.push_to_hub(tmp_repo, token=self._token) - - # This has added the proper auto_map field to the config - self.assertDictEqual( - image_processor.auto_map, - {"AutoImageProcessor": "custom_image_processing.CustomImageProcessor"}, - ) - - new_image_processor = AutoImageProcessor.from_pretrained(tmp_repo, trust_remote_code=True) - # Can't make an isinstance check because the new_image_processor is from the CustomImageProcessor class of a dynamic module - self.assertEqual(new_image_processor.__class__.__name__, "CustomImageProcessor") - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + with TemporaryHubRepo(token=self._token) as tmp_repo: + CustomImageProcessor.register_for_auto_class() + image_processor = CustomImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR) + + image_processor.push_to_hub(tmp_repo.repo_id, token=self._token) + + # This has added the proper auto_map field to the config + self.assertDictEqual( + image_processor.auto_map, + {"AutoImageProcessor": "custom_image_processing.CustomImageProcessor"}, + ) + + new_image_processor = AutoImageProcessor.from_pretrained(tmp_repo.repo_id, trust_remote_code=True) + # Can't make an isinstance check because the new_image_processor is from the CustomImageProcessor class of a dynamic module + self.assertEqual(new_image_processor.__class__.__name__, "CustomImageProcessor") class ImageProcessingUtilsTester(unittest.TestCase): diff --git a/tests/utils/test_modeling_flax_utils.py b/tests/utils/test_modeling_flax_utils.py index 3f86765f333..7f66944446a 100644 --- a/tests/utils/test_modeling_flax_utils.py +++ b/tests/utils/test_modeling_flax_utils.py @@ -14,16 +14,15 @@ import tempfile import unittest -from pathlib import Path import numpy as np -from huggingface_hub import HfFolder, delete_repo, snapshot_download +from huggingface_hub import HfFolder, snapshot_download from transformers import BertConfig, BertModel, is_flax_available, is_torch_available from transformers.testing_utils import ( TOKEN, - USER, CaptureLogger, + TemporaryHubRepo, is_pt_flax_cross_test, is_staging_test, require_flax, @@ -55,103 +54,77 @@ def setUpClass(cls): cls._token = TOKEN HfFolder.save_token(TOKEN) - @staticmethod - def _try_delete_repo(repo_id, token): - try: - # Reset repo - delete_repo(repo_id=repo_id, token=token) - except: # noqa E722 - pass - def test_push_to_hub(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"{USER}/test-model-flax-{Path(tmp_dir).name}" - config = BertConfig( - vocab_size=99, 
hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 - ) - model = FlaxBertModel(config) - model.push_to_hub(tmp_repo, token=self._token) - - new_model = FlaxBertModel.from_pretrained(tmp_repo) - - base_params = flatten_dict(unfreeze(model.params)) - new_params = flatten_dict(unfreeze(new_model.params)) - - for key in base_params.keys(): - max_diff = (base_params[key] - new_params[key]).sum().item() - self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical") - finally: - # Always (try to) delete the repo. - self._try_delete_repo(repo_id=tmp_repo, token=self._token) + with TemporaryHubRepo(token=self._token) as tmp_repo: + config = BertConfig( + vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 + ) + model = FlaxBertModel(config) + model.push_to_hub(tmp_repo.repo_id, token=self._token) + + new_model = FlaxBertModel.from_pretrained(tmp_repo.repo_id) + + base_params = flatten_dict(unfreeze(model.params)) + new_params = flatten_dict(unfreeze(new_model.params)) + + for key in base_params.keys(): + max_diff = (base_params[key] - new_params[key]).sum().item() + self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical") def test_push_to_hub_via_save_pretrained(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"{USER}/test-model-flax-{Path(tmp_dir).name}" - config = BertConfig( - vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 - ) - model = FlaxBertModel(config) - # Push to hub via save_pretrained - model.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token) - - new_model = FlaxBertModel.from_pretrained(tmp_repo) - - base_params = flatten_dict(unfreeze(model.params)) - new_params = flatten_dict(unfreeze(new_model.params)) - - for key in base_params.keys(): - max_diff = (base_params[key] - new_params[key]).sum().item() - self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical") - 
finally: - # Always (try to) delete the repo. - self._try_delete_repo(repo_id=tmp_repo, token=self._token) + with TemporaryHubRepo(token=self._token) as tmp_repo: + config = BertConfig( + vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 + ) + model = FlaxBertModel(config) + # Push to hub via save_pretrained + with tempfile.TemporaryDirectory() as tmp_dir: + model.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token) + + new_model = FlaxBertModel.from_pretrained(tmp_repo.repo_id) + + base_params = flatten_dict(unfreeze(model.params)) + new_params = flatten_dict(unfreeze(new_model.params)) + + for key in base_params.keys(): + max_diff = (base_params[key] - new_params[key]).sum().item() + self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical") def test_push_to_hub_in_organization(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"valid_org/test-model-flax-org-{Path(tmp_dir).name}" - config = BertConfig( - vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 - ) - model = FlaxBertModel(config) - model.push_to_hub(tmp_repo, token=self._token) - - new_model = FlaxBertModel.from_pretrained(tmp_repo) - - base_params = flatten_dict(unfreeze(model.params)) - new_params = flatten_dict(unfreeze(new_model.params)) - - for key in base_params.keys(): - max_diff = (base_params[key] - new_params[key]).sum().item() - self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical") - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo: + config = BertConfig( + vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 + ) + model = FlaxBertModel(config) + model.push_to_hub(tmp_repo.repo_id, token=self._token) + + new_model = FlaxBertModel.from_pretrained(tmp_repo.repo_id) + + base_params = flatten_dict(unfreeze(model.params)) + new_params = flatten_dict(unfreeze(new_model.params)) + + for key in base_params.keys(): + max_diff = (base_params[key] - new_params[key]).sum().item() + self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical") def test_push_to_hub_in_organization_via_save_pretrained(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"valid_org/test-model-flax-org-{Path(tmp_dir).name}" - config = BertConfig( - vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 - ) - model = FlaxBertModel(config) - # Push to hub via save_pretrained - model.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token) - - new_model = FlaxBertModel.from_pretrained(tmp_repo) - - base_params = flatten_dict(unfreeze(model.params)) - new_params = flatten_dict(unfreeze(new_model.params)) - - for key in base_params.keys(): - max_diff = (base_params[key] - new_params[key]).sum().item() - self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical") - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo: + config = BertConfig( + vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 + ) + model = FlaxBertModel(config) + # Push to hub via save_pretrained + with tempfile.TemporaryDirectory() as tmp_dir: + model.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token) + + new_model = FlaxBertModel.from_pretrained(tmp_repo.repo_id) + + base_params = flatten_dict(unfreeze(model.params)) + new_params = flatten_dict(unfreeze(new_model.params)) + + for key in base_params.keys(): + max_diff = (base_params[key] - new_params[key]).sum().item() + self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical") def check_models_equal(model1, model2): diff --git a/tests/utils/test_modeling_tf_utils.py b/tests/utils/test_modeling_tf_utils.py index 9ad607f4588..995618a5205 100644 --- a/tests/utils/test_modeling_tf_utils.py +++ b/tests/utils/test_modeling_tf_utils.py @@ -23,9 +23,8 @@ import tempfile import unittest import unittest.mock as mock -from pathlib import Path -from huggingface_hub import HfFolder, Repository, delete_repo, snapshot_download +from huggingface_hub import HfFolder, Repository, snapshot_download from requests.exceptions import HTTPError from transformers import is_tf_available, is_torch_available @@ -34,6 +33,7 @@ TOKEN, USER, CaptureLogger, + TemporaryHubRepo, _tf_gpu_memory_limit, is_pt_tf_cross_test, is_staging_test, @@ -683,149 +683,119 @@ def setUpClass(cls): cls._token = TOKEN HfFolder.save_token(TOKEN) - @staticmethod - def _try_delete_repo(repo_id, token): - try: - # Reset repo - delete_repo(repo_id=repo_id, token=token) - except: # noqa E722 - pass - def test_push_to_hub(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"{USER}/test-model-tf-{Path(tmp_dir).name}" - config = BertConfig( - vocab_size=99, 
hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 - ) - model = TFBertModel(config) - # Make sure model is properly initialized - model.build_in_name_scope() - - logging.set_verbosity_info() - logger = logging.get_logger("transformers.utils.hub") - with CaptureLogger(logger) as cl: - model.push_to_hub(tmp_repo, token=self._token) - logging.set_verbosity_warning() - # Check the model card was created and uploaded. - self.assertIn("Uploading the following files to __DUMMY_TRANSFORMERS_USER__/test-model-tf", cl.out) - - new_model = TFBertModel.from_pretrained(tmp_repo) - models_equal = True - for p1, p2 in zip(model.weights, new_model.weights): - if not tf.math.reduce_all(p1 == p2): - models_equal = False - break - self.assertTrue(models_equal) - finally: - # Always (try to) delete the repo. - self._try_delete_repo(repo_id=tmp_repo, token=self._token) + with TemporaryHubRepo(token=self._token) as tmp_repo: + config = BertConfig( + vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 + ) + model = TFBertModel(config) + # Make sure model is properly initialized + model.build_in_name_scope() + + logging.set_verbosity_info() + logger = logging.get_logger("transformers.utils.hub") + with CaptureLogger(logger) as cl: + model.push_to_hub(tmp_repo.repo_id, token=self._token) + logging.set_verbosity_warning() + # Check the model card was created and uploaded. 
+            self.assertIn("Uploading the following files to __DUMMY_TRANSFORMERS_USER__/", cl.out)
+
+            new_model = TFBertModel.from_pretrained(tmp_repo.repo_id)
+            models_equal = True
+            for p1, p2 in zip(model.weights, new_model.weights):
+                if not tf.math.reduce_all(p1 == p2):
+                    models_equal = False
+                    break
+            self.assertTrue(models_equal)
 
     def test_push_to_hub_via_save_pretrained(self):
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            try:
-                tmp_repo = f"{USER}/test-model-tf-{Path(tmp_dir).name}"
-                config = BertConfig(
-                    vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
-                )
-                model = TFBertModel(config)
-                # Make sure model is properly initialized
-                model.build_in_name_scope()
+        with TemporaryHubRepo(token=self._token) as tmp_repo:
+            config = BertConfig(
+                vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
+            )
+            model = TFBertModel(config)
+            # Make sure model is properly initialized
+            model.build_in_name_scope()
 
-                # Push to hub via save_pretrained
-                model.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)
+            # Push to hub via save_pretrained
+            with tempfile.TemporaryDirectory() as tmp_dir:
+                model.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token)
 
-                new_model = TFBertModel.from_pretrained(tmp_repo)
-                models_equal = True
-                for p1, p2 in zip(model.weights, new_model.weights):
-                    if not tf.math.reduce_all(p1 == p2):
-                        models_equal = False
-                        break
-                self.assertTrue(models_equal)
-            finally:
-                # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + new_model = TFBertModel.from_pretrained(tmp_repo.repo_id) + models_equal = True + for p1, p2 in zip(model.weights, new_model.weights): + if not tf.math.reduce_all(p1 == p2): + models_equal = False + break + self.assertTrue(models_equal) @is_pt_tf_cross_test def test_push_to_hub_callback(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"{USER}/test-model-tf-callback-{Path(tmp_dir).name}" - config = BertConfig( - vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 - ) - model = TFBertForMaskedLM(config) - model.compile() + with TemporaryHubRepo(token=self._token) as tmp_repo: + config = BertConfig( + vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 + ) + model = TFBertForMaskedLM(config) + model.compile() + with tempfile.TemporaryDirectory() as tmp_dir: push_to_hub_callback = PushToHubCallback( output_dir=tmp_dir, - hub_model_id=tmp_repo, + hub_model_id=tmp_repo.repo_id, hub_token=self._token, ) model.fit(model.dummy_inputs, model.dummy_inputs, epochs=1, callbacks=[push_to_hub_callback]) - new_model = TFBertForMaskedLM.from_pretrained(tmp_repo) - models_equal = True - for p1, p2 in zip(model.weights, new_model.weights): - if not tf.math.reduce_all(p1 == p2): - models_equal = False - break - self.assertTrue(models_equal) - - tf_push_to_hub_params = dict(inspect.signature(TFPreTrainedModel.push_to_hub).parameters) - tf_push_to_hub_params.pop("base_model_card_args") - pt_push_to_hub_params = dict(inspect.signature(PreTrainedModel.push_to_hub).parameters) - pt_push_to_hub_params.pop("deprecated_kwargs") - self.assertDictEaual(tf_push_to_hub_params, pt_push_to_hub_params) - finally: - # Always (try to) delete the repo. 
-                self._try_delete_repo(repo_id=tmp_repo, token=self._token)
+            new_model = TFBertForMaskedLM.from_pretrained(tmp_repo.repo_id)
+            models_equal = True
+            for p1, p2 in zip(model.weights, new_model.weights):
+                if not tf.math.reduce_all(p1 == p2):
+                    models_equal = False
+                    break
+            self.assertTrue(models_equal)
+
+            tf_push_to_hub_params = dict(inspect.signature(TFPreTrainedModel.push_to_hub).parameters)
+            tf_push_to_hub_params.pop("base_model_card_args")
+            pt_push_to_hub_params = dict(inspect.signature(PreTrainedModel.push_to_hub).parameters)
+            pt_push_to_hub_params.pop("deprecated_kwargs")
+            self.assertDictEqual(tf_push_to_hub_params, pt_push_to_hub_params)
 
     def test_push_to_hub_in_organization(self):
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            try:
-                tmp_repo = f"valid_org/test-model-tf-org-{Path(tmp_dir).name}"
-                config = BertConfig(
-                    vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
-                )
-                model = TFBertModel(config)
-                # Make sure model is properly initialized
-                model.build_in_name_scope()
+        with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
+            config = BertConfig(
+                vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
+            )
+            model = TFBertModel(config)
+            # Make sure model is properly initialized
+            model.build_in_name_scope()
 
-                model.push_to_hub(tmp_repo, token=self._token)
+            model.push_to_hub(tmp_repo.repo_id, token=self._token)
 
-                new_model = TFBertModel.from_pretrained(tmp_repo)
-                models_equal = True
-                for p1, p2 in zip(model.weights, new_model.weights):
-                    if not tf.math.reduce_all(p1 == p2):
-                        models_equal = False
-                        break
-                self.assertTrue(models_equal)
-            finally:
-                # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + new_model = TFBertModel.from_pretrained(tmp_repo.repo_id) + models_equal = True + for p1, p2 in zip(model.weights, new_model.weights): + if not tf.math.reduce_all(p1 == p2): + models_equal = False + break + self.assertTrue(models_equal) def test_push_to_hub_in_organization_via_save_pretrained(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"valid_org/test-model-tf-org-{Path(tmp_dir).name}" - config = BertConfig( - vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 - ) - model = TFBertModel(config) - # Make sure model is properly initialized - model.build_in_name_scope() + with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo: + config = BertConfig( + vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 + ) + model = TFBertModel(config) + # Make sure model is properly initialized + model.build_in_name_scope() - # Push to hub via save_pretrained - model.save_pretrained(tmp_dir, push_to_hub=True, token=self._token, repo_id=tmp_repo) + # Push to hub via save_pretrained + with tempfile.TemporaryDirectory() as tmp_dir: + model.save_pretrained(tmp_dir, push_to_hub=True, token=self._token, repo_id=tmp_repo.repo_id) - new_model = TFBertModel.from_pretrained(tmp_repo) - models_equal = True - for p1, p2 in zip(model.weights, new_model.weights): - if not tf.math.reduce_all(p1 == p2): - models_equal = False - break - self.assertTrue(models_equal) - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + new_model = TFBertModel.from_pretrained(tmp_repo.repo_id) + models_equal = True + for p1, p2 in zip(model.weights, new_model.weights): + if not tf.math.reduce_all(p1 == p2): + models_equal = False + break + self.assertTrue(models_equal) diff --git a/tests/utils/test_modeling_utils.py b/tests/utils/test_modeling_utils.py index 85e7c20dd52..458ddeee5ff 100644 --- a/tests/utils/test_modeling_utils.py +++ b/tests/utils/test_modeling_utils.py @@ -28,7 +28,7 @@ from pathlib import Path import requests -from huggingface_hub import HfApi, HfFolder, delete_repo +from huggingface_hub import HfApi, HfFolder from pytest import mark from requests.exceptions import HTTPError @@ -44,9 +44,9 @@ ) from transformers.testing_utils import ( TOKEN, - USER, CaptureLogger, LoggingLevel, + TemporaryHubRepo, TestCasePlus, is_staging_test, require_accelerate, @@ -2000,168 +2000,127 @@ def setUpClass(cls): cls._token = TOKEN HfFolder.save_token(TOKEN) - @staticmethod - def _try_delete_repo(repo_id, token): - try: - # Reset repo - delete_repo(repo_id=repo_id, token=token) - except: # noqa E722 - pass - @unittest.skip(reason="This test is flaky") def test_push_to_hub(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"{USER}/test-model-{Path(tmp_dir).name}" - config = BertConfig( - vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 - ) - model = BertModel(config) - model.push_to_hub(tmp_repo, token=self._token) + with TemporaryHubRepo(token=self._token) as tmp_repo: + config = BertConfig( + vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 + ) + model = BertModel(config) + model.push_to_hub(tmp_repo.repo_id, token=self._token) - new_model = BertModel.from_pretrained(tmp_repo) - for p1, p2 in zip(model.parameters(), new_model.parameters()): - self.assertTrue(torch.equal(p1, p2)) - finally: - # Always (try to) delete 
the repo. - self._try_delete_repo(repo_id=tmp_repo, token=self._token) + new_model = BertModel.from_pretrained(tmp_repo.repo_id) + for p1, p2 in zip(model.parameters(), new_model.parameters()): + self.assertTrue(torch.equal(p1, p2)) @unittest.skip(reason="This test is flaky") def test_push_to_hub_via_save_pretrained(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"{USER}/test-model-{Path(tmp_dir).name}" - config = BertConfig( - vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 - ) - model = BertModel(config) - # Push to hub via save_pretrained - model.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token) + with TemporaryHubRepo(token=self._token) as tmp_repo: + config = BertConfig( + vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 + ) + model = BertModel(config) + # Push to hub via save_pretrained + with tempfile.TemporaryDirectory() as tmp_dir: + model.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token) - new_model = BertModel.from_pretrained(tmp_repo) - for p1, p2 in zip(model.parameters(), new_model.parameters()): - self.assertTrue(torch.equal(p1, p2)) - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + new_model = BertModel.from_pretrained(tmp_repo.repo_id) + for p1, p2 in zip(model.parameters(), new_model.parameters()): + self.assertTrue(torch.equal(p1, p2)) def test_push_to_hub_with_description(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"{USER}/test-model-{Path(tmp_dir).name}" - config = BertConfig( - vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 - ) - model = BertModel(config) - COMMIT_DESCRIPTION = """ + with TemporaryHubRepo(token=self._token) as tmp_repo: + config = BertConfig( + vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 + ) + model = BertModel(config) + COMMIT_DESCRIPTION = """ The commit description supports markdown synthax see: ```python >>> form transformers import AutoConfig >>> config = AutoConfig.from_pretrained("google-bert/bert-base-uncased") ``` """ - commit_details = model.push_to_hub( - tmp_repo, use_auth_token=self._token, create_pr=True, commit_description=COMMIT_DESCRIPTION - ) - self.assertEqual(commit_details.commit_description, COMMIT_DESCRIPTION) - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + commit_details = model.push_to_hub( + tmp_repo.repo_id, use_auth_token=self._token, create_pr=True, commit_description=COMMIT_DESCRIPTION + ) + self.assertEqual(commit_details.commit_description, COMMIT_DESCRIPTION) @unittest.skip(reason="This test is flaky") def test_push_to_hub_in_organization(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"valid_org/test-model-org-{Path(tmp_dir).name}" - config = BertConfig( - vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 - ) - model = BertModel(config) - model.push_to_hub(tmp_repo, token=self._token) + with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo: + config = BertConfig( + vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 + ) + model = BertModel(config) + model.push_to_hub(tmp_repo.repo_id, token=self._token) - new_model = BertModel.from_pretrained(tmp_repo) - for p1, p2 in zip(model.parameters(), new_model.parameters()): - self.assertTrue(torch.equal(p1, p2)) - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + new_model = BertModel.from_pretrained(tmp_repo.repo_id) + for p1, p2 in zip(model.parameters(), new_model.parameters()): + self.assertTrue(torch.equal(p1, p2)) @unittest.skip(reason="This test is flaky") def test_push_to_hub_in_organization_via_save_pretrained(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"valid_org/test-model-org-{Path(tmp_dir).name}" - config = BertConfig( - vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 - ) - model = BertModel(config) - # Push to hub via save_pretrained - model.save_pretrained(tmp_dir, push_to_hub=True, token=self._token, repo_id=tmp_repo) + with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo: + config = BertConfig( + vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 + ) + model = BertModel(config) + # Push to hub via save_pretrained + with tempfile.TemporaryDirectory() as tmp_dir: + model.save_pretrained(tmp_dir, push_to_hub=True, token=self._token, repo_id=tmp_repo.repo_id) - new_model = BertModel.from_pretrained(tmp_repo) - for p1, p2 in zip(model.parameters(), new_model.parameters()): - self.assertTrue(torch.equal(p1, p2)) - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + new_model = BertModel.from_pretrained(tmp_repo.repo_id) + for p1, p2 in zip(model.parameters(), new_model.parameters()): + self.assertTrue(torch.equal(p1, p2)) def test_push_to_hub_dynamic_model(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"{USER}/test-dynamic-model-{Path(tmp_dir).name}" - CustomConfig.register_for_auto_class() - CustomModel.register_for_auto_class() - - config = CustomConfig(hidden_size=32) - model = CustomModel(config) - - model.push_to_hub(tmp_repo, token=self._token) - # checks - self.assertDictEqual( - config.auto_map, - {"AutoConfig": "custom_configuration.CustomConfig", "AutoModel": "custom_modeling.CustomModel"}, - ) + with TemporaryHubRepo(token=self._token) as tmp_repo: + CustomConfig.register_for_auto_class() + CustomModel.register_for_auto_class() + + config = CustomConfig(hidden_size=32) + model = CustomModel(config) + + model.push_to_hub(tmp_repo.repo_id, token=self._token) + # checks + self.assertDictEqual( + config.auto_map, + {"AutoConfig": "custom_configuration.CustomConfig", "AutoModel": "custom_modeling.CustomModel"}, + ) - new_model = AutoModel.from_pretrained(tmp_repo, trust_remote_code=True) - # Can't make an isinstance check because the new_model is from the CustomModel class of a dynamic module - self.assertEqual(new_model.__class__.__name__, "CustomModel") - for p1, p2 in zip(model.parameters(), new_model.parameters()): - self.assertTrue(torch.equal(p1, p2)) + new_model = AutoModel.from_pretrained(tmp_repo.repo_id, trust_remote_code=True) + # Can't make an isinstance check because the new_model is from the CustomModel class of a dynamic module + self.assertEqual(new_model.__class__.__name__, "CustomModel") + for p1, p2 in zip(model.parameters(), new_model.parameters()): + self.assertTrue(torch.equal(p1, p2)) - config = AutoConfig.from_pretrained(tmp_repo, trust_remote_code=True) - new_model = AutoModel.from_config(config, 
trust_remote_code=True) - self.assertEqual(new_model.__class__.__name__, "CustomModel") - finally: - # Always (try to) delete the repo. - self._try_delete_repo(repo_id=tmp_repo, token=self._token) + config = AutoConfig.from_pretrained(tmp_repo.repo_id, trust_remote_code=True) + new_model = AutoModel.from_config(config, trust_remote_code=True) + self.assertEqual(new_model.__class__.__name__, "CustomModel") def test_push_to_hub_with_tags(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"{USER}/test-dynamic-model-with-tags-{Path(tmp_dir).name}" - from huggingface_hub import ModelCard + with TemporaryHubRepo(token=self._token) as tmp_repo: + from huggingface_hub import ModelCard - new_tags = ["tag-1", "tag-2"] + new_tags = ["tag-1", "tag-2"] - CustomConfig.register_for_auto_class() - CustomModel.register_for_auto_class() + CustomConfig.register_for_auto_class() + CustomModel.register_for_auto_class() - config = CustomConfig(hidden_size=32) - model = CustomModel(config) + config = CustomConfig(hidden_size=32) + model = CustomModel(config) - self.assertTrue(model.model_tags is None) + self.assertTrue(model.model_tags is None) - model.add_model_tags(new_tags) + model.add_model_tags(new_tags) - self.assertTrue(model.model_tags == new_tags) + self.assertTrue(model.model_tags == new_tags) - model.push_to_hub(tmp_repo, token=self._token) + model.push_to_hub(tmp_repo.repo_id, token=self._token) - loaded_model_card = ModelCard.load(tmp_repo) - self.assertEqual(loaded_model_card.data.tags, new_tags) - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + loaded_model_card = ModelCard.load(tmp_repo.repo_id) + self.assertEqual(loaded_model_card.data.tags, new_tags) @require_torch diff --git a/tests/utils/test_tokenization_utils.py b/tests/utils/test_tokenization_utils.py index 3600be91a7c..0c28f24f4ca 100644 --- a/tests/utils/test_tokenization_utils.py +++ b/tests/utils/test_tokenization_utils.py @@ -20,7 +20,7 @@ import unittest.mock as mock from pathlib import Path -from huggingface_hub import HfFolder, delete_repo +from huggingface_hub import HfFolder from huggingface_hub.file_download import http_get from requests.exceptions import HTTPError @@ -32,7 +32,7 @@ GPT2TokenizerFast, is_tokenizers_available, ) -from transformers.testing_utils import TOKEN, USER, is_staging_test, require_tokenizers +from transformers.testing_utils import TOKEN, TemporaryHubRepo, is_staging_test, require_tokenizers from transformers.tokenization_utils import ExtensionsTrie, Trie @@ -118,114 +118,84 @@ def setUpClass(cls): cls._token = TOKEN HfFolder.save_token(TOKEN) - @staticmethod - def _try_delete_repo(repo_id, token): - try: - # Reset repo - delete_repo(repo_id=repo_id, token=token) - except: # noqa E722 - pass - def test_push_to_hub(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"{USER}/test-tokenizer-{Path(tmp_dir).name}" + with TemporaryHubRepo(token=self._token) as tmp_repo: + with tempfile.TemporaryDirectory() as tmp_dir: vocab_file = os.path.join(tmp_dir, "vocab.txt") with open(vocab_file, "w", encoding="utf-8") as vocab_writer: vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens])) tokenizer = BertTokenizer(vocab_file) - tokenizer.push_to_hub(tmp_repo, token=self._token) - new_tokenizer = BertTokenizer.from_pretrained(tmp_repo) - self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab) - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + tokenizer.push_to_hub(tmp_repo.repo_id, token=self._token) + new_tokenizer = BertTokenizer.from_pretrained(tmp_repo.repo_id) + self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab) def test_push_to_hub_via_save_pretrained(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"{USER}/test-tokenizer-{Path(tmp_dir).name}" + with TemporaryHubRepo(token=self._token) as tmp_repo: + with tempfile.TemporaryDirectory() as tmp_dir: vocab_file = os.path.join(tmp_dir, "vocab.txt") with open(vocab_file, "w", encoding="utf-8") as vocab_writer: vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens])) tokenizer = BertTokenizer(vocab_file) # Push to hub via save_pretrained - tokenizer.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token) + tokenizer.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token) - new_tokenizer = BertTokenizer.from_pretrained(tmp_repo) - self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab) - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + new_tokenizer = BertTokenizer.from_pretrained(tmp_repo.repo_id) + self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab) def test_push_to_hub_in_organization(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"valid_org/test-tokenizer-{Path(tmp_dir).name}" + with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo: + with tempfile.TemporaryDirectory() as tmp_dir: vocab_file = os.path.join(tmp_dir, "vocab.txt") with open(vocab_file, "w", encoding="utf-8") as vocab_writer: vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens])) tokenizer = BertTokenizer(vocab_file) - tokenizer.push_to_hub(tmp_repo, token=self._token) - new_tokenizer = BertTokenizer.from_pretrained(tmp_repo) - self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab) - finally: - # Always (try to) delete the repo. - self._try_delete_repo(repo_id=tmp_repo, token=self._token) + tokenizer.push_to_hub(tmp_repo.repo_id, token=self._token) + new_tokenizer = BertTokenizer.from_pretrained(tmp_repo.repo_id) + self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab) def test_push_to_hub_in_organization_via_save_pretrained(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"valid_org/test-tokenizer-{Path(tmp_dir).name}" + with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo: + with tempfile.TemporaryDirectory() as tmp_dir: vocab_file = os.path.join(tmp_dir, "vocab.txt") with open(vocab_file, "w", encoding="utf-8") as vocab_writer: vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens])) tokenizer = BertTokenizer(vocab_file) # Push to hub via save_pretrained - tokenizer.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token) + tokenizer.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token) - new_tokenizer = BertTokenizer.from_pretrained(tmp_repo) - 
self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab) - finally: - # Always (try to) delete the repo. - self._try_delete_repo(repo_id=tmp_repo, token=self._token) + new_tokenizer = BertTokenizer.from_pretrained(tmp_repo.repo_id) + self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab) @require_tokenizers def test_push_to_hub_dynamic_tokenizer(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"{USER}/test-dynamic-tokenizer-{Path(tmp_dir).name}" - CustomTokenizer.register_for_auto_class() - + with TemporaryHubRepo(token=self._token) as tmp_repo: + CustomTokenizer.register_for_auto_class() + with tempfile.TemporaryDirectory() as tmp_dir: vocab_file = os.path.join(tmp_dir, "vocab.txt") with open(vocab_file, "w", encoding="utf-8") as vocab_writer: vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens])) tokenizer = CustomTokenizer(vocab_file) - # No fast custom tokenizer - tokenizer.push_to_hub(tmp_repo, token=self._token) + # No fast custom tokenizer + tokenizer.push_to_hub(tmp_repo.repo_id, token=self._token) - tokenizer = AutoTokenizer.from_pretrained(tmp_repo, trust_remote_code=True) - # Can't make an isinstance check because the new_model.config is from the CustomTokenizer class of a dynamic module - self.assertEqual(tokenizer.__class__.__name__, "CustomTokenizer") - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + tokenizer = AutoTokenizer.from_pretrained(tmp_repo.repo_id, trust_remote_code=True) + # Can't make an isinstance check because the new_model.config is from the CustomTokenizer class of a dynamic module + self.assertEqual(tokenizer.__class__.__name__, "CustomTokenizer") @require_tokenizers def test_push_to_hub_dynamic_tokenizer_with_both_slow_and_fast_classes(self): - with tempfile.TemporaryDirectory() as tmp_dir: - try: - tmp_repo = f"{USER}/test-dynamic-tokenizer-{Path(tmp_dir).name}" - CustomTokenizer.register_for_auto_class() + with TemporaryHubRepo(token=self._token) as tmp_repo: + CustomTokenizer.register_for_auto_class() - # Fast and slow custom tokenizer - CustomTokenizerFast.register_for_auto_class() + # Fast and slow custom tokenizer + CustomTokenizerFast.register_for_auto_class() + with tempfile.TemporaryDirectory() as tmp_dir: vocab_file = os.path.join(tmp_dir, "vocab.txt") with open(vocab_file, "w", encoding="utf-8") as vocab_writer: vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens])) @@ -234,17 +204,14 @@ def test_push_to_hub_dynamic_tokenizer_with_both_slow_and_fast_classes(self): bert_tokenizer.save_pretrained(tmp_dir) tokenizer = CustomTokenizerFast.from_pretrained(tmp_dir) - tokenizer.push_to_hub(tmp_repo, token=self._token) - - tokenizer = AutoTokenizer.from_pretrained(tmp_repo, trust_remote_code=True) - # Can't make an isinstance check because the new_model.config is from the FakeConfig class of a dynamic module - self.assertEqual(tokenizer.__class__.__name__, "CustomTokenizerFast") - tokenizer = AutoTokenizer.from_pretrained(tmp_repo, use_fast=False, trust_remote_code=True) - # Can't make an isinstance check because the new_model.config is from the FakeConfig class of a dynamic module - self.assertEqual(tokenizer.__class__.__name__, "CustomTokenizer") - finally: - # Always (try to) delete the repo. 
- self._try_delete_repo(repo_id=tmp_repo, token=self._token) + tokenizer.push_to_hub(tmp_repo.repo_id, token=self._token) + + tokenizer = AutoTokenizer.from_pretrained(tmp_repo.repo_id, trust_remote_code=True) + # Can't make an isinstance check because the new_model.config is from the FakeConfig class of a dynamic module + self.assertEqual(tokenizer.__class__.__name__, "CustomTokenizerFast") + tokenizer = AutoTokenizer.from_pretrained(tmp_repo.repo_id, use_fast=False, trust_remote_code=True) + # Can't make an isinstance check because the new_model.config is from the FakeConfig class of a dynamic module + self.assertEqual(tokenizer.__class__.__name__, "CustomTokenizer") class TrieTest(unittest.TestCase):