Skip to content

Commit

Permalink
[tests] make cuda-only tests device-agnostic (#35607)
Browse files Browse the repository at this point in the history
* initial commit

* remove unrelated files

* further remove

* Update test_trainer.py

* fix style
  • Loading branch information
faaany authored Jan 13, 2025
1 parent e6f9b03 commit 2fa876d
Show file tree
Hide file tree
Showing 18 changed files with 57 additions and 47 deletions.
3 changes: 1 addition & 2 deletions tests/fsdp/test_fsdp.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
require_accelerate,
require_fsdp,
require_torch_accelerator,
require_torch_gpu,
require_torch_multi_accelerator,
slow,
torch_device,
Expand Down Expand Up @@ -288,7 +287,7 @@ def test_training_and_can_resume_normally(self, state_dict_type):

@require_torch_multi_accelerator
@slow
@require_torch_gpu
@require_torch_accelerator
@require_fsdp
def test_fsdp_cpu_offloading(self):
try:
Expand Down
9 changes: 6 additions & 3 deletions tests/generation/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
require_flash_attn,
require_optimum_quanto,
require_torch,
require_torch_accelerator,
require_torch_gpu,
require_torch_multi_accelerator,
require_torch_multi_gpu,
Expand Down Expand Up @@ -2043,7 +2044,7 @@ def test_generate_with_quant_cache(self):
model.generate(**generation_kwargs, **inputs_dict)

@pytest.mark.generate
@require_torch_gpu
@require_torch_accelerator
@slow
def test_generate_compile_model_forward(self):
"""
Expand Down Expand Up @@ -3791,10 +3792,12 @@ def test_assisted_decoding_in_different_gpu(self):
self.assertTrue(input_length <= out.shape[-1] <= input_length + 20)

@slow
@require_torch_gpu
@require_torch_accelerator
def test_assisted_decoding_model_in_gpu_assistant_in_cpu(self):
# PT-only test: TF doesn't support assisted decoding yet.
model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-MistralForCausalLM").to("cuda")
model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-MistralForCausalLM").to(
torch_device
)
assistant = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-MistralForCausalLM").to(
"cpu"
)
Expand Down
11 changes: 6 additions & 5 deletions tests/models/blip_2/test_modeling_blip_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from transformers import CONFIG_MAPPING, Blip2Config, Blip2QFormerConfig, Blip2VisionConfig
from transformers.testing_utils import (
require_torch,
require_torch_accelerator,
require_torch_fp16,
require_torch_gpu,
require_torch_multi_accelerator,
Expand Down Expand Up @@ -1565,7 +1566,7 @@ def test_forward_signature(self):
self.assertListEqual(arg_names[: len(expected_arg_names)], expected_arg_names)

@slow
@require_torch_gpu
@require_torch_accelerator
def test_model_from_pretrained(self):
model_name = "Salesforce/blip2-itm-vit-g"
model = Blip2TextModelWithProjection.from_pretrained(model_name)
Expand Down Expand Up @@ -2191,7 +2192,7 @@ def test_expansion_in_processing(self):

self.assertTrue(generated_text_expanded == generated_text)

@require_torch_gpu
@require_torch_accelerator
def test_inference_itm(self):
model_name = "Salesforce/blip2-itm-vit-g"
processor = Blip2Processor.from_pretrained(model_name)
Expand All @@ -2210,7 +2211,7 @@ def test_inference_itm(self):
self.assertTrue(torch.allclose(torch.nn.Softmax()(out_itm[0].cpu()), expected_scores, rtol=1e-3, atol=1e-3))
self.assertTrue(torch.allclose(out[0].cpu(), torch.Tensor([[0.4406]]), rtol=1e-3, atol=1e-3))

@require_torch_gpu
@require_torch_accelerator
@require_torch_fp16
def test_inference_itm_fp16(self):
model_name = "Salesforce/blip2-itm-vit-g"
Expand All @@ -2232,7 +2233,7 @@ def test_inference_itm_fp16(self):
)
self.assertTrue(torch.allclose(out[0].cpu().float(), torch.Tensor([[0.4406]]), rtol=1e-3, atol=1e-3))

@require_torch_gpu
@require_torch_accelerator
@require_torch_fp16
def test_inference_vision_with_projection_fp16(self):
model_name = "Salesforce/blip2-itm-vit-g"
Expand All @@ -2256,7 +2257,7 @@ def test_inference_vision_with_projection_fp16(self):
]
self.assertTrue(np.allclose(out.image_embeds[0][0][:6].tolist(), expected_image_embeds, atol=1e-3))

@require_torch_gpu
@require_torch_accelerator
@require_torch_fp16
def test_inference_text_with_projection_fp16(self):
model_name = "Salesforce/blip2-itm-vit-g"
Expand Down
4 changes: 2 additions & 2 deletions tests/models/diffllama/test_modeling_diffllama.py
Original file line number Diff line number Diff line change
Expand Up @@ -676,7 +676,7 @@ def test_eager_matches_sdpa_generate(self):
)


@require_torch_gpu
@require_torch_accelerator
class DiffLlamaIntegrationTest(unittest.TestCase):
# This variable is used to determine which CUDA device we are using for our runners (A10 or T4)
# Depending on the hardware we get different logits / generations
Expand All @@ -689,7 +689,7 @@ def setUpClass(cls):
cls.cuda_compute_capability_major_version = torch.cuda.get_device_capability()[0]

@slow
@require_torch_gpu
@require_torch_accelerator
@require_read_token
def test_compile_static_cache(self):
# `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2
Expand Down
4 changes: 2 additions & 2 deletions tests/models/falcon_mamba/test_modeling_falcon_mamba.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from transformers.testing_utils import (
require_bitsandbytes,
require_torch,
require_torch_gpu,
require_torch_accelerator,
require_torch_multi_gpu,
slow,
torch_device,
Expand Down Expand Up @@ -426,7 +426,7 @@ def recursive_check(tuple_object, dict_object):


@require_torch
@require_torch_gpu
@require_torch_accelerator
@slow
class FalconMambaIntegrationTests(unittest.TestCase):
def setUp(self):
Expand Down
4 changes: 2 additions & 2 deletions tests/models/fuyu/test_modeling_fuyu.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from parameterized import parameterized

from transformers import FuyuConfig, is_torch_available, is_vision_available
from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device
from transformers.testing_utils import require_torch, require_torch_accelerator, slow, torch_device
from transformers.utils import cached_property

from ...generation.test_utils import GenerationTesterMixin
Expand Down Expand Up @@ -327,7 +327,7 @@ def test_model_parallelism(self):


@slow
@require_torch_gpu
@require_torch_accelerator
class FuyuModelIntegrationTest(unittest.TestCase):
@cached_property
def default_processor(self):
Expand Down
5 changes: 2 additions & 3 deletions tests/models/llama/test_modeling_llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
require_read_token,
require_torch,
require_torch_accelerator,
require_torch_gpu,
slow,
torch_device,
)
Expand Down Expand Up @@ -541,7 +540,7 @@ def _reinitialize_config(base_config, new_kwargs):
config = _reinitialize_config(base_config, {"rope_scaling": {"rope_type": "linear"}}) # missing "factor"


@require_torch_gpu
@require_torch_accelerator
class LlamaIntegrationTest(unittest.TestCase):
# This variable is used to determine which CUDA device we are using for our runners (A10 or T4)
# Depending on the hardware we get different logits / generations
Expand Down Expand Up @@ -695,7 +694,7 @@ def test_model_7b_dola_generation(self):
self.assertEqual(EXPECTED_TEXT_COMPLETION, text)

@slow
@require_torch_gpu
@require_torch_accelerator
@require_read_token
def test_compile_static_cache(self):
# `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2
Expand Down
2 changes: 1 addition & 1 deletion tests/models/mistral/test_modeling_mistral.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ def test_flash_attn_2_inference_equivalence_right_padding(self):
self.skipTest(reason="Mistral flash attention does not support right padding")


@require_torch_gpu
@require_torch_accelerator
class MistralIntegrationTest(unittest.TestCase):
# This variable is used to determine which CUDA device we are using for our runners (A10 or T4)
# Depending on the hardware we get different logits / generations
Expand Down
5 changes: 3 additions & 2 deletions tests/models/mixtral/test_modeling_mixtral.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from transformers.testing_utils import (
require_flash_attn,
require_torch,
require_torch_accelerator,
require_torch_gpu,
slow,
torch_device,
Expand Down Expand Up @@ -471,7 +472,7 @@ def setUpClass(cls):
cls.cuda_compute_capability_major_version = torch.cuda.get_device_capability()[0]

@slow
@require_torch_gpu
@require_torch_accelerator
def test_small_model_logits(self):
model_id = "hf-internal-testing/Mixtral-tiny"
dummy_input = torch.LongTensor([[0, 1, 0], [0, 1, 0]]).to(torch_device)
Expand Down Expand Up @@ -507,7 +508,7 @@ def test_small_model_logits(self):
)

@slow
@require_torch_gpu
@require_torch_accelerator
def test_small_model_logits_batched(self):
model_id = "hf-internal-testing/Mixtral-tiny"
dummy_input = torch.LongTensor([[0, 0, 0, 0, 0, 0, 1, 2, 3], [1, 1, 2, 3, 4, 5, 6, 7, 8]]).to(torch_device)
Expand Down
3 changes: 2 additions & 1 deletion tests/models/nemotron/test_modeling_nemotron.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
require_flash_attn,
require_read_token,
require_torch,
require_torch_accelerator,
require_torch_gpu,
require_torch_sdpa,
slow,
Expand Down Expand Up @@ -103,7 +104,7 @@ def test_model_outputs_equivalence(self, **kwargs):
pass

@require_torch_sdpa
@require_torch_gpu
@require_torch_accelerator
@slow
def test_sdpa_equivalence(self):
for model_class in self.all_model_classes:
Expand Down
8 changes: 4 additions & 4 deletions tests/models/omdet_turbo/test_modeling_omdet_turbo.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from transformers.testing_utils import (
require_timm,
require_torch,
require_torch_gpu,
require_torch_accelerator,
require_vision,
slow,
torch_device,
Expand Down Expand Up @@ -865,7 +865,7 @@ def test_inference_object_detection_head_batched(self):
]
self.assertListEqual([result["classes"] for result in results], expected_classes)

@require_torch_gpu
@require_torch_accelerator
def test_inference_object_detection_head_equivalence_cpu_gpu(self):
processor = self.default_processor
image = prepare_img()
Expand All @@ -878,8 +878,8 @@ def test_inference_object_detection_head_equivalence_cpu_gpu(self):
cpu_outputs = model(**encoding)

# 2. run model on GPU
model.to("cuda")
encoding = encoding.to("cuda")
model.to(torch_device)
encoding = encoding.to(torch_device)
with torch.no_grad():
gpu_outputs = model(**encoding)

Expand Down
12 changes: 9 additions & 3 deletions tests/models/rt_detr/test_modeling_rt_detr.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,13 @@
is_torch_available,
is_vision_available,
)
from transformers.testing_utils import require_torch, require_torch_gpu, require_vision, slow, torch_device
from transformers.testing_utils import (
require_torch,
require_torch_accelerator,
require_vision,
slow,
torch_device,
)
from transformers.utils import cached_property

from ...test_configuration_common import ConfigTester
Expand Down Expand Up @@ -631,7 +637,7 @@ def test_initialization(self):
self.assertTrue(not failed_cases, message)

@parameterized.expand(["float32", "float16", "bfloat16"])
@require_torch_gpu
@require_torch_accelerator
@slow
def test_inference_with_different_dtypes(self, torch_dtype_str):
torch_dtype = {
Expand All @@ -653,7 +659,7 @@ def test_inference_with_different_dtypes(self, torch_dtype_str):
_ = model(**self._prepare_for_class(inputs_dict, model_class))

@parameterized.expand(["float32", "float16", "bfloat16"])
@require_torch_gpu
@require_torch_accelerator
@slow
def test_inference_equivalence_for_static_and_dynamic_anchors(self, torch_dtype_str):
torch_dtype = {
Expand Down
3 changes: 2 additions & 1 deletion tests/models/starcoder2/test_modeling_starcoder2.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
require_bitsandbytes,
require_flash_attn,
require_torch,
require_torch_accelerator,
require_torch_gpu,
slow,
torch_device,
Expand Down Expand Up @@ -412,7 +413,7 @@ def test_flash_attn_2_inference_equivalence_right_padding(self):


@slow
@require_torch_gpu
@require_torch_accelerator
class Starcoder2IntegrationTest(unittest.TestCase):
def test_starcoder2_batched_generation_sdpa(self):
EXPECTED_TEXT = [
Expand Down
6 changes: 3 additions & 3 deletions tests/models/t5/test_modeling_t5.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
require_sentencepiece,
require_tokenizers,
require_torch,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
Expand Down Expand Up @@ -1646,7 +1646,7 @@ def test_contrastive_search_t5(self):
)

@slow
@require_torch_gpu
@require_torch_accelerator
def test_compile_static_cache(self):
NUM_TOKENS_TO_GENERATE = 40
EXPECTED_TEXT_COMPLETION = [
Expand Down Expand Up @@ -1686,7 +1686,7 @@ def test_compile_static_cache(self):
self.assertEqual(EXPECTED_TEXT_COMPLETION, static_compiled_text)

@slow
@require_torch_gpu
@require_torch_accelerator
def test_compile_static_cache_encoder(self):
prompts = [
"summarize: Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial "
Expand Down
3 changes: 1 addition & 2 deletions tests/pipelines/test_pipelines_text_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
require_tf,
require_torch,
require_torch_accelerator,
require_torch_gpu,
require_torch_or_tf,
torch_device,
)
Expand Down Expand Up @@ -553,7 +552,7 @@ def run_pipeline_test(self, text_generator, _):

@require_torch
@require_accelerate
@require_torch_gpu
@require_torch_accelerator
def test_small_model_pt_bloom_accelerate(self):
import torch

Expand Down
5 changes: 3 additions & 2 deletions tests/quantization/quanto_integration/test_quanto.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
require_accelerate,
require_optimum_quanto,
require_read_token,
require_torch_accelerator,
require_torch_gpu,
slow,
torch_device,
Expand Down Expand Up @@ -123,7 +124,7 @@ def test_conversion_with_modules_to_not_convert(self):


@slow
@require_torch_gpu
@require_torch_accelerator
@require_optimum_quanto
@require_accelerate
class QuantoQuantizationTest(unittest.TestCase):
Expand Down Expand Up @@ -268,7 +269,7 @@ def test_compare_with_quanto(self):
quantize(model.transformer, weights=w_mapping[self.weights])
freeze(model.transformer)
self.check_same_model(model, self.quantized_model)
self.check_inference_correctness(model, device="cuda")
self.check_inference_correctness(model, device=torch_device)

@unittest.skip
def test_load_from_quanto_saved(self):
Expand Down
Loading

0 comments on commit 2fa876d

Please sign in to comment.