@@ -79,6 +79,7 @@
 from peft.utils.other import fsdp_auto_wrap_policy

 from .testing_utils import (
+    device_count,
     require_aqlm,
     require_auto_awq,
     require_auto_gptq,
@@ -302,7 +303,7 @@ def test_causal_lm_training_multi_gpu_4bit(self):
             quantization_config=BitsAndBytesConfig(load_in_4bit=True),
         )

-        assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
+        assert set(model.hf_device_map.values()) == set(range(device_count))

         model = prepare_model_for_kbit_training(model)

@@ -424,7 +425,7 @@ def on_optimizer_step(self, args, state, control, **kwargs):
         assert trainer.state.log_history[-1]["train_loss"] is not None

     @pytest.mark.single_gpu_tests
-    @require_torch_gpu
+    @require_non_cpu
     def test_8bit_adalora_causalLM(self):
         r"""
         Tests the 8bit training with adalora
@@ -497,7 +498,7 @@ def on_optimizer_step(self, args, state, control, **kwargs):
         assert trainer.state.log_history[-1]["train_loss"] is not None

     @pytest.mark.multi_gpu_tests
-    @require_torch_multi_gpu
+    @require_multi_accelerator
     def test_causal_lm_training_multi_gpu(self):
         r"""
         Test the CausalLM training on a multi-GPU device. This test is a converted version of
@@ -511,8 +512,8 @@ def test_causal_lm_training_multi_gpu(self):
             quantization_config=BitsAndBytesConfig(load_in_8bit=True),
             device_map="auto",
         )
-
-        assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
+        print(f"device map: {model.hf_device_map}")
+        assert set(model.hf_device_map.values()) == set(range(device_count))

         tokenizer = AutoTokenizer.from_pretrained(self.causal_lm_model_id)
         model = prepare_model_for_kbit_training(model)
@@ -621,7 +622,7 @@ def test_seq2seq_lm_training_single_gpu(self):
         assert trainer.state.log_history[-1]["train_loss"] is not None

     @pytest.mark.multi_gpu_tests
-    @require_torch_multi_gpu
+    @require_multi_accelerator
     def test_seq2seq_lm_training_multi_gpu(self):
         r"""
         Test the Seq2SeqLM training on a multi-GPU device. This test is a converted version of
@@ -636,7 +637,7 @@ def test_seq2seq_lm_training_multi_gpu(self):
             device_map="balanced",
         )

-        assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
+        assert set(model.hf_device_map.values()) == set(range(device_count))

         tokenizer = AutoTokenizer.from_pretrained(self.seq2seq_model_id)
         model = prepare_model_for_kbit_training(model)
@@ -920,7 +921,7 @@ def test_causal_lm_training_multi_gpu_4bit_dora(self):
             quantization_config=BitsAndBytesConfig(load_in_4bit=True),
         )

-        assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
+        assert set(model.hf_device_map.values()) == set(range(device_count))

         model = prepare_model_for_kbit_training(model)

@@ -1037,7 +1038,7 @@ def test_causal_lm_training_multi_gpu_8bit_dora(self):
             quantization_config=BitsAndBytesConfig(load_in_8bit=True),
         )

-        assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
+        assert set(model.hf_device_map.values()) == set(range(device_count))

         model = prepare_model_for_kbit_training(model)

@@ -1284,7 +1285,7 @@ def test_causal_lm_training_multi_gpu_vera(self):
             quantization_config=BitsAndBytesConfig(load_in_8bit=True),
         )

-        assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
+        assert set(model.hf_device_map.values()) == set(range(device_count))

         model = prepare_model_for_kbit_training(model)

@@ -1343,7 +1344,7 @@ def test_causal_lm_training_multi_gpu_4bit_vera(self):
             quantization_config=BitsAndBytesConfig(load_in_4bit=True),
         )

-        assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
+        assert set(model.hf_device_map.values()) == set(range(device_count))

         model = prepare_model_for_kbit_training(model)

@@ -1656,7 +1657,7 @@ def test_causal_lm_training_multi_gpu(self):
             quantization_config=self.quantization_config,
         )

-        assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
+        assert set(model.hf_device_map.values()) == set(range(device_count))

         model = prepare_model_for_kbit_training(model)

@@ -2552,7 +2553,7 @@ def test_config_no_loftq_config(self):


 @require_bitsandbytes
-@require_torch_gpu
+@require_non_cpu
 class MultiprocessTester(unittest.TestCase):
     def test_notebook_launcher(self):
         script_path = os.path.join("scripts", "launch_notebook_mp.py")
@@ -3187,7 +3188,7 @@ def test_causal_lm_training_multi_gpu(self):
             device_map="auto",
         )

-        assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
+        assert set(model.hf_device_map.values()) == set(range(device_count))

         model = prepare_model_for_kbit_training(model)

@@ -3335,7 +3336,7 @@ def test_causal_lm_training_multi_gpu_eetq(self):
             quantization_config=quantization_config,
         )

-        assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
+        assert set(model.hf_device_map.values()) == set(range(device_count))

         model = prepare_model_for_kbit_training(model)

@@ -3586,7 +3587,7 @@ def test_causal_lm_training_multi_gpu_torchao(self, quant_type):
             torch_dtype=torch.bfloat16,
         )

-        assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
+        assert set(model.hf_device_map.values()) == set(range(device_count))

         model = prepare_model_for_kbit_training(model)
         model.model_parallel = True
@@ -3646,7 +3647,7 @@ def test_causal_lm_training_multi_gpu_torchao_int4_raises(self):
             torch_dtype=torch.bfloat16,
         )

-        assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
+        assert set(model.hf_device_map.values()) == set(range(device_count))

         model = prepare_model_for_kbit_training(model)
         model.model_parallel = True
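The diff only imports `device_count`, `require_non_cpu`, and `require_multi_accelerator` from `.testing_utils`; their definitions are not part of this change. Below is a minimal sketch of what such accelerator-agnostic helpers could look like, assuming detection is limited to the CUDA and XPU backends; the actual implementations in PEFT's testing utilities may differ.

```python
# Hypothetical sketch -- the real helpers live in peft's testing utilities and may differ.
import unittest

import torch


def _infer_device_count() -> int:
    # Count devices on whichever accelerator backend is available.
    # Assumption: only CUDA and XPU are considered here.
    if torch.cuda.is_available():
        return torch.cuda.device_count()
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        return torch.xpu.device_count()
    return 0


# Module-level value, so tests can compare against `set(range(device_count))`
# without calling a backend-specific function such as torch.cuda.device_count().
device_count = _infer_device_count()


def require_non_cpu(test_case):
    # Skip the test unless at least one accelerator device is present.
    return unittest.skipUnless(device_count > 0, "test requires an accelerator")(test_case)


def require_multi_accelerator(test_case):
    # Skip the test unless at least two accelerator devices are present.
    return unittest.skipUnless(device_count > 1, "test requires multiple accelerators")(test_case)
```

Exposing `device_count` as a module-level value (rather than a function call) is what lets the assertions in the diff read `set(range(device_count))` with no direct reference to `torch.cuda`, so the same tests can run on non-CUDA accelerators.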