Commit 637cadb

test on transformers-supported revision

1 parent 2fa876d

1 file changed (+43 −13 lines)

tests/models/aria/test_modeling_aria.py

@@ -318,7 +318,9 @@ def tearDown(self):
     @require_bitsandbytes
     def test_small_model_integration_test(self):
         # Let' s make sure we test the preprocessing to replace what is used
-        model = AriaForConditionalGeneration.from_pretrained("rhymes-ai/Aria", load_in_4bit=True)
+        model = AriaForConditionalGeneration.from_pretrained(
+            "rhymes-ai/Aria", revision="6583f58908d092e52f348069485e64fef4867730", load_in_4bit=True
+        )

         prompt = "<image>\nUSER: What are the things I should be cautious about when I visit this place?\nASSISTANT:"
         image_file = "https://aria-vl.github.io/static/images/view.jpg"
@@ -342,9 +344,11 @@ def test_small_model_integration_test_llama_single(self):
         # Let' s make sure we test the preprocessing to replace what is used
         model_id = "rhymes-ai/Aria"

-        model = AriaForConditionalGeneration.from_pretrained(model_id, load_in_4bit=True)
+        model = AriaForConditionalGeneration.from_pretrained(
+            model_id, revision="6583f58908d092e52f348069485e64fef4867730", load_in_4bit=True
+        )
         processor = AutoProcessor.from_pretrained(model_id)
-
+        breakpoint()
         prompt = "USER: <image>\nWhat are the things I should be cautious about when I visit this place? ASSISTANT:"
         image_file = "https://aria-vl.github.io/static/images/view.jpg"
         raw_image = Image.open(requests.get(image_file, stream=True).raw)
@@ -364,7 +368,9 @@ def test_small_model_integration_test_llama_batched(self):
         # Let' s make sure we test the preprocessing to replace what is used
         model_id = "rhymes-ai/Aria"

-        model = AriaForConditionalGeneration.from_pretrained(model_id, load_in_4bit=True)
+        model = AriaForConditionalGeneration.from_pretrained(
+            model_id, revision="6583f58908d092e52f348069485e64fef4867730", load_in_4bit=True
+        )
         processor = AutoProcessor.from_pretrained(model_id)

         prompts = [
@@ -389,7 +395,9 @@ def test_small_model_integration_test_llama_batched(self):
     @require_bitsandbytes
     def test_small_model_integration_test_batch(self):
         # Let' s make sure we test the preprocessing to replace what is used
-        model = AriaForConditionalGeneration.from_pretrained("rhymes-ai/Aria", load_in_4bit=True)
+        model = AriaForConditionalGeneration.from_pretrained(
+            "rhymes-ai/Aria", revision="6583f58908d092e52f348069485e64fef4867730", load_in_4bit=True
+        )
         # The first batch is longer in terms of text, but only has 1 image. The second batch will be padded in text, but the first will be padded because images take more space!.
         prompts = [
             "USER: <image>\nWhat are the things I should be cautious about when I visit this place? What should I bring with me?\nASSISTANT:",
@@ -418,7 +426,12 @@ def test_small_model_integration_test_llama_batched_regression(self):
         model_id = "rhymes-ai/Aria"

         # Multi-image & multi-prompt (e.g. 3 images and 2 prompts now fails with SDPA, this tests if "eager" works as before)
-        model = AriaForConditionalGeneration.from_pretrained(model_id, load_in_4bit=True, attn_implementation="eager")
+        model = AriaForConditionalGeneration.from_pretrained(
+            model_id,
+            revision="6583f58908d092e52f348069485e64fef4867730",
+            load_in_4bit=True,
+            attn_implementation="eager",
+        )
         processor = AutoProcessor.from_pretrained(model_id, pad_token="<pad>")

         prompts = [
@@ -443,7 +456,9 @@ def test_small_model_integration_test_llama_batched_regression(self):
     @require_torch
     @require_vision
     def test_batched_generation(self):
-        model = AriaForConditionalGeneration.from_pretrained("rhymes-ai/Aria", load_in_4bit=True)
+        model = AriaForConditionalGeneration.from_pretrained(
+            "rhymes-ai/Aria", revision="6583f58908d092e52f348069485e64fef4867730", load_in_4bit=True
+        )

         processor = AutoProcessor.from_pretrained("rhymes-ai/Aria")

@@ -481,7 +496,9 @@ def test_aria_index_error_bug(self):
         # Please refer to that PR, or specifically https://github.com/huggingface/transformers/pull/28032#issuecomment-1860650043 for
         # more details
         model_id = "rhymes-ai/Aria"
-        model = AriaForConditionalGeneration.from_pretrained(model_id, load_in_4bit=True)
+        model = AriaForConditionalGeneration.from_pretrained(
+            model_id, revision="6583f58908d092e52f348069485e64fef4867730", load_in_4bit=True
+        )

         processor = AutoProcessor.from_pretrained(model_id)

@@ -501,7 +518,9 @@ def test_aria_index_error_bug(self):
     def test_aria_merge_inputs_error_bug(self):
         # This is a reproducer of https://github.com/huggingface/transformers/pull/28333 and makes sure it does not happen anymore
         model_id = "rhymes-ai/Aria"
-        model = AriaForConditionalGeneration.from_pretrained(model_id, load_in_4bit=True)
+        model = AriaForConditionalGeneration.from_pretrained(
+            model_id, revision="6583f58908d092e52f348069485e64fef4867730", load_in_4bit=True
+        )

         # Simulate some user inputs
         pixel_values = torch.randn(
@@ -556,7 +575,9 @@ def test_tokenizer_integration(self):
     @require_bitsandbytes
     def test_generation_no_images(self):
         model_id = "rhymes-ai/Aria"
-        model = AriaForConditionalGeneration.from_pretrained(model_id, load_in_4bit=True)
+        model = AriaForConditionalGeneration.from_pretrained(
+            model_id, revision="6583f58908d092e52f348069485e64fef4867730", load_in_4bit=True
+        )
         processor = AutoProcessor.from_pretrained(model_id)

         # Prepare inputs with no images
@@ -569,7 +590,12 @@ def test_generation_no_images(self):
     @require_bitsandbytes
     def test_generation_siglip_backbone(self):
         model_id = "rhymes-ai/Aria"
-        model = AriaForConditionalGeneration.from_pretrained(model_id, torch_dtype="float16", device_map=torch_device)
+        model = AriaForConditionalGeneration.from_pretrained(
+            model_id,
+            revision="6583f58908d092e52f348069485e64fef4867730",
+            torch_dtype="float16",
+            device_map=torch_device,
+        )
         processor = AutoProcessor.from_pretrained(model_id)

         # check processing with expansion of inputs (w/o expansion should work with any backbone)
@@ -594,7 +620,9 @@ def test_generation_siglip_backbone(self):
     @require_bitsandbytes
     def test_expansion_in_processing(self):
         model_id = "rhymes-ai/Aria"
-        model = AriaForConditionalGeneration.from_pretrained(model_id, load_in_4bit=True)
+        model = AriaForConditionalGeneration.from_pretrained(
+            model_id, revision="6583f58908d092e52f348069485e64fef4867730", load_in_4bit=True
+        )
         processor = AutoProcessor.from_pretrained(model_id)

         prompt = "USER: <image>\nDescribe the image:\nASSISTANT:"
@@ -624,7 +652,9 @@ def test_expansion_in_processing(self):
     @require_bitsandbytes
     def test_pixtral(self):
         model_id = "rhymes-ai/Aria"
-        model = AriaForConditionalGeneration.from_pretrained(model_id)
+        model = AriaForConditionalGeneration.from_pretrained(
+            model_id, revision="6583f58908d092e52f348069485e64fef4867730"
+        )
         processor = AutoProcessor.from_pretrained(model_id)

         IMG_URLS = [
