Skip to content

Commit 81b8417

Browse files
update and add Expectations for mistral3/internvl tests (#42616)
1 parent 8d75aab commit 81b8417

File tree

2 files changed

+26
-8
lines changed

2 files changed

+26
-8
lines changed

tests/models/internvl/test_modeling_internvl.py

Lines changed: 21 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -430,7 +430,14 @@ def test_qwen2_small_model_integration_batched_generate_multi_image(self):
430430
# Check first output
431431
decoded_output = processor.decode(output[0], skip_special_tokens=True)
432432
# Batching seems to alter the output slightly, but it is also the case in the original implementation. This seems to be expected: https://github.com/huggingface/transformers/issues/23017#issuecomment-1649630232
433-
expected_output = "user\n\nWrite a haiku for this image\nassistant\nSilky lake, \nWooden pier, \nNature's peace." # fmt: skip
433+
expected_outputs = Expectations(
434+
{
435+
("xpu", 3): 'user\n\nWrite a haiku for this image\nassistant\nSilky lake, \nWooden pier, \nNature\'s peace.',
436+
("cuda", 7): 'user\n\nWrite a haiku for this image\nassistant\nSilky lake, \nWooden pier, \nNature\'s peace.',
437+
("rocm", (9, 4)): 'user\n\nWrite a haiku for this image\nassistant\nSilky lake, \nWooden pier, \nNature\'s embrace.',
438+
}
439+
) # fmt: skip
440+
expected_output = expected_outputs.get_expectation()
434441
self.assertEqual(
435442
decoded_output,
436443
expected_output,
@@ -443,6 +450,7 @@ def test_qwen2_small_model_integration_batched_generate_multi_image(self):
443450
{
444451
("xpu", 3): "user\n\nWhat are the differences between these two images?\nassistant\nThe images show the Statue of Liberty and the Golden Gate Bridge from different angles. Here are the differences:\n\n1. **Foreground",
445452
("cuda", 7): "user\n\nWhat are the differences between these two images?\nassistant\nThe images show the Statue of Liberty and the Golden Gate Bridge from different angles. Here are the differences:\n\n1. **Foreground",
453+
("rocm", (9, 4)): "user\n\nWhat are the differences between these two images?\nassistant\nThe images show the Statue of Liberty and the Golden Gate Bridge from different angles. Here are the main differences:\n\n1. **",
446454
}
447455
) # fmt: skip
448456
expected_output = expected_outputs.get_expectation()
@@ -567,6 +575,7 @@ def test_qwen2_small_model_integration_interleaved_images_videos(self):
567575
{
568576
("xpu", 3): "user\n\n\nWhat are the differences between these two images?\nassistant\nThe images depict two distinct scenes:\n\n1. **Left Image:**\n - The Statue of Liberty is prominently featured on an",
569577
("cuda", 7): 'user\n\n\nWhat are the differences between these two images?\nassistant\nThe images depict two distinct scenes:\n\n1. **Left Image:**\n - The Statue of Liberty is prominently featured on an',
578+
("rocm", (9, 4)): 'user\n\n\nWhat are the differences between these two images?\nassistant\nThe images depict two distinct scenes:\n\n1. **Left Image:**\n - This image features the Statue of Liberty on Liberty',
570579
}
571580
) # fmt: skip
572581
expected_output = expected_outputs.get_expectation()
@@ -582,6 +591,7 @@ def test_qwen2_small_model_integration_interleaved_images_videos(self):
582591
{
583592
("xpu", 3): "user\nFrame1: \nFrame2: \nFrame3: \nFrame4: \nFrame5: \nFrame6: \nFrame7: \nFrame8: \nWhat type of shot is the man performing?\nassistant\nA forehand shot",
584593
("cuda", 7): 'user\nFrame1: \nFrame2: \nFrame3: \nFrame4: \nFrame5: \nFrame6: \nFrame7: \nFrame8: \nWhat type of shot is the man performing?\nassistant\nA forehand shot',
594+
("rocm", (9, 4)): 'user\nFrame1: \nFrame2: \nFrame3: \nFrame4: \nFrame5: \nFrame6: \nFrame7: \nFrame8: \nWhat type of shot is the man performing?\nassistant\nA forehand shot',
585595
}
586596
) # fmt: skip
587597
expected_output = expected_outputs.get_expectation()
@@ -593,9 +603,14 @@ def test_qwen2_small_model_integration_interleaved_images_videos(self):
593603

594604
# Check third output
595605
decoded_output = processor.decode(output[2], skip_special_tokens=True)
596-
expected_output = (
597-
"user\n\nWrite a haiku for this image\nassistant\nSilky lake, \nWooden pier, \nNature's peace."
598-
)
606+
expected_outputs = Expectations(
607+
{
608+
("xpu", 3): 'user\n\nWrite a haiku for this image\nassistant\nSilky lake, \nWooden pier, \nNature\'s peace.',
609+
("cuda", 7): 'user\n\nWrite a haiku for this image\nassistant\nSilky lake, \nWooden pier, \nNature\'s peace.',
610+
("rocm", (9, 4)): 'user\n\nWrite a haiku for this image\nassistant\nSilky lake, \nWooden pier, \nNature\'s embrace.',
611+
}
612+
) # fmt: skip
613+
expected_output = expected_outputs.get_expectation()
599614
self.assertEqual(
600615
decoded_output,
601616
expected_output,
@@ -658,7 +673,7 @@ def test_llama_small_model_integration_forward(self):
658673
("xpu", 3): [-9.8828, -0.4954, 1.4561, -10.3438, -10.3438],
659674
("cuda", 7): [-9.8750, -0.4861, 1.4648, -10.3359, -10.3359],
660675
("cuda", 8): [-9.8906, -0.4995, 1.4473, -10.3359, -10.3438],
661-
("rocm", (9, 4)): [ -9.8828, -0.5005, 1.4697, -10.3438, -10.3438],
676+
("rocm", (9, 4)): [ -9.8672, -0.4888, 1.4648, -10.3281, -10.3281],
662677
("rocm", (9, 5)): [ -9.8906, -0.4976, 1.4502, -10.3359, -10.3438],
663678
}
664679
) # fmt: skip
@@ -934,7 +949,7 @@ def test_llama_small_model_integration_interleaved_images_videos(self):
934949
("xpu", 3): "user\n\n\nWhat are the difference between these two images?\nassistant\nI apologize for the confusion in my previous response. Upon closer inspection, the differences between the two images are:\n\n1. **",
935950
("cuda", 7): 'user\n\n\nWhat are the difference between these two images?\nassistant\nI apologize for the confusion in my previous response. Upon closer inspection, the differences between the two images are:\n\n1. **',
936951
("cuda", 8): 'user\n\n\nWhat are the difference between these two images?\nassistant\nI apologize for the confusion in my previous response. After re-examining the images, I can see that there are no',
937-
("rocm", (9, 4)): 'user\n\n\nWhat are the difference between these two images?\nassistant\nI apologize for the confusion in my previous response. Upon closer inspection, the differences between the two images are:\n\n1. **',
952+
("rocm", (9, 4)): 'user\n\n\nWhat are the difference between these two images?\nassistant\nI apologize for the confusion in my previous response. After re-examining the images, I can see that there are no',
938953
("rocm", (9, 5)): 'user\n\n\nWhat are the difference between these two images?\nassistant\nI apologize for the confusion in my previous response. After re-examining the images, I can see that there are no',
939954
}
940955
) # fmt: skip

tests/models/mistral3/test_modeling_mistral3.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,8 @@ def test_mistral3_integration_batched_generate(self):
355355
expected_outputs = Expectations(
356356
{
357357
("xpu", 3): "Calm lake's mirror gleams,\nWhispering pines stand in silence,\nPath to peace begins.",
358-
("cuda", 8): "Wooden path to calm,\nReflections whisper secrets,\nNature's peace unfolds.",
358+
("cuda", (8, 0)): "Wooden path to calm,\nReflections whisper secrets,\nNature's peace unfolds.",
359+
("cuda", (8, 6)): "Calm waters reflect\nWooden path to distant shore\nSilence in the woods",
359360
("rocm", (9, 5)): "Calm waters reflect\nWooden path to distant shore\nSilence in the scene"
360361
}
361362
) # fmt: skip
@@ -432,7 +433,8 @@ def test_mistral3_integration_batched_generate_multi_image(self):
432433
decoded_output = processor.decode(gen_tokens[0], skip_special_tokens=True)
433434
expected_outputs = Expectations(
434435
{
435-
("cuda", 8): 'Calm waters reflect\nWooden path to distant shore\nSilence in the scene',
436+
("cuda", 8): "Calm waters reflect\nWooden path to distant shore\nPeace in nature's hold",
437+
("rocm", (9, 4)): "Calm waters reflect\nWooden path to distant shore\nSilence in the pines"
436438
}
437439
) # fmt: skip
438440
expected_output = expected_outputs.get_expectation()
@@ -448,6 +450,7 @@ def test_mistral3_integration_batched_generate_multi_image(self):
448450
{
449451
("xpu", 3): "Certainly! The images depict two iconic landmarks:\n\n1. The first image shows the Statue of Liberty in New York City.",
450452
("cuda", 8): 'Certainly! The images depict two famous landmarks in the United States:\n\n1. The first image shows the Statue of Liberty,',
453+
("rocm", (9, 4)): 'Certainly! The images depict two famous landmarks in the United States:\n\n1. The first image shows the Statue of Liberty,',
451454
}
452455
) # fmt: skip
453456
expected_output = expected_outputs.get_expectation()

0 commit comments

Comments
 (0)