Skip to content

Commit 8f2fe0b

Browse files
jsondai and copybara-github
authored and committed
chore: add evals data converter to _genai
PiperOrigin-RevId: 766824712
1 parent 97be629 commit 8f2fe0b

File tree

4 files changed

+873
-103
lines changed

4 files changed

+873
-103
lines changed

tests/unit/vertexai/genai/test_evals.py

Lines changed: 296 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import vertexai
2424
from google.cloud.aiplatform import initializer as aiplatform_initializer
2525
from vertexai import _genai
26+
from vertexai._genai import _evals_data_converters
2627
from vertexai._genai import types as vertexai_genai_types
2728
from google.genai import types as genai_types
2829
import google.genai.errors as genai_errors
@@ -771,3 +772,298 @@ def test_prompt_template_assemble_multimodal_variable_integration(self):
771772
assembled_content.parts[2].text
772773
== " and then answer: This is a simple text."
773774
)
775+
776+
777+
class TestGeminiEvalDataConverter:
    """Unit tests for the _GeminiEvalDataConverter class.

    Exercises conversion of raw Gemini request/response records (dicts in the
    batch-prediction output shape) into a
    ``vertexai_genai_types.EvaluationDataset`` of eval cases.
    """

    def setup_method(self):
        # pytest invokes this before every test method, so each test gets a
        # fresh, stateless converter instance.
        self.converter = _evals_data_converters._GeminiEvalDataConverter()

    def test_convert_simple_prompt_response(self):
        """A single user turn plus one model candidate maps to prompt/response."""
        raw_data = [
            {
                "request": {
                    "contents": [{"role": "user", "parts": [{"text": "Hello"}]}]
                },
                "response": {
                    "candidates": [
                        {
                            "content": {"role": "model", "parts": [{"text": "Hi"}]},
                            "finish_reason": "STOP",
                        }
                    ],
                    "usage_metadata": {
                        "prompt_token_count": 1,
                        "candidates_token_count": 1,
                        "total_token_count": 2,
                    },
                },
            }
        ]
        result_dataset = self.converter.convert(raw_data)
        assert isinstance(result_dataset, vertexai_genai_types.EvaluationDataset)
        assert len(result_dataset.eval_cases) == 1
        eval_case = result_dataset.eval_cases[0]

        # The sole user turn becomes the prompt; the sole candidate becomes
        # the response.
        assert eval_case.prompt == genai_types.Content(
            parts=[genai_types.Part(text="Hello")], role="user"
        )
        assert len(eval_case.responses) == 1
        assert eval_case.responses[0].response == genai_types.Content(
            parts=[genai_types.Part(text="Hi")], role="model"
        )
        # No reference / system instruction / history in the input, and the
        # converter tags all Gemini data with the "google" provider.
        assert eval_case.reference.response is None
        assert eval_case.system_instruction.parts is None
        assert eval_case.conversation_history == []
        assert eval_case.eval_case_metadata.provider == "google"

    def test_convert_with_system_instruction(self):
        """``request.system_instruction`` is carried over onto the eval case."""
        raw_data = [
            {
                "request": {
                    "system_instruction": {
                        "role": "system",
                        "parts": [{"text": "Be nice."}],
                    },
                    "contents": [{"role": "user", "parts": [{"text": "Hello"}]}],
                },
                "response": {
                    "candidates": [
                        {
                            "content": {
                                "role": "model",
                                "parts": [{"text": "Hi there!"}],
                            }
                        }
                    ]
                },
            }
        ]
        result_dataset = self.converter.convert(raw_data)
        eval_case = result_dataset.eval_cases[0]
        assert eval_case.system_instruction == genai_types.Content(
            parts=[genai_types.Part(text="Be nice.")], role="system"
        )
        assert eval_case.prompt == genai_types.Content(
            parts=[genai_types.Part(text="Hello")], role="user"
        )

    def test_convert_with_conversation_history_and_reference(self):
        """With a trailing model turn in contents, that turn becomes the reference.

        Contents layout: [history user, history model, prompt user,
        reference model] — earlier turns land in conversation_history.
        """
        raw_data_for_reference = [
            {
                "request": {
                    "contents": [
                        {
                            "role": "user",
                            "parts": [{"text": "Initial user"}],
                        },  # history
                        {
                            "role": "model",
                            "parts": [{"text": "Initial model"}],
                        },  # history
                        {
                            "role": "user",
                            "parts": [{"text": "Actual prompt"}],
                        },  # prompt
                        {
                            "role": "model",
                            "parts": [{"text": "This is reference"}],
                        },  # reference
                    ]
                },
                "response": {
                    "candidates": [
                        {
                            "content": {
                                "role": "model",
                                "parts": [{"text": "Actual response"}],
                            }
                        }
                    ]
                },
            }
        ]
        result_dataset = self.converter.convert(raw_data_for_reference)
        eval_case = result_dataset.eval_cases[0]

        assert eval_case.prompt == genai_types.Content(
            parts=[genai_types.Part(text="Actual prompt")], role="user"
        )
        assert eval_case.reference.response == genai_types.Content(
            parts=[genai_types.Part(text="This is reference")], role="model"
        )
        # Only the first two turns remain as history once prompt and
        # reference are peeled off.
        assert len(eval_case.conversation_history) == 2
        assert eval_case.conversation_history[0].content == genai_types.Content(
            parts=[genai_types.Part(text="Initial user")], role="user"
        )
        assert eval_case.conversation_history[1].content == genai_types.Content(
            parts=[genai_types.Part(text="Initial model")], role="model"
        )
        # The actual model response still comes from response.candidates.
        assert eval_case.responses[0].response == genai_types.Content(
            parts=[genai_types.Part(text="Actual response")], role="model"
        )

    def test_convert_with_conversation_history_no_reference(self):
        """No trailing model turn means no reference is extracted."""
        # Last message in contents is from user, so it becomes the prompt.
        raw_data = [
            {
                "request": {
                    "contents": [
                        {"role": "user", "parts": [{"text": "Old user msg"}]},
                        {"role": "model", "parts": [{"text": "Old model msg"}]},
                        {"role": "user", "parts": [{"text": "Current prompt"}]},
                    ]
                },
                "response": {
                    "candidates": [
                        {
                            "content": {
                                "role": "model",
                                "parts": [{"text": "A response"}],
                            }
                        }
                    ]
                },
            }
        ]
        result_dataset = self.converter.convert(raw_data)
        eval_case = result_dataset.eval_cases[0]

        assert eval_case.prompt == genai_types.Content(
            parts=[genai_types.Part(text="Current prompt")], role="user"
        )
        assert eval_case.reference.response is None
        assert len(eval_case.conversation_history) == 2
        assert eval_case.conversation_history[0].content.parts[0].text == "Old user msg"
        assert (
            eval_case.conversation_history[1].content.parts[0].text == "Old model msg"
        )

    def test_convert_no_candidates_in_response(self):
        """An empty candidates list (e.g. safety-blocked) yields a None response."""
        raw_data = [
            {
                "request": {
                    "contents": [{"role": "user", "parts": [{"text": "Hello"}]}]
                },
                "response": {
                    "candidates": [],
                    "prompt_feedback": {"block_reason": "SAFETY"},
                },
            }
        ]
        result_dataset = self.converter.convert(raw_data)
        eval_case = result_dataset.eval_cases[0]
        # A response slot is still created, but with no content.
        assert len(eval_case.responses) == 1
        assert eval_case.responses[0].response is None

    def test_convert_invalid_content_structure_raises_value_error(self):
        """Malformed turns (non-dict, or missing 'parts') raise ValueError."""
        raw_data = [
            {
                "request": {"contents": ["not a dict"]},  # Invalid content
                "response": {
                    "candidates": [
                        {"content": {"role": "model", "parts": [{"text": "Hi"}]}}
                    ]
                },
            }
        ]
        with pytest.raises(ValueError, match="Invalid content structure at turn 0"):
            self.converter.convert(raw_data)

        raw_data_missing_parts = [
            {
                "request": {"contents": [{"role": "user"}]},  # Missing 'parts'
                "response": {
                    "candidates": [
                        {"content": {"role": "model", "parts": [{"text": "Hi"}]}}
                    ]
                },
            }
        ]
        with pytest.raises(ValueError, match="Invalid content structure at turn 0"):
            self.converter.convert(raw_data_missing_parts)

    def test_convert_multiple_items(self):
        """Each raw record produces its own eval case, in input order."""
        raw_data = [
            {
                "request": {
                    "contents": [{"role": "user", "parts": [{"text": "Item 1"}]}]
                },
                "response": {
                    "candidates": [
                        {
                            "content": {
                                "role": "model",
                                "parts": [{"text": "Resp 1"}],
                            }
                        }
                    ]
                },
            },
            {
                "request": {
                    "contents": [{"role": "user", "parts": [{"text": "Item 2"}]}]
                },
                "response": {
                    "candidates": [
                        {
                            "content": {
                                "role": "model",
                                "parts": [{"text": "Resp 2"}],
                            }
                        }
                    ]
                },
            },
        ]
        result_dataset = self.converter.convert(raw_data)
        assert len(result_dataset.eval_cases) == 2
        assert result_dataset.eval_cases[0].prompt.parts[0].text == "Item 1"
        assert result_dataset.eval_cases[1].prompt.parts[0].text == "Item 2"

    def test_model_version_extraction(self):
        """``response.modelVersion`` from batch-prediction output is surfaced
        as the eval case metadata's model identifier."""
        # Realistic batch-prediction output record (camelCase keys, as emitted
        # by the service).
        example_batch_prediction_output = """{
  "status": "",
  "processed_time": "2024-11-01T18:13:16.826+00:00",
  "request": {
    "contents": [
      {
        "parts": [
          {
            "text": "What is the relation between the following video and image samples?"
          }
        ],
        "role": "user"
      }
    ]
  },
  "response": {
    "candidates": [
      {
        "avgLogprobs": -0.5782725546095107,
        "content": {
          "parts": [
            {
              "text": "This video shows a Google Photos marketing."
            }
          ],
          "role": "model"
        },
        "finishReason": "STOP"
      }
    ],
    "modelVersion": "gemini-2.0-flash-001@default",
    "usageMetadata": {
      "candidatesTokenCount": 36,
      "promptTokenCount": 29180,
      "totalTokenCount": 29216
    }
  }
}"""
        loaded_gemini_data = [json.loads(example_batch_prediction_output)]
        result_dataset = self.converter.convert(loaded_gemini_data)
        assert (
            result_dataset.eval_cases[0].eval_case_metadata.model
            == "gemini-2.0-flash-001@default"
        )

0 commit comments

Comments
 (0)