prefix: Add new GenAI Eval converter

vertex-sdk-bot · copybara-github · commit f43e34fcf344 · 2025-07-11T07:54:14.000-07:00
PiperOrigin-RevId: 781578088
diff --git a/vertexai/_genai/_evals_data_converters.py b/vertexai/_genai/_evals_data_converters.py
@@ -35,6 +35,7 @@ class EvalDatasetSchema(_common.CaseInSensitiveEnum):
     GEMINI = "gemini"
     FLATTEN = "flatten"
     OPENAI = "openai"
+    OBSERVABILITY = "observability"
     UNKNOWN = "unknown"
 
 
@@ -442,6 +443,179 @@ def convert(self, raw_data: list[dict[str, Any]]) -> types.EvaluationDataset:
         return types.EvaluationDataset(eval_cases=eval_cases)
 
 
+class _ObservabilityDataConverter(_EvalDataConverter):
+    """Converter for dataset in GCP Observability GenAI format."""
+
+    def _message_to_content(self, message: dict[str, Any]) -> genai_types.Content:
+        """Converts Obs GenAI message format to Content.
+
+        Each entry of message["parts"] is dispatched on its "type" field;
+        entries with an unrecognized type are logged and skipped. The result
+        carries message["role"], or an empty string when it is absent.
+        """
+        message_parts = message.get("parts", [])
+        if not isinstance(message_parts, list):
+            message_parts = []
+        parts = []
+        for message_part in message_parts:
+            part_type = message_part.get("type", "")
+            match part_type:
+                case "text":
+                    part = genai_types.Part(text=message_part.get("content", ""))
+                case "blob":
+                    part = genai_types.Part(
+                        inline_data=genai_types.Blob(
+                            data=message_part.get("data", ""),
+                            mime_type=message_part.get("mime_type", ""),
+                        )
+                    )
+                case "file_data":
+                    part = genai_types.Part(
+                        file_data=genai_types.FileData(
+                            file_uri=message_part.get("file_uri", ""),
+                            mime_type=message_part.get("mime_type", ""),
+                        )
+                    )
+                case "tool_call":
+                    part = genai_types.Part(
+                        function_call=genai_types.FunctionCall(
+                            id=message_part.get("id", ""),
+                            name=message_part.get("name", ""),
+                            args=message_part.get("arguments", {}),
+                        )
+                    )
+                case "tool_call_response":
+                    part = genai_types.Part(
+                        function_response=genai_types.FunctionResponse(
+                            id=message_part.get("id", ""),
+                            name=message_part.get("name", ""),
+                            response=message_part.get("result", {}),
+                        )
+                    )
+                case _:
+                    logger.warning(
+                        "Unrecognized message part type of '%s' found."
+                        " Skipping part.",
+                        part_type,
+                    )
+                    continue
+            parts.append(part)
+
+        return genai_types.Content(parts=parts, role=message.get("role", ""))
+
+    def _parse_messages(
+        self,
+        eval_case_id: str,
+        input_dict: dict[str, Any],
+        output_dict: dict[str, Any],
+        system_dict: Optional[dict[str, Any]] = None,
+    ) -> types.EvalCase:
+        """Parses a set of messages into an EvalCase.
+
+        A payload that is None or lacks its expected key contributes nothing.
+
+        Args:
+            eval_case_id: Identifier stored on the resulting EvalCase.
+            input_dict: Holds "messages"; the last is the prompt, the rest history.
+            output_dict: Holds "choices"; each choice's "message" is a response.
+            system_dict: Holds "messages"; the first is the system instruction.
+        """
+        # System message: only the first one is used.
+        system_instruction = None
+        system_msgs = (system_dict or {}).get("messages", [])
+        if system_msgs:
+            system_instruction = self._message_to_content(system_msgs[0])
+
+        # Input messages: the latest is the prompt, all earlier ones are history.
+        prompt = None
+        conversation_history = []
+        input_msgs = (input_dict or {}).get("messages", [])
+        if input_msgs:
+            prompt = self._message_to_content(input_msgs[-1])
+            conversation_history = [
+                types.Message(
+                    turn_id=str(turn_id),
+                    content=self._message_to_content(msg),
+                    author=msg.get("role", ""),
+                )
+                for turn_id, msg in enumerate(input_msgs[:-1])
+            ]
+
+        # Output messages: one response candidate per choice.
+        responses = [
+            types.ResponseCandidate(
+                response=self._message_to_content(choice.get("message", {}))
+            )
+            for choice in (output_dict or {}).get("choices", [])
+        ]
+
+        return types.EvalCase(
+            eval_case_id=eval_case_id,
+            prompt=prompt,
+            responses=responses,
+            system_instruction=system_instruction,
+            conversation_history=conversation_history,
+            reference=None,
+        )
+
+    def _load_raw_data(self, data: Any, case_index: int) -> dict[str, Any]:
+        """Loads raw data into a dict, falling back to an empty dict.
+
+        `data` may be a dict or a JSON string that decodes to a dict; any other
+        input is logged where applicable and yields {} so callers can use .get().
+        """
+        if isinstance(data, dict):
+            return data
+        if isinstance(data, str):
+            try:
+                loaded_json = json.loads(data)
+            except json.JSONDecodeError:
+                logger.warning(
+                    "Could not decode response JSON string for case %s."
+                    " Treating as empty response.",
+                    case_index,
+                )
+                return {}
+            if isinstance(loaded_json, dict):
+                return loaded_json
+            logger.warning(
+                "Decoded response JSON is not a dictionary for case"
+                " %s. Type: %s",
+                case_index,
+                type(loaded_json),
+            )
+        return {}
+
+    @override
+    def convert(self, raw_data: list[dict[str, Any]]) -> types.EvaluationDataset:
+        """Converts a list of GCP Observability GenAI data into an EvaluationDataset.
+
+        Items without an "input" or "output" key are logged and skipped.
+        """
+        eval_cases = []
+        for i, item in enumerate(raw_data):
+            if "input" not in item or "output" not in item:
+                logger.warning(
+                    "Skipping case %s due to missing 'input' or 'output' key.",
+                    i,
+                )
+                continue
+
+            input_dict = self._load_raw_data(item["input"], i)
+            output_dict = self._load_raw_data(item["output"], i)
+            # An empty system payload is skipped so it logs no decode warning.
+            system_dict = None
+            if item.get("system"):
+                system_dict = self._load_raw_data(item["system"], i)
+
+            eval_case_id = f"observability_eval_case_{i}"
+            eval_cases.append(
+                self._parse_messages(eval_case_id, input_dict, output_dict, system_dict)
+            )
+
+        return types.EvaluationDataset(eval_cases=eval_cases)
+
+
 def auto_detect_dataset_schema(
     raw_dataset: list[dict[str, Any]],
 ) -> Union[EvalDatasetSchema, str]:
@@ -476,6 +650,11 @@ def auto_detect_dataset_schema(
                 if "role" in messages_list[0] and "content" in messages_list[0]:
                     return EvalDatasetSchema.OPENAI
 
+    if "format" in keys:
+        format_content = first_item.get("format", "")
+        if isinstance(format_content, str) and format_content == "observability":
+            return EvalDatasetSchema.OBSERVABILITY
+
     if {"prompt", "response"}.issubset(keys) or {
         "response",
         "reference",
@@ -489,6 +668,7 @@ def auto_detect_dataset_schema(
     EvalDatasetSchema.GEMINI: _GeminiEvalDataConverter,
     EvalDatasetSchema.FLATTEN: _FlattenEvalDataConverter,
     EvalDatasetSchema.OPENAI: _OpenAIDataConverter,
+    EvalDatasetSchema.OBSERVABILITY: _ObservabilityDataConverter,
 }
 
 
diff --git a/vertexai/_genai/_evals_visualization.py b/vertexai/_genai/_evals_visualization.py
@@ -490,9 +490,18 @@ def display_evaluation_result(
             processed_df = _preprocess_df_for_json(single_dataset.eval_dataset_df)
             if processed_df is not None:
                 for _, row in processed_df.iterrows():
-                    prompt_key = "request" if "request" in row else "prompt"
+                    prompt_key = "prompt"
+                    if "request" in row:
+                        prompt_key = "request"
+                    elif "input" in row:
+                        prompt_key = "input"
+
+                    response_key = "response"
+                    if "output" in row:
+                        response_key = "output"
+
                     prompt_info = _extract_text_and_raw_json(row.get(prompt_key))
-                    response_info = _extract_text_and_raw_json(row.get("response"))
+                    response_info = _extract_text_and_raw_json(row.get(response_key))
                     processed_row = {
                         "prompt_display_text": prompt_info["display_text"],
                         "prompt_raw_json": prompt_info["raw_json"],
diff --git a/vertexai/_genai/types.py b/vertexai/_genai/types.py
@@ -6395,6 +6395,99 @@ def _check_pandas_installed(cls, data: Any) -> Any:
                 )
         return data
 
+    @classmethod
+    def load_from_sources(
+        cls,
+        input_source: str,
+        output_source: str,
+        system_source: Optional[str] = None,
+        client: Optional[Any] = None,
+    ) -> "EvaluationDataset":
+        """Loads a single-case Observability dataset from GCS files.
+
+        Each source is read as UTF-8 text and stored unparsed in a one-row
+        DataFrame with columns "format" (set to "observability"), "input",
+        "output" and "system".
+
+        All provided URIs are validated before anything is downloaded.
+
+        Args:
+            input_source: `gs://` URI of the file holding the input payload.
+            output_source: `gs://` URI of the file holding the output payload.
+            system_source: Optional `gs://` URI of the file holding the system
+                payload; the "system" column is an empty string when omitted.
+            client: Optional client whose credentials are passed to the storage
+                client; None is passed for the credentials when omitted.
+
+        Returns:
+            An EvaluationDataset wrapping the one-row DataFrame.
+
+        Raises:
+            TypeError: If any provided source does not start with `gs://`.
+            ImportError: If google-cloud-storage or pandas cannot be imported.
+            IOError: If a source cannot be read from GCS; the message names
+                the failing URI.
+        """
+        # Validate every provided URI before doing any I/O.
+        sources = [input_source, output_source]
+        if system_source is not None:
+            sources.append(system_source)
+        if not all(source.startswith("gs://") for source in sources):
+            raise TypeError("Only GCS sources are supported.")
+
+        # Each try body holds only the import, so that unrelated failures are
+        # never reported as a missing dependency.
+        try:
+            from google.cloud import storage
+        except ImportError as e:
+            raise ImportError(
+                "Reading from GCS requires the 'google-cloud-storage'"
+                " library. Please install it with 'pip install"
+                " google-cloud-aiplatform[evaluation]'."
+            ) from e
+
+        try:
+            import pandas as pd
+        except ImportError as e:
+            raise ImportError("Pandas DataFrame library is required.") from e
+
+        # Reuse the credentials of the given client, if any.
+        storage_client = storage.Client(
+            credentials=client._api_client._credentials if client else None
+        )
+
+        def read_text(source: str) -> str:
+            """Downloads the GCS object at `source` and decodes it as UTF-8."""
+            try:
+                # Everything after the scheme is "<bucket>/<object path>".
+                path_without_prefix = source[len("gs://") :]
+                bucket_name, blob_path = path_without_prefix.split("/", 1)
+                bucket = storage_client.bucket(bucket_name)
+                blob = bucket.blob(blob_path)
+                return blob.download_as_bytes().decode("utf-8")
+            except Exception as e:
+                # Report the URI itself so the caller sees which source failed.
+                raise IOError(
+                    f"Failed to read from GCS path {source}: {e}"
+                ) from e
+
+        input_str = read_text(input_source)
+        output_str = read_text(output_source)
+        # An omitted system source is represented by an empty string.
+        system_str = "" if system_source is None else read_text(system_source)
+
+        # The payloads stay raw strings; "format" marks them as observability.
+        eval_dataset_df = pd.DataFrame(
+            {
+                "format": ["observability"],
+                "input": [input_str],
+                "output": [output_str],
+                "system": [system_str],
+            }
+        )
+
+        return cls(eval_dataset_df=eval_dataset_df)
+
     def show(self) -> None:
         """Shows the evaluation dataset."""
         from . import _evals_visualization