google · ishanrajsingh · Dec 5, 2025 · Dec 5, 2025 · Dec 5, 2025 · Dec 5, 2025
diff --git a/src/google/adk/cli/cli_tools_click.py b/src/google/adk/cli/cli_tools_click.py
@@ -24,8 +24,13 @@
 import os
 from pathlib import Path
 import tempfile
-import textwrap
 from typing import Optional
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+  from ..apps.app import App
+
+import textwrap
 
 import click
 from click.core import ParameterSource
@@ -36,6 +41,7 @@
 from . import cli_deploy
 from .. import version
 from ..evaluation.constants import MISSING_EVAL_DEPENDENCIES_MESSAGE
+from ..utils.app_loader import load_app_from_module
 from ..features import FeatureName
 from ..features import override_feature_enabled
 from .cli import run_cli
@@ -823,6 +829,13 @@ def cli_eval(
   )
 
   try:
+    # Try to load App if available (for plugin support like ReflectAndRetryToolPlugin)
+    app = load_app_from_module(agent_module_file_path)
+
+    if app:
+      logger.info("Using App instance for evaluation (plugins will be applied)")
+    else:
+      logger.info("No App found, using root_agent directly")
     metric_evaluator_registry = DEFAULT_METRIC_EVALUATOR_REGISTRY
     if eval_config.custom_metrics:
       for (
@@ -845,6 +858,7 @@ def cli_eval(
         root_agent=root_agent,
         eval_sets_manager=eval_sets_manager,
         eval_set_results_manager=eval_set_results_manager,
+        app=app,  # NEW: Pass app if available
         user_simulator_provider=user_simulator_provider,
         metric_evaluator_registry=metric_evaluator_registry,
     )

diff --git a/src/google/adk/evaluation/app_inference_adapter.py b/src/google/adk/evaluation/app_inference_adapter.py
@@ -0,0 +1,92 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing import TYPE_CHECKING
+
+from ..artifacts.base_artifact_service import BaseArtifactService
+from ..memory.base_memory_service import BaseMemoryService
+from ..runners import Runner
+from ..sessions.base_session_service import BaseSessionService
+from ._retry_options_utils import EnsureRetryOptionsPlugin
+from .request_intercepter_plugin import _RequestIntercepterPlugin
+from .simulation.user_simulator import UserSimulator
+
+if TYPE_CHECKING:
+  from .eval_case import SessionInput
+
+
+class AppInferenceAdapter:
+  """Entry point for App-based inference that does not import cli.*.
+
+  The adapter holds no inference logic of its own: it forwards to
+  EvaluationGenerator, so session setup, eval plugin registration and the
+  user simulation loop are implemented in a single place.
+  """
+
+  @staticmethod
+  async def generate_inferences_from_app(
+      app,
+      user_simulator: UserSimulator,
+      initial_session: Optional["SessionInput"],
+      session_id: str,
+      session_service: BaseSessionService,
+      artifact_service: BaseArtifactService,
+      memory_service: BaseMemoryService,
+  ):
+    """Generates eval invocations by running `app` through a Runner.
+
+    Forwards to `EvaluationGenerator._generate_inferences_from_app`, which:
+
+    1. Creates the session from the user id, app name and state of
+       `initial_session`; without one it uses "test_user_id", `app.name`
+       and an empty state.
+    2. Deep-copies `app` and appends the request intercepter and retry
+       options plugins to the copy, skipping any whose name is already
+       present among the copy's plugins.
+    3. Builds a Runner from the copy and the given services, then runs the
+       user simulation loop against it.
+
+    Args:
+      app: App to evaluate. Plugins are added to a deep copy, not to `app`.
+      user_simulator: Supplies the user messages for the conversation.
+      initial_session: Optional session input providing the user id, app
+        name and initial state.
+      session_id: Id of the session to create and run the conversation in.
+      session_service: Session service used to create the session and
+        passed to the Runner.
+      artifact_service: Artifact service passed to the Runner.
+      memory_service: Memory service passed to the Runner.
+
+    Returns:
+      The invocations produced by the user simulation loop.
+    """
+    # Function-scope import: the generator is only needed at call time.
+    # NOTE(review): presumably this also avoids an import cycle - confirm.
+    from .evaluation_generator import EvaluationGenerator
+
+    # Forwarding instead of repeating the same steps here keeps the two
+    # entry points from drifting apart; the keyword arguments map one to one
+    # onto the target's parameters.
+    return await EvaluationGenerator._generate_inferences_from_app(
+        app=app,
+        user_simulator=user_simulator,
+        initial_session=initial_session,
+        session_id=session_id,
+        session_service=session_service,
+        artifact_service=artifact_service,
+        memory_service=memory_service,
+    )
diff --git a/src/google/adk/evaluation/evaluation_generator.py b/src/google/adk/evaluation/evaluation_generator.py
@@ -19,6 +19,11 @@
 from typing import Any
 from typing import AsyncGenerator
 from typing import Optional
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+  from ..apps.app import App
+
 import uuid
 
 from google.genai.types import Content
@@ -39,6 +44,7 @@
 from .app_details import AgentDetails
 from .app_details import AppDetails
 from .eval_case import EvalCase
+from .eval_case import IntermediateData
 from .eval_case import Invocation
 from .eval_case import InvocationEvent
 from .eval_case import InvocationEvents
@@ -156,6 +162,55 @@ async def _process_query(
         initial_session=initial_session,
     )
 
+  @staticmethod
+  async def _run_user_simulation_loop(
+      runner: Runner,
+      user_id: str,
+      session_id: str,
+      user_simulator: UserSimulator,
+      request_intercepter_plugin: _RequestIntercepterPlugin,
+  ) -> list[Invocation]:
+    """Drives the simulated conversation and converts it to invocations.
+
+    The simulator is asked for the next user message and is given a deep
+    copy of every event recorded so far. Each message returned with a
+    SUCCESS status is run through the runner and its events are recorded;
+    the first reply with any other status ends the conversation.
+
+    Args:
+      runner: Runner passed to each single-invocation inference call.
+      user_id: User id passed to each single-invocation inference call.
+      session_id: Session id passed to each single-invocation call.
+      user_simulator: Source of the next user message.
+      request_intercepter_plugin: Plugin used to look up app details.
+
+    Returns:
+      Invocations converted from the recorded events and app details.
+    """
+    recorded_events = []
+    while True:
+      # The simulator works on a deep copy, not on the recorded list itself.
+      simulator_turn = await user_simulator.get_next_user_message(
+          copy.deepcopy(recorded_events)
+      )
+      if simulator_turn.status != UserSimulatorStatus.SUCCESS:
+        break  # Any status other than SUCCESS ends the conversation.
+
+      turn_events = (
+          EvaluationGenerator._generate_inferences_for_single_user_invocation(
+              runner, user_id, session_id, simulator_turn.user_message
+          )
+      )
+      async for turn_event in turn_events:
+        recorded_events.append(turn_event)
+
+    app_details = EvaluationGenerator._get_app_details_by_invocation_id(
+        recorded_events, request_intercepter_plugin
+    )
+    return EvaluationGenerator.convert_events_to_eval_invocations(
+        recorded_events, app_details
+    )
+
   @staticmethod
   async def _generate_inferences_for_single_user_invocation(
       runner: Runner,
@@ -240,28 +295,12 @@ async def _generate_inferences_from_root_agent(
         memory_service=memory_service,
         plugins=[request_intercepter_plugin, ensure_retry_options_plugin],
     ) as runner:
-      events = []
-      while True:
-        next_user_message = await user_simulator.get_next_user_message(
-            copy.deepcopy(events)
-        )
-        if next_user_message.status == UserSimulatorStatus.SUCCESS:
-          async for (
-              event
-          ) in EvaluationGenerator._generate_inferences_for_single_user_invocation(
-              runner, user_id, session_id, next_user_message.user_message
-          ):
-            events.append(event)
-        else:  # no message generated
-          break
-
-      app_details_by_invocation_id = (
-          EvaluationGenerator._get_app_details_by_invocation_id(
-              events, request_intercepter_plugin
-          )
-      )
-      return EvaluationGenerator.convert_events_to_eval_invocations(
-          events, app_details_by_invocation_id
+      return await EvaluationGenerator._run_user_simulation_loop(
+          runner=runner,
+          user_id=user_id,
+          session_id=session_id,
+          user_simulator=user_simulator,
+          request_intercepter_plugin=request_intercepter_plugin,
       )
 
   @staticmethod
@@ -326,6 +365,62 @@ def convert_events_to_eval_invocations(
 
     return invocations
 
+  @staticmethod
+  async def _generate_inferences_from_app(
+      app: "App",
+      user_simulator: "UserSimulator",
+      initial_session: Optional["SessionInput"],
+      session_id: str,
+      session_service: "BaseSessionService",
+      artifact_service: "BaseArtifactService",
+      memory_service: "BaseMemoryService",
+  ) -> list["Invocation"]:
+    """Runs the user simulation through a Runner built from a copy of `app`.
+
+    Creates the session, then adds the eval plugins to a deep copy of `app`,
+    skipping any whose name is already taken, before running the loop.
+    """
+    if initial_session:
+      user_id = initial_session.user_id
+      app_name = initial_session.app_name
+      session_state = initial_session.state
+    else:
+      user_id, app_name, session_state = "test_user_id", app.name, {}
+    await session_service.create_session(
+        app_name=app_name,
+        user_id=user_id,
+        session_id=session_id,
+        state=session_state,
+    )
+
+    request_intercepter_plugin = _RequestIntercepterPlugin(
+        name="request_intercepter_plugin"
+    )
+    eval_plugins = (
+        request_intercepter_plugin,
+        EnsureRetryOptionsPlugin(name="ensure_retry_options"),
+    )
+
+    # Only the deep copy is extended; `app` itself gets no extra plugins.
+    runner_app = app.model_copy(deep=True)
+    taken_names = {plugin.name for plugin in runner_app.plugins}
+    runner_app.plugins.extend(
+        plugin for plugin in eval_plugins if plugin.name not in taken_names
+    )
+    async with Runner(
+        app=runner_app,
+        session_service=session_service,
+        artifact_service=artifact_service,
+        memory_service=memory_service,
+    ) as runner:
+      return await EvaluationGenerator._run_user_simulation_loop(
+          runner=runner,
+          user_id=user_id,
+          session_id=session_id,
+          user_simulator=user_simulator,
+          request_intercepter_plugin=request_intercepter_plugin,
+      )
+
   @staticmethod
   def _get_app_details_by_invocation_id(
       events: list[Event], request_intercepter: _RequestIntercepterPlugin