From 8d00362998e9f68e06b68efe9f4acc56efac20b1 Mon Sep 17 00:00:00 2001
From: ftnext
Date: Sun, 1 Feb 2026 10:59:29 +0900
Subject: [PATCH 1/4] feat(eval): Support custom metrics in AgentEvaluator

---
 src/google/adk/evaluation/agent_evaluator.py |  37 +++++
 .../evaluation/test_agent_evaluator.py       | 137 ++++++++++++++++++
 2 files changed, 174 insertions(+)
 create mode 100644 tests/unittests/evaluation/test_agent_evaluator.py

diff --git a/src/google/adk/evaluation/agent_evaluator.py b/src/google/adk/evaluation/agent_evaluator.py
index c0fc736340..ae6323502d 100644
--- a/src/google/adk/evaluation/agent_evaluator.py
+++ b/src/google/adk/evaluation/agent_evaluator.py
@@ -43,6 +43,9 @@
 from .eval_metrics import BaseCriterion
 from .eval_metrics import EvalMetric
 from .eval_metrics import EvalMetricResult
+from .eval_metrics import Interval
+from .eval_metrics import MetricInfo
+from .eval_metrics import MetricValueInfo
 from .eval_metrics import PrebuiltMetrics
 from .eval_result import EvalCaseResult
 from .eval_set import EvalSet
@@ -50,6 +53,8 @@
 from .evaluator import EvalStatus
 from .in_memory_eval_sets_manager import InMemoryEvalSetsManager
 from .local_eval_sets_manager import convert_eval_set_to_pydantic_schema
+from .metric_evaluator_registry import _get_default_metric_evaluator_registry
+from .metric_evaluator_registry import MetricEvaluatorRegistry
 from .simulation.user_simulator_provider import UserSimulatorProvider

 logger = logging.getLogger("google_adk." + __name__)
@@ -82,6 +87,19 @@ def load_json(file_path: str) -> Union[Dict, List]:
     return json.load(f)


+def _get_default_metric_info(
+    metric_name: str, description: str = ""
+) -> MetricInfo:
+  """Returns a default MetricInfo for a metric."""
+  return MetricInfo(
+      metric_name=metric_name,
+      description=description,
+      metric_value_info=MetricValueInfo(
+          interval=Interval(min_value=0.0, max_value=1.0)
+      ),
+  )
+
+
 class _EvalMetricResultWithInvocation(BaseModel):
   """EvalMetricResult along with both actual and expected invocation.

@@ -154,6 +172,22 @@ async def evaluate_eval_set(
         user_simulator_config=eval_config.user_simulator_config
     )

+    metric_evaluator_registry = _get_default_metric_evaluator_registry()
+    if eval_config.custom_metrics:
+      from .custom_metric_evaluator import _CustomMetricEvaluator
+
+      for metric_name, config in eval_config.custom_metrics.items():
+        if config.metric_info:
+          metric_info = config.metric_info.model_copy()
+          metric_info.metric_name = metric_name
+        else:
+          metric_info = _get_default_metric_info(
+              metric_name=metric_name, description=config.description
+          )
+        metric_evaluator_registry.register_evaluator(
+            metric_info, _CustomMetricEvaluator
+        )
+
     # Step 1: Perform evals, basically inferencing and evaluation of metrics
     eval_results_by_eval_id = await AgentEvaluator._get_eval_results_by_eval_id(
         agent_for_eval=agent_for_eval,
@@ -161,6 +195,7 @@ async def evaluate_eval_set(
         eval_metrics=eval_metrics,
         num_runs=num_runs,
         user_simulator_provider=user_simulator_provider,
+        metric_evaluator_registry=metric_evaluator_registry,
     )

     # Step 2: Post-process the results!
@@ -536,6 +571,7 @@ async def _get_eval_results_by_eval_id(
       eval_metrics: list[EvalMetric],
       num_runs: int,
       user_simulator_provider: UserSimulatorProvider,
+      metric_evaluator_registry: Optional[MetricEvaluatorRegistry] = None,
   ) -> dict[str, list[EvalCaseResult]]:
     """Returns EvalCaseResults grouped by eval case id.
@@ -560,6 +596,7 @@ async def _get_eval_results_by_eval_id(
             app_name=app_name, eval_set=eval_set
         ),
         user_simulator_provider=user_simulator_provider,
+        metric_evaluator_registry=metric_evaluator_registry,
     )

     inference_requests = [
diff --git a/tests/unittests/evaluation/test_agent_evaluator.py b/tests/unittests/evaluation/test_agent_evaluator.py
new file mode 100644
index 0000000000..7bfb99469d
--- /dev/null
+++ b/tests/unittests/evaluation/test_agent_evaluator.py
@@ -0,0 +1,137 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from google.adk.errors.not_found_error import NotFoundError
+from google.adk.evaluation.agent_evaluator import AgentEvaluator
+from google.adk.evaluation.custom_metric_evaluator import _CustomMetricEvaluator
+from google.adk.evaluation.eval_config import EvalConfig
+from google.adk.evaluation.eval_metrics import BaseCriterion
+from google.adk.evaluation.eval_metrics import EvalMetric
+from google.adk.evaluation.eval_set import EvalSet
+from google.adk.evaluation.metric_evaluator_registry import MetricEvaluatorRegistry
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def restore_metric_registry():
+  original_registry = MetricEvaluatorRegistry._registry.copy()
+  yield
+  MetricEvaluatorRegistry._registry = original_registry
+
+
+def fake_custom_metric(*_args, **_kwargs):
+  return None
+
+
+@pytest.mark.asyncio
+async def test_evaluate_eval_set_registers_custom_metric(monkeypatch):
+  eval_config = EvalConfig(
+      criteria={"my_custom_metric": 0.5},
+      custom_metrics={
+          "my_custom_metric": {
+              "code_config": {
+                  "name": (
+                      "tests.unittests.evaluation."
+                      "test_agent_evaluator.fake_custom_metric"
+                  ),
+              },
+          },
+      },
+  )
+  eval_set = EvalSet(
+      eval_set_id="eval_set",
+      name="eval_set",
+      eval_cases=[],
+  )
+
+  async def fake_get_agent_for_eval(*_args, **_kwargs):
+    return object()
+
+  async def fake_get_eval_results_by_eval_id(
+      *_args, metric_evaluator_registry, **_kwargs
+  ):
+    eval_metric = EvalMetric(
+        metric_name="my_custom_metric",
+        threshold=0.5,
+        criterion=BaseCriterion(threshold=0.5),
+        custom_function_path=(
+            "tests.unittests.evaluation.test_agent_evaluator.fake_custom_metric"
+        ),
+    )
+    evaluator = metric_evaluator_registry.get_evaluator(eval_metric)
+    assert isinstance(evaluator, _CustomMetricEvaluator)
+    return {}
+
+  monkeypatch.setattr(
+      AgentEvaluator, "_get_agent_for_eval", fake_get_agent_for_eval
+  )
+  monkeypatch.setattr(
+      AgentEvaluator,
+      "_get_eval_results_by_eval_id",
+      fake_get_eval_results_by_eval_id,
+  )
+
+  await AgentEvaluator.evaluate_eval_set(
+      agent_module="dummy.module",
+      eval_set=eval_set,
+      eval_config=eval_config,
+      num_runs=1,
+      print_detailed_results=False,
+  )
+
+
+@pytest.mark.asyncio
+async def test_evaluate_eval_set_does_not_register_without_custom_metrics(
+    monkeypatch,
+):
+  eval_config = EvalConfig(criteria={"response_match_score": 0.8})
+  eval_set = EvalSet(
+      eval_set_id="eval_set",
+      name="eval_set",
+      eval_cases=[],
+  )
+
+  async def fake_get_agent_for_eval(*_args, **_kwargs):
+    return object()
+
+  async def fake_get_eval_results_by_eval_id(
+      *_args, metric_evaluator_registry, **_kwargs
+  ):
+    eval_metric = EvalMetric(
+        metric_name="my_custom_metric",
+        threshold=0.5,
+        criterion=BaseCriterion(threshold=0.5),
+    )
+    with pytest.raises(NotFoundError):
+      metric_evaluator_registry.get_evaluator(eval_metric)
+    return {}
+
+  monkeypatch.setattr(
+      AgentEvaluator, "_get_agent_for_eval", fake_get_agent_for_eval
+  )
+  monkeypatch.setattr(
+      AgentEvaluator,
+      "_get_eval_results_by_eval_id",
+      fake_get_eval_results_by_eval_id,
+  )
+
+  await AgentEvaluator.evaluate_eval_set(
+      agent_module="dummy.module",
+      eval_set=eval_set,
+      eval_config=eval_config,
+      num_runs=1,
+      print_detailed_results=False,
+  )

From 944633288c93ca26b0b79c179072904b904fbca8 Mon Sep 17 00:00:00 2001
From: ftnext
Date: Sun, 1 Feb 2026 11:17:31 +0900
Subject: [PATCH 2/4] refactor(eval): Extract default metric info helper

---
 src/google/adk/cli/cli_eval.py               | 17 +---------
 src/google/adk/evaluation/agent_evaluator.py | 19 ++---------
 src/google/adk/evaluation/metric_defaults.py | 32 +++++++++++++++++++
 .../evaluation/test_metric_info_utils.py     | 26 +++++++++++++++
 4 files changed, 61 insertions(+), 33 deletions(-)
 create mode 100644 src/google/adk/evaluation/metric_defaults.py
 create mode 100644 tests/unittests/evaluation/test_metric_info_utils.py

diff --git a/src/google/adk/cli/cli_eval.py b/src/google/adk/cli/cli_eval.py
index 33c1693208..571d95c5fb 100644
--- a/src/google/adk/cli/cli_eval.py
+++ b/src/google/adk/cli/cli_eval.py
@@ -34,11 +34,9 @@
 from ..evaluation.eval_case import get_all_tool_calls
 from ..evaluation.eval_case import IntermediateDataType
 from ..evaluation.eval_metrics import EvalMetric
-from ..evaluation.eval_metrics import Interval
-from ..evaluation.eval_metrics import MetricInfo
-from ..evaluation.eval_metrics import MetricValueInfo
 from ..evaluation.eval_result import EvalCaseResult
 from ..evaluation.eval_sets_manager import EvalSetsManager
+from ..evaluation.metric_defaults import get_default_metric_info
 from ..utils.context_utils import Aclosing

 logger = logging.getLogger("google_adk." + __name__)
@@ -73,19 +71,6 @@ def _get_agent_module(agent_module_file_path: str):
     return _import_from_path(module_name, file_path)


-def get_default_metric_info(
-    metric_name: str, description: str = ""
-) -> MetricInfo:
-  """Returns a default MetricInfo for a metric."""
-  return MetricInfo(
-      metric_name=metric_name,
-      description=description,
-      metric_value_info=MetricValueInfo(
-          interval=Interval(min_value=0.0, max_value=1.0)
-      ),
-  )
-
-
 def get_root_agent(agent_module_file_path: str) -> Agent:
   """Returns root agent given the agent module."""
   agent_module = _get_agent_module(agent_module_file_path)
diff --git a/src/google/adk/evaluation/agent_evaluator.py b/src/google/adk/evaluation/agent_evaluator.py
index ae6323502d..9cad502b5a 100644
--- a/src/google/adk/evaluation/agent_evaluator.py
+++ b/src/google/adk/evaluation/agent_evaluator.py
@@ -43,9 +43,6 @@
 from .eval_metrics import BaseCriterion
 from .eval_metrics import EvalMetric
 from .eval_metrics import EvalMetricResult
-from .eval_metrics import Interval
-from .eval_metrics import MetricInfo
-from .eval_metrics import MetricValueInfo
 from .eval_metrics import PrebuiltMetrics
 from .eval_result import EvalCaseResult
 from .eval_set import EvalSet
@@ -53,6 +50,7 @@
 from .evaluator import EvalStatus
 from .in_memory_eval_sets_manager import InMemoryEvalSetsManager
 from .local_eval_sets_manager import convert_eval_set_to_pydantic_schema
+from .metric_defaults import get_default_metric_info
 from .metric_evaluator_registry import _get_default_metric_evaluator_registry
 from .metric_evaluator_registry import MetricEvaluatorRegistry
 from .simulation.user_simulator_provider import UserSimulatorProvider
@@ -87,19 +85,6 @@ def load_json(file_path: str) -> Union[Dict, List]:
     return json.load(f)


-def _get_default_metric_info(
-    metric_name: str, description: str = ""
-) -> MetricInfo:
-  """Returns a default MetricInfo for a metric."""
-  return MetricInfo(
-      metric_name=metric_name,
-      description=description,
-      metric_value_info=MetricValueInfo(
-          interval=Interval(min_value=0.0, max_value=1.0)
-      ),
-  )
-
-
 class _EvalMetricResultWithInvocation(BaseModel):
   """EvalMetricResult along with both actual and expected invocation.

@@ -181,7 +166,7 @@ async def evaluate_eval_set(
           metric_info = config.metric_info.model_copy()
           metric_info.metric_name = metric_name
         else:
-          metric_info = _get_default_metric_info(
+          metric_info = get_default_metric_info(
              metric_name=metric_name, description=config.description
           )
         metric_evaluator_registry.register_evaluator(
diff --git a/src/google/adk/evaluation/metric_defaults.py b/src/google/adk/evaluation/metric_defaults.py
new file mode 100644
index 0000000000..acbaedca17
--- /dev/null
+++ b/src/google/adk/evaluation/metric_defaults.py
@@ -0,0 +1,32 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from .eval_metrics import Interval
+from .eval_metrics import MetricInfo
+from .eval_metrics import MetricValueInfo
+
+
+def get_default_metric_info(
+    metric_name: str, description: str = ""
+) -> MetricInfo:
+  """Returns a default MetricInfo for a metric."""
+  return MetricInfo(
+      metric_name=metric_name,
+      description=description,
+      metric_value_info=MetricValueInfo(
+          interval=Interval(min_value=0.0, max_value=1.0)
+      ),
+  )
diff --git a/tests/unittests/evaluation/test_metric_info_utils.py b/tests/unittests/evaluation/test_metric_info_utils.py
new file mode 100644
index 0000000000..b8d269d43b
--- /dev/null
+++ b/tests/unittests/evaluation/test_metric_info_utils.py
@@ -0,0 +1,26 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from google.adk.evaluation.metric_defaults import get_default_metric_info
+
+
+def test_get_default_metric_info():
+  metric_info = get_default_metric_info("my_metric", "test description")
+
+  assert metric_info.metric_name == "my_metric"
+  assert metric_info.description == "test description"
+  assert metric_info.metric_value_info.interval.min_value == 0.0
+  assert metric_info.metric_value_info.interval.max_value == 1.0

From 2985224393f426dac49a073d6016a7cdd5c2b8e7 Mon Sep 17 00:00:00 2001
From: ftnext
Date: Sun, 1 Feb 2026 12:14:07 +0900
Subject: [PATCH 3/4] test(integration): Add custom metric example eval

---
 .../test_files/custom_metrics/metrics.py   | 69 +++++++++++++++++++
 .../simple_custom_metric.test.json         | 65 +++++++++++++++++
 .../custom_metrics/test_config.json        | 13 ++++
 tests/integration/test_with_test_file.py   | 12 ++++
 4 files changed, 159 insertions(+)
 create mode 100644 tests/integration/fixture/home_automation_agent/test_files/custom_metrics/metrics.py
 create mode 100644 tests/integration/fixture/home_automation_agent/test_files/custom_metrics/simple_custom_metric.test.json
 create mode 100644 tests/integration/fixture/home_automation_agent/test_files/custom_metrics/test_config.json

diff --git a/tests/integration/fixture/home_automation_agent/test_files/custom_metrics/metrics.py b/tests/integration/fixture/home_automation_agent/test_files/custom_metrics/metrics.py
new file mode 100644
index 0000000000..2ecbf8bd5e
--- /dev/null
+++ b/tests/integration/fixture/home_automation_agent/test_files/custom_metrics/metrics.py
@@ -0,0 +1,69 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from typing import Optional
+
+from google.adk.evaluation.eval_case import ConversationScenario
+from google.adk.evaluation.eval_case import get_all_tool_calls
+from google.adk.evaluation.eval_case import Invocation
+from google.adk.evaluation.eval_metrics import EvalMetric
+from google.adk.evaluation.eval_metrics import EvalStatus
+from google.adk.evaluation.evaluator import EvaluationResult
+from google.adk.evaluation.evaluator import PerInvocationResult
+
+
+def tool_trajectory_length_match(
+    eval_metric: EvalMetric,
+    actual_invocations: list[Invocation],
+    expected_invocations: Optional[list[Invocation]] = None,
+    conversation_scenario: Optional[ConversationScenario] = None,
+) -> EvaluationResult:
+  del eval_metric
+  del conversation_scenario
+  expected_invocations = expected_invocations or []
+
+  per_invocation_results = []
+  for idx, actual in enumerate(actual_invocations):
+    expected = (
+        expected_invocations[idx] if idx < len(expected_invocations) else None
+    )
+    actual_tools = get_all_tool_calls(actual.intermediate_data)
+    expected_tools = (
+        get_all_tool_calls(expected.intermediate_data) if expected else []
+    )
+    match = len(actual_tools) == len(expected_tools)
+    per_invocation_results.append(
+        PerInvocationResult(
+            actual_invocation=actual,
+            expected_invocation=expected,
+            score=1.0 if match else 0.0,
+            eval_status=EvalStatus.PASSED if match else EvalStatus.FAILED,
+        )
+    )
+
+  overall_score = (
+      sum(r.score for r in per_invocation_results) / len(per_invocation_results)
+      if per_invocation_results
+      else 0.0
+  )
+  overall_eval_status = (
+      EvalStatus.PASSED if overall_score == 1.0 else EvalStatus.FAILED
+  )
+  return EvaluationResult(
+      overall_score=overall_score,
+      overall_eval_status=overall_eval_status,
+      per_invocation_results=per_invocation_results,
+  )
diff --git a/tests/integration/fixture/home_automation_agent/test_files/custom_metrics/simple_custom_metric.test.json b/tests/integration/fixture/home_automation_agent/test_files/custom_metrics/simple_custom_metric.test.json
new file mode 100644
index 0000000000..42a8d51470
--- /dev/null
+++ b/tests/integration/fixture/home_automation_agent/test_files/custom_metrics/simple_custom_metric.test.json
@@ -0,0 +1,65 @@
+{
+  "eval_set_id": "custom_metrics_eval_set",
+  "name": "custom_metrics_eval_set",
+  "description": "Custom metric evaluation sample.",
+  "eval_cases": [
+    {
+      "eval_id": "tests/integration/fixture/home_automation_agent/test_files/custom_metrics/simple_custom_metric.test.json",
+      "conversation": [
+        {
+          "invocation_id": "a9e4f840-7f1e-4b69-b9c1-3b85c03a60a4",
+          "user_content": {
+            "parts": [
+              {
+                "video_metadata": null,
+                "thought": null,
+                "code_execution_result": null,
+                "executable_code": null,
+                "file_data": null,
+                "function_call": null,
+                "function_response": null,
+                "inline_data": null,
+                "text": "Turn off device_2 in the Bedroom."
+              }
+            ],
+            "role": "user"
+          },
+          "final_response": {
+            "parts": [
+              {
+                "video_metadata": null,
+                "thought": null,
+                "code_execution_result": null,
+                "executable_code": null,
+                "file_data": null,
+                "function_call": null,
+                "function_response": null,
+                "inline_data": null,
+                "text": "I have set the device_2 status to off."
+              }
+            ],
+            "role": "model"
+          },
+          "intermediate_data": {
+            "tool_uses": [
+              {
+                "id": null,
+                "args": {
+                  "location": "Bedroom",
+                  "device_id": "device_2",
+                  "status": "OFF"
+                },
+                "name": "set_device_info"
+              }
+            ],
+            "intermediate_responses": []
+          },
+          "creation_timestamp": 1747337309.2360144
+        }
+      ],
+      "session_input": null,
+      "creation_timestamp": 1747337309.2360282
+    }
+  ],
+  "creation_timestamp": 1747337309.2360387
+}
diff --git a/tests/integration/fixture/home_automation_agent/test_files/custom_metrics/test_config.json b/tests/integration/fixture/home_automation_agent/test_files/custom_metrics/test_config.json
new file mode 100644
index 0000000000..405f7910f8
--- /dev/null
+++ b/tests/integration/fixture/home_automation_agent/test_files/custom_metrics/test_config.json
@@ -0,0 +1,13 @@
+{
+  "criteria": {
+    "tool_trajectory_length_match": 1.0
+  },
+  "custom_metrics": {
+    "tool_trajectory_length_match": {
+      "code_config": {
+        "name": "tests.integration.fixture.home_automation_agent.test_files.custom_metrics.metrics.tool_trajectory_length_match"
+      },
+      "description": "Checks that actual and expected tool trajectories have the same length."
+    }
+  }
+}
diff --git a/tests/integration/test_with_test_file.py b/tests/integration/test_with_test_file.py
index eed2a2d732..aa644d0b71 100644
--- a/tests/integration/test_with_test_file.py
+++ b/tests/integration/test_with_test_file.py
@@ -25,6 +25,18 @@ async def test_with_single_test_file():
     )


+@pytest.mark.asyncio
+async def test_with_custom_metric():
+  """Test eval with a custom metric."""
+  await AgentEvaluator.evaluate(
+      agent_module="tests.integration.fixture.home_automation_agent",
+      eval_dataset_file_path_or_dir=(
+          "tests/integration/fixture/home_automation_agent/test_files/custom_metrics/simple_custom_metric.test.json"
+      ),
+      num_runs=1,
+  )
+
+
 @pytest.mark.asyncio
 async def test_with_folder_of_test_files_long_running():
   """Test the agent's basic ability via a folder of session files."""

From 2afcec12d7d69da7a42d29983ca03faa9478cafd Mon Sep 17 00:00:00 2001
From: ftnext
Date: Sun, 1 Feb 2026 12:24:25 +0900
Subject: [PATCH 4/4] fix(eval): Isolate metric evaluator registry per instance

---
 src/google/adk/evaluation/metric_evaluator_registry.py | 4 +++-
 tests/unittests/evaluation/test_agent_evaluator.py     | 8 --------
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/src/google/adk/evaluation/metric_evaluator_registry.py b/src/google/adk/evaluation/metric_evaluator_registry.py
index 775d5c2d7a..981e11926b 100644
--- a/src/google/adk/evaluation/metric_evaluator_registry.py
+++ b/src/google/adk/evaluation/metric_evaluator_registry.py
@@ -47,7 +47,9 @@
 class MetricEvaluatorRegistry:
   """A registry for metric Evaluators."""

-  _registry: dict[str, tuple[type[Evaluator], MetricInfo]] = {}
+  def __init__(self):
+    """Initializes an empty registry."""
+    self._registry: dict[str, tuple[type[Evaluator], MetricInfo]] = {}

   def get_evaluator(self, eval_metric: EvalMetric) -> Evaluator:
     """Returns an Evaluator for the given metric.
diff --git a/tests/unittests/evaluation/test_agent_evaluator.py b/tests/unittests/evaluation/test_agent_evaluator.py
index 7bfb99469d..fcd4e93b79 100644
--- a/tests/unittests/evaluation/test_agent_evaluator.py
+++ b/tests/unittests/evaluation/test_agent_evaluator.py
@@ -21,17 +21,9 @@
 from google.adk.evaluation.eval_metrics import BaseCriterion
 from google.adk.evaluation.eval_metrics import EvalMetric
 from google.adk.evaluation.eval_set import EvalSet
-from google.adk.evaluation.metric_evaluator_registry import MetricEvaluatorRegistry
 import pytest


-@pytest.fixture(autouse=True)
-def restore_metric_registry():
-  original_registry = MetricEvaluatorRegistry._registry.copy()
-  yield
-  MetricEvaluatorRegistry._registry = original_registry
-
-
 def fake_custom_metric(*_args, **_kwargs):
   return None