From a446492eac74734896e4374d52a4b62c20e7aa4b Mon Sep 17 00:00:00 2001 From: Aryan Bagade Date: Wed, 21 Jan 2026 18:49:01 -0800 Subject: [PATCH 1/3] fix: Force AUDIO modality for native-audio models in /run_live (#4206) Signed-off-by: Aryan Bagade --- src/google/adk/cli/adk_web_server.py | 18 +++- tests/unittests/cli/test_fast_api.py | 139 +++++++++++++++++++++++++++ 2 files changed, 156 insertions(+), 1 deletion(-) diff --git a/src/google/adk/cli/adk_web_server.py b/src/google/adk/cli/adk_web_server.py index 4404d62e4e..aef73eb8f4 100644 --- a/src/google/adk/cli/adk_web_server.py +++ b/src/google/adk/cli/adk_web_server.py @@ -1652,7 +1652,23 @@ async def run_agent_live( async def forward_events(): runner = await self.get_runner_async(app_name) - run_config = RunConfig(response_modalities=modalities) + + # Check if agent uses a native-audio model. + # Native audio models only support AUDIO modality, so we force it. + agent_or_app = self.agent_loader.load_agent(app_name) + root_agent = self._get_root_agent(agent_or_app) + model_name = ( + root_agent.model + if hasattr(root_agent, "model") + and isinstance(root_agent.model, str) + else "" + ) + if "native-audio" in model_name: + effective_modalities = ["AUDIO"] + else: + effective_modalities = modalities + + run_config = RunConfig(response_modalities=effective_modalities) async with Aclosing( runner.run_live( session=session, diff --git a/tests/unittests/cli/test_fast_api.py b/tests/unittests/cli/test_fast_api.py index 0c69605349..a173523be7 100755 --- a/tests/unittests/cli/test_fast_api.py +++ b/tests/unittests/cli/test_fast_api.py @@ -29,6 +29,7 @@ from fastapi.testclient import TestClient from google.adk.agents.base_agent import BaseAgent +from google.adk.agents.llm_agent import LlmAgent from google.adk.agents.run_config import RunConfig from google.adk.apps.app import App from google.adk.artifacts.base_artifact_service import ArtifactVersion @@ -1411,5 +1412,143 @@ def test_builder_save_rejects_traversal(builder_test_client, tmp_path): assert not (tmp_path / "app" / "tmp" / "escape.yaml").exists() +def test_native_audio_model_forces_audio_modality(): + """Test that native-audio models force AUDIO modality in run_live endpoint.""" + from google.adk.cli.adk_web_server import AdkWebServer + + # Create an LlmAgent with a native-audio model + native_audio_agent = LlmAgent( + name="native_audio_agent", + model="gemini-live-2.5-flash-native-audio", + ) + + class NativeAudioAgentLoader: + + def load_agent(self, app_name): + return native_audio_agent + + def list_agents(self): + return ["test_native_audio_app"] + + session_service = AsyncMock() + session = Session( + id="session_id", + app_name="test_native_audio_app", + user_id="user", + state={}, + events=[], + ) + session_service.get_session.return_value = session + + adk_web_server = AdkWebServer( + agent_loader=NativeAudioAgentLoader(), + session_service=session_service, + memory_service=MagicMock(), + artifact_service=MagicMock(), + credential_service=MagicMock(), + eval_sets_manager=MagicMock(), + eval_set_results_manager=MagicMock(), + agents_dir=".", + ) + + # Verify the _get_root_agent method works correctly + root_agent = adk_web_server._get_root_agent(native_audio_agent) + assert root_agent.model == "gemini-live-2.5-flash-native-audio" + assert "native-audio" in root_agent.model + + # Verify the model name detection logic + model_name = ( + root_agent.model + if hasattr(root_agent, "model") and isinstance(root_agent.model, str) + else "" + ) + assert "native-audio" in model_name + + # Test with App wrapping the agent + app_agent = App(name="test_app", root_agent=native_audio_agent) + root_agent_from_app = adk_web_server._get_root_agent(app_agent) + assert root_agent_from_app.model == "gemini-live-2.5-flash-native-audio" + + +def test_non_native_audio_model_keeps_requested_modality(): + """Test that non-native-audio models keep the requested modality.""" + from google.adk.cli.adk_web_server import AdkWebServer + + # Create an LlmAgent with a regular model (not native-audio) + regular_agent = LlmAgent( + name="regular_agent", + model="gemini-2.5-flash", + ) + + class RegularAgentLoader: + + def load_agent(self, app_name): + return regular_agent + + def list_agents(self): + return ["test_regular_app"] + + adk_web_server = AdkWebServer( + agent_loader=RegularAgentLoader(), + session_service=MagicMock(), + memory_service=MagicMock(), + artifact_service=MagicMock(), + credential_service=MagicMock(), + eval_sets_manager=MagicMock(), + eval_set_results_manager=MagicMock(), + agents_dir=".", + ) + + root_agent = adk_web_server._get_root_agent(regular_agent) + model_name = ( + root_agent.model + if hasattr(root_agent, "model") and isinstance(root_agent.model, str) + else "" + ) + + # For regular models, the modality should NOT be forced to AUDIO + assert "native-audio" not in model_name + + +def test_agent_without_model_attribute(): + """Test handling of agents without model attribute (BaseAgent).""" + from google.adk.cli.adk_web_server import AdkWebServer + + # Create a BaseAgent (which doesn't have a model attribute) + base_agent = DummyAgent(name="base_agent") + + class BaseAgentLoader: + + def load_agent(self, app_name): + return base_agent + + def list_agents(self): + return ["test_base_app"] + + adk_web_server = AdkWebServer( + agent_loader=BaseAgentLoader(), + session_service=MagicMock(), + memory_service=MagicMock(), + artifact_service=MagicMock(), + credential_service=MagicMock(), + eval_sets_manager=MagicMock(), + eval_set_results_manager=MagicMock(), + agents_dir=".", + ) + + root_agent = adk_web_server._get_root_agent(base_agent) + + # BaseAgent (DummyAgent) doesn't have a model attribute + model_name = ( + root_agent.model + if hasattr(root_agent, "model") and isinstance(root_agent.model, str) + else "" + ) + + # Should default to empty string when no model attribute + assert model_name == "" + assert "native-audio" not in model_name + + if __name__ == "__main__": pytest.main(["-xvs", __file__]) From 9fc0b220749ed0c40aaa52dfc1ef9c8766142ecd Mon Sep 17 00:00:00 2001 From: Aryan Bagade Date: Wed, 21 Jan 2026 20:54:53 -0800 Subject: [PATCH 2/3] fix: Refactor modality logic into helper method Signed-off-by: Aryan Bagade --- src/google/adk/cli/adk_web_server.py | 41 +++++++++------ tests/unittests/cli/test_fast_api.py | 76 ++++++++-------------------- 2 files changed, 47 insertions(+), 70 deletions(-) diff --git a/src/google/adk/cli/adk_web_server.py b/src/google/adk/cli/adk_web_server.py index aef73eb8f4..1f2f441622 100644 --- a/src/google/adk/cli/adk_web_server.py +++ b/src/google/adk/cli/adk_web_server.py @@ -550,6 +550,30 @@ def _get_root_agent(self, agent_or_app: BaseAgent | App) -> BaseAgent: return agent_or_app.root_agent return agent_or_app + def _get_effective_modalities( + self, app_name: str, requested_modalities: List[str] + ) -> List[str]: + """Determines effective modalities, forcing AUDIO for native-audio models. + + Native-audio models only support AUDIO modality. This method detects + native-audio models by checking if the model name contains "native-audio" + and forces AUDIO modality for those models. + + Args: + app_name: The name of the application/agent. + requested_modalities: The modalities requested by the client. + + Returns: + The effective modalities to use. + """ + agent_or_app = self.agent_loader.load_agent(app_name) + root_agent = self._get_root_agent(agent_or_app) + model = getattr(root_agent, "model", None) + model_name = model if isinstance(model, str) else "" + if "native-audio" in model_name: + return ["AUDIO"] + return requested_modalities + def _create_runner(self, agentic_app: App) -> Runner: """Create a runner with common services.""" return Runner( @@ -1652,22 +1676,9 @@ async def run_agent_live( async def forward_events(): runner = await self.get_runner_async(app_name) - - # Check if agent uses a native-audio model. - # Native audio models only support AUDIO modality, so we force it. - agent_or_app = self.agent_loader.load_agent(app_name) - root_agent = self._get_root_agent(agent_or_app) - model_name = ( - root_agent.model - if hasattr(root_agent, "model") - and isinstance(root_agent.model, str) - else "" + effective_modalities = self._get_effective_modalities( + app_name, modalities ) - if "native-audio" in model_name: - effective_modalities = ["AUDIO"] - else: - effective_modalities = modalities - run_config = RunConfig(response_modalities=effective_modalities) async with Aclosing( runner.run_live( diff --git a/tests/unittests/cli/test_fast_api.py b/tests/unittests/cli/test_fast_api.py index a173523be7..cd1b50475e 100755 --- a/tests/unittests/cli/test_fast_api.py +++ b/tests/unittests/cli/test_fast_api.py @@ -1413,10 +1413,9 @@ def test_builder_save_rejects_traversal(builder_test_client, tmp_path): def test_native_audio_model_forces_audio_modality(): - """Test that native-audio models force AUDIO modality in run_live endpoint.""" + """Test that native-audio models force AUDIO modality.""" from google.adk.cli.adk_web_server import AdkWebServer - # Create an LlmAgent with a native-audio model native_audio_agent = LlmAgent( name="native_audio_agent", model="gemini-live-2.5-flash-native-audio", @@ -1428,21 +1427,11 @@ def load_agent(self, app_name): return native_audio_agent def list_agents(self): - return ["test_native_audio_app"] - - session_service = AsyncMock() - session = Session( - id="session_id", - app_name="test_native_audio_app", - user_id="user", - state={}, - events=[], - ) - session_service.get_session.return_value = session + return ["test_app"] adk_web_server = AdkWebServer( agent_loader=NativeAudioAgentLoader(), - session_service=session_service, + session_service=MagicMock(), memory_service=MagicMock(), artifact_service=MagicMock(), credential_service=MagicMock(), @@ -1451,30 +1440,19 @@ def list_agents(self): agents_dir=".", ) - # Verify the _get_root_agent method works correctly - root_agent = adk_web_server._get_root_agent(native_audio_agent) - assert root_agent.model == "gemini-live-2.5-flash-native-audio" - assert "native-audio" in root_agent.model - - # Verify the model name detection logic - model_name = ( - root_agent.model - if hasattr(root_agent, "model") and isinstance(root_agent.model, str) - else "" - ) - assert "native-audio" in model_name + # Test: requesting TEXT should be forced to AUDIO + modalities = adk_web_server._get_effective_modalities("test_app", ["TEXT"]) + assert modalities == ["AUDIO"] - # Test with App wrapping the agent - app_agent = App(name="test_app", root_agent=native_audio_agent) - root_agent_from_app = adk_web_server._get_root_agent(app_agent) - assert root_agent_from_app.model == "gemini-live-2.5-flash-native-audio" + # Test: requesting AUDIO should stay AUDIO + modalities = adk_web_server._get_effective_modalities("test_app", ["AUDIO"]) + assert modalities == ["AUDIO"] def test_non_native_audio_model_keeps_requested_modality(): """Test that non-native-audio models keep the requested modality.""" from google.adk.cli.adk_web_server import AdkWebServer - # Create an LlmAgent with a regular model (not native-audio) regular_agent = LlmAgent( name="regular_agent", model="gemini-2.5-flash", @@ -1486,7 +1464,7 @@ def load_agent(self, app_name): return regular_agent def list_agents(self): - return ["test_regular_app"] + return ["test_app"] adk_web_server = AdkWebServer( agent_loader=RegularAgentLoader(), @@ -1499,22 +1477,19 @@ def list_agents(self): agents_dir=".", ) - root_agent = adk_web_server._get_root_agent(regular_agent) - model_name = ( - root_agent.model - if hasattr(root_agent, "model") and isinstance(root_agent.model, str) - else "" - ) + # Test: requesting TEXT should stay TEXT + modalities = adk_web_server._get_effective_modalities("test_app", ["TEXT"]) + assert modalities == ["TEXT"] - # For regular models, the modality should NOT be forced to AUDIO - assert "native-audio" not in model_name + # Test: requesting AUDIO should stay AUDIO + modalities = adk_web_server._get_effective_modalities("test_app", ["AUDIO"]) + assert modalities == ["AUDIO"] def test_agent_without_model_attribute(): - """Test handling of agents without model attribute (BaseAgent).""" + """Test that agents without model attribute keep requested modality.""" from google.adk.cli.adk_web_server import AdkWebServer - # Create a BaseAgent (which doesn't have a model attribute) base_agent = DummyAgent(name="base_agent") class BaseAgentLoader: @@ -1523,7 +1498,7 @@ def load_agent(self, app_name): return base_agent def list_agents(self): - return ["test_base_app"] + return ["test_app"] adk_web_server = AdkWebServer( agent_loader=BaseAgentLoader(), @@ -1536,18 +1511,9 @@ def list_agents(self): agents_dir=".", ) - root_agent = adk_web_server._get_root_agent(base_agent) - - # BaseAgent (DummyAgent) doesn't have a model attribute - model_name = ( - root_agent.model - if hasattr(root_agent, "model") and isinstance(root_agent.model, str) - else "" - ) - - # Should default to empty string when no model attribute - assert model_name == "" - assert "native-audio" not in model_name + # Test: BaseAgent without model attr should keep requested modality + modalities = adk_web_server._get_effective_modalities("test_app", ["TEXT"]) + assert modalities == ["TEXT"] if __name__ == "__main__": From 45ea2bc493434517229449388bf70360a15ecafe Mon Sep 17 00:00:00 2001 From: Aryan Bagade Date: Wed, 21 Jan 2026 21:01:29 -0800 Subject: [PATCH 3/3] fix: Pass root_agent directly to avoid redundant loading Signed-off-by: Aryan Bagade --- src/google/adk/cli/adk_web_server.py | 8 ++--- tests/unittests/cli/test_fast_api.py | 46 +++++++++------------------- 2 files changed, 17 insertions(+), 37 deletions(-) diff --git a/src/google/adk/cli/adk_web_server.py b/src/google/adk/cli/adk_web_server.py index 1f2f441622..b12ccabd35 100644 --- a/src/google/adk/cli/adk_web_server.py +++ b/src/google/adk/cli/adk_web_server.py @@ -551,7 +551,7 @@ def _get_root_agent(self, agent_or_app: BaseAgent | App) -> BaseAgent: return agent_or_app def _get_effective_modalities( - self, app_name: str, requested_modalities: List[str] + self, root_agent: BaseAgent, requested_modalities: List[str] ) -> List[str]: """Determines effective modalities, forcing AUDIO for native-audio models. @@ -560,14 +560,12 @@ def _get_effective_modalities( and forces AUDIO modality for those models. Args: - app_name: The name of the application/agent. + root_agent: The root agent of the application. requested_modalities: The modalities requested by the client. Returns: The effective modalities to use. """ - agent_or_app = self.agent_loader.load_agent(app_name) - root_agent = self._get_root_agent(agent_or_app) model = getattr(root_agent, "model", None) model_name = model if isinstance(model, str) else "" if "native-audio" in model_name: @@ -1677,7 +1675,7 @@ async def run_agent_live( async def forward_events(): runner = await self.get_runner_async(app_name) effective_modalities = self._get_effective_modalities( - app_name, modalities + runner.app.root_agent, modalities ) run_config = RunConfig(response_modalities=effective_modalities) async with Aclosing( diff --git a/tests/unittests/cli/test_fast_api.py b/tests/unittests/cli/test_fast_api.py index cd1b50475e..42b17f93ce 100755 --- a/tests/unittests/cli/test_fast_api.py +++ b/tests/unittests/cli/test_fast_api.py @@ -1421,16 +1421,8 @@ def test_native_audio_model_forces_audio_modality(): model="gemini-live-2.5-flash-native-audio", ) - class NativeAudioAgentLoader: - - def load_agent(self, app_name): - return native_audio_agent - - def list_agents(self): - return ["test_app"] - adk_web_server = AdkWebServer( - agent_loader=NativeAudioAgentLoader(), + agent_loader=MagicMock(), session_service=MagicMock(), memory_service=MagicMock(), artifact_service=MagicMock(), @@ -1441,11 +1433,15 @@ def list_agents(self): ) # Test: requesting TEXT should be forced to AUDIO - modalities = adk_web_server._get_effective_modalities("test_app", ["TEXT"]) + modalities = adk_web_server._get_effective_modalities( + native_audio_agent, ["TEXT"] + ) assert modalities == ["AUDIO"] # Test: requesting AUDIO should stay AUDIO - modalities = adk_web_server._get_effective_modalities("test_app", ["AUDIO"]) + modalities = adk_web_server._get_effective_modalities( + native_audio_agent, ["AUDIO"] + ) assert modalities == ["AUDIO"] @@ -1458,16 +1454,8 @@ def test_non_native_audio_model_keeps_requested_modality(): model="gemini-2.5-flash", ) - class RegularAgentLoader: - - def load_agent(self, app_name): - return regular_agent - - def list_agents(self): - return ["test_app"] - adk_web_server = AdkWebServer( - agent_loader=RegularAgentLoader(), + agent_loader=MagicMock(), session_service=MagicMock(), memory_service=MagicMock(), artifact_service=MagicMock(), @@ -1478,11 +1466,13 @@ def list_agents(self): ) # Test: requesting TEXT should stay TEXT - modalities = adk_web_server._get_effective_modalities("test_app", ["TEXT"]) + modalities = adk_web_server._get_effective_modalities(regular_agent, ["TEXT"]) assert modalities == ["TEXT"] # Test: requesting AUDIO should stay AUDIO - modalities = adk_web_server._get_effective_modalities("test_app", ["AUDIO"]) + modalities = adk_web_server._get_effective_modalities( + regular_agent, ["AUDIO"] + ) assert modalities == ["AUDIO"] @@ -1492,16 +1482,8 @@ def test_agent_without_model_attribute(): base_agent = DummyAgent(name="base_agent") - class BaseAgentLoader: - - def load_agent(self, app_name): - return base_agent - - def list_agents(self): - return ["test_app"] - adk_web_server = AdkWebServer( - agent_loader=BaseAgentLoader(), + agent_loader=MagicMock(), session_service=MagicMock(), memory_service=MagicMock(), artifact_service=MagicMock(), @@ -1512,7 +1494,7 @@ def list_agents(self): ) # Test: BaseAgent without model attr should keep requested modality - modalities = adk_web_server._get_effective_modalities("test_app", ["TEXT"]) + modalities = adk_web_server._get_effective_modalities(base_agent, ["TEXT"]) assert modalities == ["TEXT"]