Skip to content

Commit 7870480

Browse files
jawoszek and copybara-github
authored and committed
feat(otel): add --otel_to_cloud experimental support
Cloud Trace, Cloud Monitoring and Cloud Logging integrations are set up via OTel if otel_to_cloud CLI param/fast_api arg is provided. This is similar to current Cloud Trace integration via trace_to_cloud, just extended to Monitoring and Logging as well. PiperOrigin-RevId: 807230668
1 parent b9735b2 commit 7870480

File tree

15 files changed

+430
-30
lines changed

15 files changed

+430
-30
lines changed

pyproject.toml

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,12 @@ dependencies = [
4141
"google-genai>=1.21.1, <2.0.0", # Google GenAI SDK
4242
"graphviz>=0.20.2, <1.0.0", # Graphviz for graph rendering
4343
"mcp>=1.8.0, <2.0.0;python_version>='3.10'", # For MCP Toolset
44-
"opentelemetry-api>=1.31.0, <2.0.0", # OpenTelemetry
44+
"opentelemetry-api>=1.31.0, <=1.37.0", # OpenTelemetry - limit upper version for sdk and api to not risk breaking changes from unstable _logs package.
45+
"opentelemetry-exporter-gcp-logging>=1.9.0a0, <2.0.0",
46+
"opentelemetry-exporter-gcp-monitoring>=1.9.0a0, <2.0.0",
4547
"opentelemetry-exporter-gcp-trace>=1.9.0, <2.0.0",
46-
"opentelemetry-sdk>=1.31.0, <2.0.0",
48+
"opentelemetry-resourcedetector-gcp>=1.9.0a0, <2.0.0",
49+
"opentelemetry-sdk>=1.31.0, <=1.37.0",
4750
"pydantic>=2.0, <3.0.0", # For data validation/models
4851
"python-dateutil>=2.9.0.post0, <3.0.0", # For Vertext AI Session Service
4952
"python-dotenv>=1.0.0, <2.0.0", # To manage environment variables
@@ -147,6 +150,13 @@ extensions = [
147150
"toolbox-core>=0.1.0", # For tools.toolbox_toolset.ToolboxToolset
148151
]
149152

153+
otel-gcp = [
154+
"opentelemetry-exporter-gcp-logging>=1.9.0a0, <2.0.0",
155+
"opentelemetry-exporter-gcp-monitoring>=1.9.0a0, <2.0.0",
156+
"opentelemetry-exporter-gcp-trace>=1.9.0, <2.0.0",
157+
"opentelemetry-resourcedetector-gcp>=1.9.0a0, <2.0.0",
158+
]
159+
150160

151161
[tool.pyink]
152162
# Format py files following Google style-guide

src/google/adk/cli/adk_web_server.py

Lines changed: 68 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
from opentelemetry import trace
4242
from opentelemetry.sdk.trace import export as export_lib
4343
from opentelemetry.sdk.trace import ReadableSpan
44+
from opentelemetry.sdk.trace import SpanProcessor
4445
from opentelemetry.sdk.trace import TracerProvider
4546
from pydantic import Field
4647
from pydantic import ValidationError
@@ -257,6 +258,59 @@ class ListMetricsInfoResponse(common.BaseModel):
257258
metrics_info: list[MetricInfo]
258259

259260

261+
def _setup_telemetry(
    otel_to_cloud: bool = False,
    internal_exporters: Optional[list[SpanProcessor]] = None,
):
  """Sets up OpenTelemetry tracing for the web server.

  Args:
    otel_to_cloud: EXPERIMENTAL. When True, setup is delegated to
      `_setup_telemetry_experimental`; otherwise the legacy path creates a
      fresh `TracerProvider` and installs it as the global tracer provider.
    internal_exporters: Span processors to register on the tracer provider.
      `None` (the default) is treated as "no processors".
  """
  # TODO - remove the condition and else branch here once
  # maybe_set_otel_providers is no longer experimental.
  if otel_to_cloud:
    _setup_telemetry_experimental(
        otel_to_cloud=otel_to_cloud, internal_exporters=internal_exporters
    )
  else:
    # Old logic - to be removed when above leaves experimental.
    tracer_provider = TracerProvider()
    # internal_exporters defaults to None; iterating None would raise
    # TypeError, so fall back to an empty list.
    for exporter in internal_exporters or []:
      tracer_provider.add_span_processor(exporter)
    trace.set_tracer_provider(tracer_provider=tracer_provider)
277+
278+
279+
def _setup_telemetry_experimental(
    otel_to_cloud: bool = False,
    internal_exporters: Optional[list[SpanProcessor]] = None,
):
  """Sets up OTel providers through `maybe_set_otel_providers`.

  Collects the hooks to install (ADK-internal span processors and, when
  requested, the Google Cloud exporters) and hands them to
  `maybe_set_otel_providers` together with the resource to use.

  Args:
    otel_to_cloud: Whether to add the Google Cloud exporters (tracing, metrics
      and logging are all enabled) and use the resource returned by
      `get_gcp_resource`.
    internal_exporters: ADK-specific span processors to register. `None` or an
      empty list adds no internal hooks.
  """
  # Imported lazily, inside the function, as in the rest of this setup code.
  from ..telemetry.setup import maybe_set_otel_providers

  otel_hooks_to_add = []
  otel_resource = None

  if internal_exporters:
    from ..telemetry.setup import OTelHooks

    # Register ADK-specific exporters in trace provider.
    otel_hooks_to_add.append(OTelHooks(span_processors=internal_exporters))

  if otel_to_cloud:
    from ..telemetry.google_cloud import get_gcp_exporters
    from ..telemetry.google_cloud import get_gcp_resource

    otel_hooks_to_add.append(
        get_gcp_exporters(
            # TODO - use trace_to_cloud here as well once otel_to_cloud is no
            # longer experimental.
            enable_cloud_tracing=True,
            enable_cloud_metrics=True,
            enable_cloud_logging=True,
        )
    )
    otel_resource = get_gcp_resource()

  maybe_set_otel_providers(
      otel_hooks_to_setup=otel_hooks_to_add, otel_resource=otel_resource
  )
312+
313+
260314
class AdkWebServer:
261315
"""Helper class for setting up and running the ADK web server on FastAPI.
262316
@@ -355,6 +409,7 @@ def get_fast_api_app(
355409
[Observer, "AdkWebServer"], None
356410
] = lambda o, s: None,
357411
register_processors: Callable[[TracerProvider], None] = lambda o: None,
412+
otel_to_cloud: bool = False,
358413
):
359414
"""Creates a FastAPI app for the ADK web server.
360415
@@ -371,6 +426,8 @@ def get_fast_api_app(
371426
tear_down_observer: Callback for cleaning up the file system observer.
372427
register_processors: Callback for additional Span processors to be added
373428
to the TracerProvider.
429+
otel_to_cloud: EXPERIMENTAL. Whether to enable Cloud Trace,
430+
Cloud Monitoring and Cloud Logging integrations.
374431
375432
Returns:
376433
A FastAPI app instance.
@@ -395,17 +452,20 @@ async def internal_lifespan(app: FastAPI):
395452
# Create tasks for all runner closures to run concurrently
396453
await cleanup.close_runners(list(self.runner_dict.values()))
397454

398-
# Set up tracing in the FastAPI server.
399-
provider = TracerProvider()
400-
provider.add_span_processor(
401-
export_lib.SimpleSpanProcessor(ApiServerSpanExporter(trace_dict))
402-
)
403455
memory_exporter = InMemoryExporter(session_trace_dict)
404-
provider.add_span_processor(export_lib.SimpleSpanProcessor(memory_exporter))
405456

406-
register_processors(provider)
457+
_setup_telemetry(
458+
otel_to_cloud=otel_to_cloud,
459+
internal_exporters=[
460+
export_lib.SimpleSpanProcessor(ApiServerSpanExporter(trace_dict)),
461+
export_lib.SimpleSpanProcessor(memory_exporter),
462+
],
463+
)
407464

408-
trace.set_tracer_provider(provider)
465+
# TODO - register_processors to be removed once --otel_to_cloud is no
466+
# longer experimental.
467+
tracer_provider = trace.get_tracer_provider()
468+
register_processors(tracer_provider)
409469

410470
# Run the FastAPI server.
411471
app = FastAPI(lifespan=internal_lifespan)

src/google/adk/cli/cli_tools_click.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -655,6 +655,17 @@ def decorator(func):
655655
default=False,
656656
help="Optional. Whether to enable cloud trace for telemetry.",
657657
)
658+
@click.option(
659+
"--otel_to_cloud",
660+
is_flag=True,
661+
show_default=True,
662+
default=False,
663+
help=(
664+
"EXPERIMENTAL Optional. Whether to write OTel data to Google Cloud"
665+
" Observability services - Cloud Trace, Cloud Monitoring and Cloud"
666+
" Logging."
667+
),
668+
)
658669
@click.option(
659670
"--reload/--no-reload",
660671
default=True,
@@ -723,6 +734,7 @@ def cli_web(
723734
host: str = "127.0.0.1",
724735
port: int = 8000,
725736
trace_to_cloud: bool = False,
737+
otel_to_cloud: bool = False,
726738
reload: bool = True,
727739
session_service_uri: Optional[str] = None,
728740
artifact_service_uri: Optional[str] = None,
@@ -776,6 +788,7 @@ async def _lifespan(app: FastAPI):
776788
allow_origins=allow_origins,
777789
web=True,
778790
trace_to_cloud=trace_to_cloud,
791+
otel_to_cloud=otel_to_cloud,
779792
lifespan=_lifespan,
780793
a2a=a2a,
781794
host=host,
@@ -814,6 +827,7 @@ def cli_api_server(
814827
host: str = "127.0.0.1",
815828
port: int = 8000,
816829
trace_to_cloud: bool = False,
830+
otel_to_cloud: bool = False,
817831
reload: bool = True,
818832
session_service_uri: Optional[str] = None,
819833
artifact_service_uri: Optional[str] = None,
@@ -846,6 +860,7 @@ def cli_api_server(
846860
allow_origins=allow_origins,
847861
web=False,
848862
trace_to_cloud=trace_to_cloud,
863+
otel_to_cloud=otel_to_cloud,
849864
a2a=a2a,
850865
host=host,
851866
port=port,

src/google/adk/cli/fast_api.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ def get_fast_api_app(
6767
host: str = "127.0.0.1",
6868
port: int = 8000,
6969
trace_to_cloud: bool = False,
70+
otel_to_cloud: bool = False,
7071
reload_agents: bool = False,
7172
lifespan: Optional[Lifespan[FastAPI]] = None,
7273
) -> FastAPI:
@@ -191,7 +192,9 @@ def _parse_agent_engine_resource_name(agent_engine_id_or_resource_name):
191192
# Callbacks & other optional args for when constructing the FastAPI instance
192193
extra_fast_api_args = {}
193194

194-
if trace_to_cloud:
195+
# TODO - Remove separate trace_to_cloud logic once otel_to_cloud stops being
196+
# EXPERIMENTAL.
197+
if trace_to_cloud and not otel_to_cloud:
195198
from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter
196199

197200
def register_processors(provider: TracerProvider) -> None:
@@ -241,6 +244,7 @@ def tear_down_observer(observer: Observer, _: AdkWebServer):
241244
app = adk_web_server.get_fast_api_app(
242245
lifespan=lifespan,
243246
allow_origins=allow_origins,
247+
otel_to_cloud=otel_to_cloud,
244248
**extra_fast_api_args,
245249
)
246250

src/google/adk/flows/llm_flows/base_llm_flow.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,9 @@
4141
from ...models.base_llm_connection import BaseLlmConnection
4242
from ...models.llm_request import LlmRequest
4343
from ...models.llm_response import LlmResponse
44-
from ...telemetry import trace_call_llm
45-
from ...telemetry import trace_send_data
46-
from ...telemetry import tracer
44+
from ...telemetry.tracing import trace_call_llm
45+
from ...telemetry.tracing import trace_send_data
46+
from ...telemetry.tracing import tracer
4747
from ...tools.base_toolset import BaseToolset
4848
from ...tools.tool_context import ToolContext
4949
from ...utils.context_utils import Aclosing

src/google/adk/flows/llm_flows/functions.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,9 @@
3535
from ...auth.auth_tool import AuthToolArguments
3636
from ...events.event import Event
3737
from ...events.event_actions import EventActions
38-
from ...telemetry import trace_merged_tool_calls
39-
from ...telemetry import trace_tool_call
40-
from ...telemetry import tracer
38+
from ...telemetry.tracing import trace_merged_tool_calls
39+
from ...telemetry.tracing import trace_tool_call
40+
from ...telemetry.tracing import tracer
4141
from ...tools.base_tool import BaseTool
4242
from ...tools.tool_confirmation import ToolConfirmation
4343
from ...tools.tool_context import ToolContext

src/google/adk/runners.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
from .sessions.base_session_service import BaseSessionService
5252
from .sessions.in_memory_session_service import InMemorySessionService
5353
from .sessions.session import Session
54-
from .telemetry import tracer
54+
from .telemetry.tracing import tracer
5555
from .tools.base_toolset import BaseToolset
5656
from .utils.context_utils import Aclosing
5757

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from .tracing import trace_call_llm
16+
from .tracing import trace_merged_tool_calls
17+
from .tracing import trace_send_data
18+
from .tracing import trace_tool_call
19+
from .tracing import tracer
20+
21+
__all__ = [
22+
'trace_call_llm',
23+
'trace_merged_tool_calls',
24+
'trace_send_data',
25+
'trace_tool_call',
26+
'tracer',
27+
]
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import logging
18+
19+
import google.auth
20+
from opentelemetry.resourcedetector.gcp_resource_detector import GoogleCloudResourceDetector
21+
from opentelemetry.sdk._logs import LogRecordProcessor
22+
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
23+
from opentelemetry.sdk.metrics.export import MetricReader
24+
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
25+
from opentelemetry.sdk.resources import OTELResourceDetector
26+
from opentelemetry.sdk.resources import Resource
27+
from opentelemetry.sdk.trace import SpanProcessor
28+
from opentelemetry.sdk.trace.export import BatchSpanProcessor
29+
30+
from ..utils.feature_decorator import experimental
31+
from .setup import OTelHooks
32+
33+
logger = logging.getLogger('google_adk.' + __name__)
34+
35+
36+
@experimental
def get_gcp_exporters(
    enable_cloud_tracing: bool = False,
    enable_cloud_metrics: bool = False,
    enable_cloud_logging: bool = False,
) -> OTelHooks:
  """Returns GCP OTel exporters to be used in the app.

  Args:
    enable_cloud_tracing: whether to enable tracing to Cloud Trace.
    enable_cloud_metrics: whether to enable reporting metrics to Cloud
      Monitoring.
    enable_cloud_logging: whether to enable sending logs to Cloud Logging.

  Returns:
    OTelHooks holding the span processors, metric readers and log record
    processors for the enabled integrations. An empty OTelHooks is returned
    (and a warning is logged) when the GCP project cannot be determined.
  """
  _, project_id = google.auth.default()
  if not project_id:
    logger.warning(
        'Cannot determine GCP Project. OTel GCP Exporters cannot be set up.'
        ' Please make sure to log into correct GCP Project.'
    )
    return OTelHooks()

  span_processors = []
  if enable_cloud_tracing:
    span_processors.append(_get_gcp_span_exporter(project_id))

  metric_readers = []
  if enable_cloud_metrics:
    metric_reader = _get_gcp_metrics_exporter(project_id)
    # Skip registration if the helper did not produce a reader.
    if metric_reader:
      metric_readers.append(metric_reader)

  log_record_processors = []
  if enable_cloud_logging:
    log_processor = _get_gcp_logs_exporter(project_id)
    # Skip registration if the helper did not produce a processor.
    if log_processor:
      log_record_processors.append(log_processor)

  return OTelHooks(
      span_processors=span_processors,
      metric_readers=metric_readers,
      log_record_processors=log_record_processors,
  )
79+
80+
81+
def _get_gcp_span_exporter(project_id: str) -> SpanProcessor:
  """Builds a batching span processor that exports to Cloud Trace.

  Args:
    project_id: GCP project passed to the Cloud Trace exporter.

  Returns:
    A BatchSpanProcessor wrapping a CloudTraceSpanExporter.
  """
  # Imported lazily, at call time, as in the original.
  from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter

  cloud_trace_exporter = CloudTraceSpanExporter(project_id=project_id)
  return BatchSpanProcessor(cloud_trace_exporter)
85+
86+
87+
def _get_gcp_metrics_exporter(project_id: str) -> MetricReader:
  """Builds a periodic metric reader that exports to Cloud Monitoring.

  Args:
    project_id: GCP project passed to the Cloud Monitoring exporter.

  Returns:
    A PeriodicExportingMetricReader wrapping a CloudMonitoringMetricsExporter,
    configured with export_interval_millis=5000.
  """
  # Imported lazily, at call time, as in the original.
  from opentelemetry.exporter.cloud_monitoring import CloudMonitoringMetricsExporter

  monitoring_exporter = CloudMonitoringMetricsExporter(project_id=project_id)
  return PeriodicExportingMetricReader(
      monitoring_exporter, export_interval_millis=5000
  )
94+
95+
96+
def _get_gcp_logs_exporter(project_id: str) -> LogRecordProcessor:
  """Builds a batching log record processor that exports to Cloud Logging.

  Args:
    project_id: GCP project passed to the Cloud Logging exporter.

  Returns:
    A BatchLogRecordProcessor wrapping a CloudLoggingExporter.
  """
  # Imported lazily, at call time, as in the original.
  from opentelemetry.exporter.cloud_logging import CloudLoggingExporter

  # TODO(jawoszek) - add default_log_name once design is approved.
  cloud_logging_exporter = CloudLoggingExporter(project_id=project_id)
  return BatchLogRecordProcessor(cloud_logging_exporter)
103+
104+
105+
def get_gcp_resource() -> Resource:
  """Returns the OTel resource merged from the OTEL and GCP detectors.

  Returns:
    The result of merging the GoogleCloudResourceDetector output into the
    OTELResourceDetector output.
  """
  # The OTELResourceDetector populates resource labels from
  # environment variables like OTEL_SERVICE_NAME and OTEL_RESOURCE_ATTRIBUTES.
  env_resource = OTELResourceDetector().detect()

  # Then the GCP detector adds attributes corresponding to a correct
  # monitored resource if ADK runs on one of supported platforms
  # (e.g. GCE, GKE, CloudRun).
  gcp_resource = GoogleCloudResourceDetector(raise_on_error=False).detect()

  return env_resource.merge(gcp_resource)

0 commit comments

Comments
 (0)