|
5 | 5 | # TelemetryFlow Python SDK - Community Enterprise Observability Platform (CEOP) |
6 | 6 | # Copyright (c) 2024-2026 DevOpsCorner Indonesia. All rights reserved. |
7 | 7 | # |
8 | | -# This configuration is for local development and testing. |
9 | | -# For production, use the TelemetryFlow managed collector. |
| 8 | +# This config uses STANDARD OpenTelemetry Collector format. |
| 9 | +# Use with OTEL Collector Contrib: otelcol-contrib --config otel-collector.yaml |
10 | 10 | # |
11 | 11 | # ============================================================================= |
| 12 | +# OTLP HTTP Endpoints (OCB/OTEL Community - v1 ONLY) |
| 13 | +# ============================================================================= |
| 14 | +# Standard OpenTelemetry endpoints: |
| 15 | +# POST http://localhost:4318/v1/traces |
| 16 | +# POST http://localhost:4318/v1/metrics |
| 17 | +# POST http://localhost:4318/v1/logs |
| 18 | +# |
| 19 | +# gRPC: |
| 20 | +# localhost:4317 |
| 21 | +# |
| 22 | +# NOTE: v2 endpoints are NOT supported in OCB builds. |
| 23 | +# Use TFO Standalone build for v2 endpoint support. |
| 24 | +# |
| 25 | +# ============================================================================= |
| 26 | +# Features demonstrated: |
| 27 | +# - Metrics, Logs, Traces collection |
| 28 | +# - Exemplars support via spanmetrics connector |
| 29 | +# - Service graph generation |
| 30 | +# ============================================================================= |
12 | 31 |
|
| 32 | +# ============================================================================= |
| 33 | +# RECEIVERS - How telemetry data enters the collector |
| 34 | +# ============================================================================= |
13 | 35 | receivers: |
| 36 | + # OTLP receiver for OpenTelemetry Protocol (metrics, logs, traces) |
14 | 37 | otlp: |
15 | 38 | protocols: |
16 | 39 | grpc: |
17 | | - endpoint: 0.0.0.0:4317 |
| 40 | + endpoint: "0.0.0.0:4317" |
| 41 | + max_recv_msg_size_mib: 4 |
| 42 | + max_concurrent_streams: 100 |
| 43 | + read_buffer_size: 524288 |
| 44 | + write_buffer_size: 524288 |
| 45 | + keepalive: |
| 46 | + server_parameters: |
| 47 | + max_connection_idle: 15s |
| 48 | + max_connection_age: 30s |
| 49 | + max_connection_age_grace: 5s |
| 50 | + time: 10s |
| 51 | + timeout: 5s |
18 | 52 | http: |
19 | | - endpoint: 0.0.0.0:4318 |
| 53 | + endpoint: "0.0.0.0:4318" |
| 54 | + cors: |
| 55 | + allowed_origins: |
| 56 | + - "*" |
| 57 | + allowed_headers: |
| 58 | + - "*" |
| 59 | + max_age: 7200 |
20 | 60 |
|
| 61 | +# ============================================================================= |
| 62 | +# PROCESSORS - How telemetry data is processed |
| 63 | +# ============================================================================= |
21 | 64 | processors: |
| 65 | + # Batch processor for efficient data handling |
22 | 66 | batch: |
23 | | - timeout: 10s |
24 | | - send_batch_size: 1024 |
25 | | - send_batch_max_size: 2048 |
| 67 | + timeout: 200ms |
| 68 | + send_batch_size: 8192 |
| 69 | + send_batch_max_size: 0 |
26 | 70 |
|
| 71 | + # Memory limiter to prevent OOM |
27 | 72 | memory_limiter: |
28 | | - check_interval: 5s |
29 | | - limit_mib: 512 |
30 | | - spike_limit_mib: 128 |
| 73 | + check_interval: 1s |
| 74 | + limit_percentage: 80 |
| 75 | + spike_limit_percentage: 25 |
31 | 76 |
|
| 77 | + # Resource processor for adding attributes |
32 | 78 | resource: |
33 | 79 | attributes: |
34 | | - - key: telemetryflow.sdk.language |
35 | | - value: python |
| 80 | + - key: service.namespace |
| 81 | + value: telemetryflow |
36 | 82 | action: upsert |
37 | | - - key: telemetryflow.environment |
| 83 | + - key: deployment.environment |
38 | 84 | value: development |
39 | 85 | action: upsert |
| 86 | + - key: telemetryflow.sdk.language |
| 87 | + value: python |
| 88 | + action: upsert |
40 | 89 |
|
| 90 | +# ============================================================================= |
| 91 | +# CONNECTORS - Pipeline bridging for Exemplars and derived metrics |
| 92 | +# ============================================================================= |
| 93 | +connectors: |
| 94 | + # Span metrics connector - derives metrics from traces with EXEMPLARS support |
| 95 | + # This enables metrics with trace exemplars for drill-down from metrics to traces |
| 96 | + spanmetrics: |
| 97 | + histogram: |
| 98 | + explicit: |
| 99 | + buckets: [1ms, 5ms, 10ms, 25ms, 50ms, 100ms, 250ms, 500ms, 1s, 2.5s, 5s, 10s] |
| 100 | + dimensions: |
| 101 | + - name: http.method |
| 102 | + default: GET |
| 103 | + - name: http.status_code |
| 104 | + - name: http.route |
| 105 | + - name: rpc.method |
| 106 | + - name: rpc.service |
| 107 | + exemplars: |
| 108 | + enabled: true # Enable exemplars for metrics-to-traces correlation |
| 109 | + namespace: traces |
| 110 | + metrics_flush_interval: 15s |
| 111 | + |
| 112 | + # Service graph connector - builds service dependency graphs from traces |
| 113 | + servicegraph: |
| 114 | + latency_histogram_buckets: [1ms, 5ms, 10ms, 25ms, 50ms, 100ms, 250ms, 500ms, 1s, 2.5s, 5s, 10s] |
| 115 | + dimensions: |
| 116 | + - http.method |
| 117 | + - http.status_code |
| 118 | + store: |
| 119 | + ttl: 2s |
| 120 | + max_items: 1000 |
| 121 | + cache_loop: 1s |
| 122 | + store_expiration_loop: 2s |
| 123 | + virtual_node_peer_attributes: |
| 124 | + - db.system |
| 125 | + - messaging.system |
| 126 | + - rpc.service |
| 127 | + |
| 128 | +# ============================================================================= |
| 129 | +# EXPORTERS - Where telemetry data is sent |
| 130 | +# ============================================================================= |
41 | 131 | exporters: |
42 | | - # Debug exporter for development |
| 132 | + # Debug exporter for development/troubleshooting |
43 | 133 | debug: |
44 | 134 | verbosity: detailed |
45 | 135 | sampling_initial: 5 |
46 | 136 | sampling_thereafter: 200 |
47 | 137 |
|
48 | | - # OTLP exporter to Jaeger |
49 | | - otlp/jaeger: |
50 | | - endpoint: jaeger:4317 |
51 | | - tls: |
52 | | - insecure: true |
| 138 | + # OTLP exporter to Jaeger (to enable: uncomment, then add otlp/jaeger to the traces pipeline exporters) |
| 139 | + # otlp/jaeger: |
| 140 | + # endpoint: jaeger:4317 |
| 141 | + # tls: |
| 142 | + # insecure: true |
53 | 143 |
|
54 | | - # Prometheus exporter for metrics |
| 144 | + # Prometheus exporter for metrics scraping (with exemplars support) |
55 | 145 | prometheus: |
56 | | - endpoint: 0.0.0.0:8889 |
| 146 | + endpoint: "0.0.0.0:8889" |
57 | 147 | namespace: telemetryflow |
58 | 148 | const_labels: |
| 149 | + collector: tfo-collector |
59 | 150 | sdk: python |
| 151 | + send_timestamps: true |
| 152 | + metric_expiration: 5m |
| 153 | + enable_open_metrics: true # Required for exemplars |
| 154 | + resource_to_telemetry_conversion: |
| 155 | + enabled: true |
60 | 156 |
|
61 | | - # Logging exporter |
62 | | - logging: |
63 | | - verbosity: normal |
64 | | - sampling_initial: 2 |
65 | | - sampling_thereafter: 500 |
66 | | - |
| 157 | +# ============================================================================= |
| 158 | +# EXTENSIONS - Additional collector capabilities |
| 159 | +# ============================================================================= |
67 | 160 | extensions: |
| 161 | + # Health check extension |
68 | 162 | health_check: |
69 | | - endpoint: 0.0.0.0:13133 |
70 | | - |
71 | | - pprof: |
72 | | - endpoint: 0.0.0.0:1777 |
| 163 | + endpoint: "0.0.0.0:13133" |
73 | 164 |
|
| 165 | + # zPages extension for debugging |
74 | 166 | zpages: |
75 | | - endpoint: 0.0.0.0:55679 |
| 167 | + endpoint: "0.0.0.0:55679" |
| 168 | + |
| 169 | + # pprof extension for profiling |
| 170 | + pprof: |
| 171 | + endpoint: "0.0.0.0:1777" |
76 | 172 |
|
| 173 | +# ============================================================================= |
| 174 | +# SERVICE - Defines active components and pipelines |
| 175 | +# ============================================================================= |
77 | 176 | service: |
78 | | - extensions: [health_check, pprof, zpages] |
| 177 | + extensions: [health_check, zpages, pprof] |
79 | 178 |
|
80 | 179 | pipelines: |
| 180 | + # ========================================================================== |
| 181 | + # Traces pipeline - receives traces, exports to debug and to the spanmetrics and servicegraph connectors |
| 182 | + # ========================================================================== |
81 | 183 | traces: |
82 | 184 | receivers: [otlp] |
83 | 185 | processors: [memory_limiter, batch, resource] |
84 | | - exporters: [debug, logging] |
| 186 | + exporters: [debug, spanmetrics, servicegraph] # Export to connectors for derived metrics |
85 | 187 |
|
| 188 | + # ========================================================================== |
| 189 | + # Metrics pipeline - receives metrics from OTLP |
| 190 | + # ========================================================================== |
86 | 191 | metrics: |
87 | 192 | receivers: [otlp] |
88 | 193 | processors: [memory_limiter, batch, resource] |
89 | | - exporters: [debug, prometheus, logging] |
90 | | - |
| 194 | + exporters: [debug, prometheus] |
| 195 | + |
| 196 | + # ========================================================================== |
| 197 | + # Metrics from traces pipeline - receives derived metrics from spanmetrics connector |
| 198 | + # These metrics include EXEMPLARS for correlation with traces |
| 199 | + # ========================================================================== |
| 200 | + metrics/spanmetrics: |
| 201 | + receivers: [spanmetrics] |
| 202 | + processors: [memory_limiter, batch] |
| 203 | + exporters: [prometheus] |
| 204 | + |
| 205 | + # ========================================================================== |
| 206 | + # Metrics from service graph - receives service dependency metrics |
| 207 | + # ========================================================================== |
| 208 | + metrics/servicegraph: |
| 209 | + receivers: [servicegraph] |
| 210 | + processors: [memory_limiter, batch] |
| 211 | + exporters: [prometheus] |
| 212 | + |
| 213 | + # ========================================================================== |
| 214 | + # Logs pipeline |
| 215 | + # ========================================================================== |
91 | 216 | logs: |
92 | 217 | receivers: [otlp] |
93 | 218 | processors: [memory_limiter, batch, resource] |
94 | | - exporters: [debug, logging] |
| 219 | + exporters: [debug] |
95 | 220 |
|
| 221 | + # Internal telemetry configuration |
96 | 222 | telemetry: |
97 | 223 | logs: |
98 | 224 | level: info |
| 225 | + encoding: json |
| 226 | + |
99 | 227 | metrics: |
100 | | - address: 0.0.0.0:8888 |
| 228 | + level: detailed |
| 229 | + readers: |
| 230 | + - pull: |
| 231 | + exporter: |
| 232 | + prometheus: |
| 233 | + host: "0.0.0.0" |
| 234 | + port: 8888 |
0 commit comments