Skip to content

Commit ad382c0

Browse files
committed
Simplified the metrics_v1 value columns into a single value column, improved how system metrics are filtered out in dev, and replaced the p50/p99 event loop delay metrics with a single p95 metric
1 parent cd7f869 commit ad382c0

File tree

12 files changed

+52
-98
lines changed

12 files changed

+52
-98
lines changed

apps/webapp/app/components/query/QueryEditor.tsx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1175,9 +1175,9 @@ function QueryResultsCallouts({
11751175
<div className="flex flex-col gap-2 px-2 pt-2">
11761176
{hiddenColumns && hiddenColumns.length > 0 && (
11771177
<Callout variant="warning" className="shrink-0 text-sm">
1178-
<code>SELECT *</code> doesn't return all columns because it's slow. The following columns
1179-
are not shown: <span className="font-mono text-xs">{hiddenColumns.join(", ")}</span>.
1180-
Specify them explicitly to include them.
1178+
<code>SELECT *</code> returns core columns only. To include{" "}
1179+
<span className="font-mono text-xs">{hiddenColumns.join(", ")}</span>, add them to your
1180+
SELECT explicitly.
11811181
</Callout>
11821182
)}
11831183
{periodClipped && (

apps/webapp/app/v3/otlpExporter.server.ts

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -505,11 +505,7 @@ function convertMetricsToClickhouseRows(
505505
metric_type: "gauge",
506506
metric_subject: resolved.machineId ?? "unknown",
507507
bucket_start: floorToTenSecondBucket(dp.timeUnixNano),
508-
count: 0,
509-
sum_value: 0,
510-
max_value: value,
511-
min_value: value,
512-
last_value: value,
508+
value,
513509
attributes: resolved.attributes,
514510
});
515511
}
@@ -530,11 +526,7 @@ function convertMetricsToClickhouseRows(
530526
metric_type: "sum",
531527
metric_subject: resolved.machineId ?? "unknown",
532528
bucket_start: floorToTenSecondBucket(dp.timeUnixNano),
533-
count: 1,
534-
sum_value: value,
535-
max_value: value,
536-
min_value: value,
537-
last_value: value,
529+
value,
538530
attributes: resolved.attributes,
539531
});
540532
}
@@ -546,8 +538,6 @@ function convertMetricsToClickhouseRows(
546538
const resolved = resolveDataPointContext(dp.attributes ?? [], resourceCtx);
547539
const count = Number(dp.count);
548540
const sum = dp.sum ?? 0;
549-
const max = dp.max ?? 0;
550-
const min = dp.min ?? 0;
551541

552542
rows.push({
553543
organization_id: organizationId,
@@ -557,11 +547,7 @@ function convertMetricsToClickhouseRows(
557547
metric_type: "histogram",
558548
metric_subject: resolved.machineId ?? "unknown",
559549
bucket_start: floorToTenSecondBucket(dp.timeUnixNano),
560-
count,
561-
sum_value: sum,
562-
max_value: max,
563-
min_value: min,
564-
last_value: count > 0 ? sum / count : 0,
550+
value: count > 0 ? sum / count : 0,
565551
attributes: resolved.attributes,
566552
});
567553
}

apps/webapp/app/v3/querySchemas.ts

Lines changed: 11 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -498,44 +498,21 @@ export const metricsSchema: TableSchema = {
498498
coreColumn: true,
499499
}),
500500
},
501-
count: {
502-
name: "count",
503-
...column("UInt64", {
504-
description: "Number of data points in this bucket",
505-
example: "6",
506-
coreColumn: true,
507-
}),
508-
},
509-
sum_value: {
510-
name: "sum_value",
511-
...column("Float64", {
512-
description: "Sum of values in this bucket",
513-
example: "0.45",
514-
coreColumn: true,
515-
}),
516-
},
517-
max_value: {
518-
name: "max_value",
501+
value: {
502+
name: "value",
519503
...column("Float64", {
520-
description: "Maximum value in this bucket",
521-
example: "0.85",
504+
description: "The metric value",
505+
example: "0.75",
522506
coreColumn: true,
523507
}),
524508
},
525-
min_value: {
526-
name: "min_value",
527-
...column("Float64", {
528-
description: "Minimum value in this bucket",
529-
example: "0.12",
530-
coreColumn: true,
531-
}),
532-
},
533-
last_value: {
534-
name: "last_value",
535-
...column("Float64", {
536-
description: "Last recorded value in this bucket",
537-
example: "0.42",
538-
coreColumn: true,
509+
510+
// Attributes (JSON column for user-defined and system attributes)
511+
attributes: {
512+
name: "attributes",
513+
...column("JSON", {
514+
description: "JSON attributes attached to the metric data point.",
515+
example: '{"region": "us-east-1"}',
539516
}),
540517
},
541518

apps/webapp/app/v3/services/aiQueryService.server.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -453,8 +453,8 @@ Only use explicit \`toStartOfHour\`/\`toStartOfDay\` etc. if the user specifical
453453
- Filter by metric name: WHERE metric_name = 'process.cpu.utilization'
454454
- Filter by run: WHERE run_id = 'run_abc123'
455455
- Filter by task: WHERE task_identifier = 'my-task'
456-
- Available metric names: process.cpu.utilization, process.cpu.time, process.memory.usage, system.memory.usage, system.memory.utilization, system.network.io, system.network.dropped, system.network.errors, nodejs.event_loop.utilization, nodejs.event_loop.delay.p50, nodejs.event_loop.delay.p99, nodejs.event_loop.delay.max, nodejs.heap.used, nodejs.heap.total
457-
- Use max_value or last_value for gauges (CPU utilization, memory usage), sum_value for counters (CPU time, network IO)
456+
- Available metric names: process.cpu.utilization, process.cpu.time, process.memory.usage, system.memory.usage, system.memory.utilization, system.network.io, system.network.dropped, system.network.errors, nodejs.event_loop.utilization, nodejs.event_loop.delay.p95, nodejs.event_loop.delay.max, nodejs.heap.used, nodejs.heap.total
457+
- Use \`value\` — the metric's observed value
458458
- Use prettyFormat(expr, 'bytes') to tell the UI to format values as bytes (e.g., "1.50 GiB") — keeps values numeric for charts
459459
- Use prettyFormat(expr, 'percent') for percentage values
460460
- prettyFormat does NOT change the SQL — it only adds a display hint
@@ -464,7 +464,7 @@ Only use explicit \`toStartOfHour\`/\`toStartOfDay\` etc. if the user specifical
464464
465465
\`\`\`sql
466466
-- CPU utilization over time for a task
467-
SELECT timeBucket(), task_identifier, prettyFormat(avg(max_value), 'percent') AS avg_cpu
467+
SELECT timeBucket(), task_identifier, prettyFormat(avg(value), 'percent') AS avg_cpu
468468
FROM metrics
469469
WHERE metric_name = 'process.cpu.utilization'
470470
GROUP BY timeBucket, task_identifier
@@ -474,7 +474,7 @@ LIMIT 1000
474474
475475
\`\`\`sql
476476
-- Peak memory usage per run
477-
SELECT run_id, task_identifier, prettyFormat(max(max_value), 'bytes') AS peak_memory
477+
SELECT run_id, task_identifier, prettyFormat(max(value), 'bytes') AS peak_memory
478478
FROM metrics
479479
WHERE metric_name = 'process.memory.usage'
480480
GROUP BY run_id, task_identifier
@@ -589,7 +589,7 @@ LIMIT 1000
589589
### Common Metrics Patterns
590590
- Filter by metric: WHERE metric_name = 'process.cpu.utilization'
591591
- Available metric names: process.cpu.utilization, process.cpu.time, process.memory.usage, system.memory.usage, system.memory.utilization, system.network.io, system.network.dropped, system.network.errors, nodejs.event_loop.utilization, nodejs.event_loop.delay.p95, nodejs.event_loop.delay.max, nodejs.heap.used, nodejs.heap.total
592-
- Use max_value or last_value for gauges (CPU utilization, memory usage), sum_value for counters (CPU time, network IO)
592+
- Use \`value\` — the metric's observed value
593593
- Use prettyFormat(expr, 'bytes') for memory metrics (including nodejs.heap.*), prettyFormat(expr, 'percent') for CPU utilization
594594
- prettyFormat does NOT change the SQL — it only adds a display hint for the UI
595595

apps/webapp/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@
7070
"@opentelemetry/exporter-logs-otlp-http": "0.203.0",
7171
"@opentelemetry/exporter-metrics-otlp-proto": "0.203.0",
7272
"@opentelemetry/exporter-trace-otlp-http": "0.203.0",
73-
"@opentelemetry/host-metrics": "^0.36.0",
73+
"@opentelemetry/host-metrics": "^0.37.0",
7474
"@opentelemetry/instrumentation": "0.203.0",
7575
"@opentelemetry/instrumentation-aws-sdk": "^0.57.0",
7676
"@opentelemetry/instrumentation-express": "^0.52.0",

internal-packages/clickhouse/schema/017_create_metrics_v1.sql

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,7 @@ CREATE TABLE IF NOT EXISTS trigger_dev.metrics_v1
88
metric_type LowCardinality(String),
99
metric_subject String CODEC(ZSTD(1)),
1010
bucket_start DateTime CODEC(Delta(4), ZSTD(1)),
11-
count UInt64 DEFAULT 0 CODEC(ZSTD(1)),
12-
sum_value Float64 DEFAULT 0 CODEC(ZSTD(1)),
13-
max_value Float64 DEFAULT 0 CODEC(ZSTD(1)),
14-
min_value Float64 DEFAULT 0 CODEC(ZSTD(1)),
15-
last_value Float64 DEFAULT 0 CODEC(ZSTD(1)),
11+
value Float64 DEFAULT 0 CODEC(ZSTD(1)),
1612
attributes JSON(
1713
`trigger.run_id` String,
1814
`trigger.task_slug` String,

internal-packages/clickhouse/src/metrics.ts

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,7 @@ export const MetricsV1Input = z.object({
99
metric_type: z.string(),
1010
metric_subject: z.string(),
1111
bucket_start: z.string(),
12-
count: z.number(),
13-
sum_value: z.number(),
14-
max_value: z.number(),
15-
min_value: z.number(),
16-
last_value: z.number(),
12+
value: z.number(),
1713
attributes: z.unknown(),
1814
});
1915

packages/cli-v3/src/entryPoints/dev-run-worker.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,9 +213,8 @@ async function doBootstrap() {
213213
forceFlushTimeoutMillis: 30_000,
214214
resource: config.telemetry?.resource,
215215
hostMetrics: true,
216+
hostMetricGroups: ["process.cpu", "process.memory"],
216217
nodejsRuntimeMetrics: true,
217-
// Drop all system metrics from dev metrics export
218-
droppedMetrics: ["system.*"],
219218
});
220219

221220
const otelTracer: Tracer = tracingSDK.getTracer("trigger-dev-worker", VERSION);

packages/core/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@
177177
"@opentelemetry/core": "2.0.1",
178178
"@opentelemetry/exporter-logs-otlp-http": "0.203.0",
179179
"@opentelemetry/exporter-metrics-otlp-http": "0.203.0",
180-
"@opentelemetry/host-metrics": "^0.36.0",
180+
"@opentelemetry/host-metrics": "^0.37.0",
181181
"@opentelemetry/exporter-trace-otlp-http": "0.203.0",
182182
"@opentelemetry/instrumentation": "0.203.0",
183183
"@opentelemetry/resources": "2.0.1",

packages/core/src/v3/otel/nodejsRuntimeMetrics.ts

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,8 @@ export function startNodejsRuntimeMetrics(meterProvider: MeterProvider) {
1616
const eld = monitorEventLoopDelay({ resolution: 20 });
1717
eld.enable();
1818

19-
const eldP50 = meter.createObservableGauge("nodejs.event_loop.delay.p50", {
20-
description: "Median event loop delay",
21-
unit: "s",
22-
});
23-
const eldP99 = meter.createObservableGauge("nodejs.event_loop.delay.p99", {
24-
description: "p99 event loop delay",
19+
const eldP95 = meter.createObservableGauge("nodejs.event_loop.delay.p95", {
20+
description: "p95 event loop delay",
2521
unit: "s",
2622
});
2723
const eldMax = meter.createObservableGauge("nodejs.event_loop.delay.max", {
@@ -49,8 +45,7 @@ export function startNodejsRuntimeMetrics(meterProvider: MeterProvider) {
4945
obs.observe(eluGauge, diff.utilization);
5046

5147
// Event loop delay (nanoseconds -> seconds)
52-
obs.observe(eldP50, eld.percentile(50) / 1e9);
53-
obs.observe(eldP99, eld.percentile(99) / 1e9);
48+
obs.observe(eldP95, eld.percentile(95) / 1e9);
5449
obs.observe(eldMax, eld.max / 1e9);
5550
eld.reset();
5651

@@ -59,6 +54,6 @@ export function startNodejsRuntimeMetrics(meterProvider: MeterProvider) {
5954
obs.observe(heapUsed, mem.heapUsed);
6055
obs.observe(heapTotal, mem.heapTotal);
6156
},
62-
[eluGauge, eldP50, eldP99, eldMax, heapUsed, heapTotal]
57+
[eluGauge, eldP95, eldMax, heapUsed, heapTotal]
6358
);
6459
}

0 commit comments

Comments
 (0)