Skip to content

Commit 5b4a32f

Browse files
committed
implement semantic system filesystem and diskio metrics for nodejs and bun
1 parent 3b0f8f7 commit 5b4a32f

File tree

5 files changed

+247
-0
lines changed

5 files changed

+247
-0
lines changed

internal-packages/clickhouse/schema/017_create_metrics_v1.sql

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@ CREATE TABLE IF NOT EXISTS trigger_dev.metrics_v1
2222
`system.cpu.state` LowCardinality(String),
2323
`system.memory.state` LowCardinality(String),
2424
`system.device` String,
25+
`system.filesystem.type` LowCardinality(String),
26+
`system.filesystem.mountpoint` String,
27+
`system.filesystem.mode` LowCardinality(String),
28+
`system.filesystem.state` LowCardinality(String),
29+
`disk.io.direction` LowCardinality(String),
2530
`process.cpu.state` LowCardinality(String),
2631
`network.io.direction` LowCardinality(String),
2732
max_dynamic_paths=8

packages/cli-v3/src/entryPoints/managed-run-worker.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,8 @@ async function doBootstrap() {
193193
resource: config.telemetry?.resource,
194194
hostMetrics: true,
195195
nodejsRuntimeMetrics: true,
196+
filesystemMetrics: true,
197+
diskIoMetrics: true,
196198
});
197199

198200
const otelTracer: Tracer = tracingSDK.getTracer("trigger-dev-worker", VERSION);
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
import { type MeterProvider } from "@opentelemetry/sdk-metrics";
2+
import * as fs from "node:fs";
3+
import * as fsPromises from "node:fs/promises";
4+
5+
/** Multiplier used to convert the sector counts read from /proc/diskstats into bytes. */
const SECTOR_SIZE = 512;

/** Device-name prefixes whose rows are dropped while parsing /proc/diskstats. */
const FILTERED_DEVICE_PREFIXES: string[] = ["loop", "ram", "dm-"];

/** Counters extracted from a single /proc/diskstats row. */
type DiskStats = {
  /** Device name (third whitespace-separated column of the row). */
  device: string;

  // Read side
  /** Number of completed read operations. */
  readsCompleted: number;
  /** Number of sectors read. */
  sectorsRead: number;

  // Write side
  /** Number of completed write operations. */
  writesCompleted: number;
  /** Number of sectors written. */
  sectorsWritten: number;
};
16+
17+
/**
 * Parses the text of `/proc/diskstats` into per-device counters.
 *
 * Rows are skipped when they are blank, have fewer than 14 whitespace-separated
 * columns, name a device starting with one of `FILTERED_DEVICE_PREFIXES`, or
 * carry a counter that does not parse to a finite number.
 *
 * @param content Raw file contents, one device per line.
 * @returns One entry per accepted row, in file order.
 */
function parseProcDiskstats(content: string): DiskStats[] {
  const entries: DiskStats[] = [];

  for (const line of content.split("\n")) {
    const trimmed = line.trim();
    if (!trimmed) continue;

    const fields = trimmed.split(/\s+/);
    if (fields.length < 14) continue;

    const device = fields[2]!;
    if (FILTERED_DEVICE_PREFIXES.some((prefix) => device.startsWith(prefix))) {
      continue;
    }

    // Column layout: 0 major, 1 minor, 2 name, 3 reads completed, 5 sectors read,
    // 7 writes completed, 9 sectors written.
    const readsCompleted = Number.parseInt(fields[3]!, 10);
    const sectorsRead = Number.parseInt(fields[5]!, 10);
    const writesCompleted = Number.parseInt(fields[7]!, 10);
    const sectorsWritten = Number.parseInt(fields[9]!, 10);

    // A malformed column parses to NaN; dropping the row keeps NaN out of the
    // values that callers report as metrics.
    const counters = [readsCompleted, sectorsRead, writesCompleted, sectorsWritten];
    if (!counters.every((value) => Number.isFinite(value))) continue;

    entries.push({ device, readsCompleted, sectorsRead, writesCompleted, sectorsWritten });
  }

  return entries;
}
44+
45+
/**
 * Registers observable disk I/O instruments on a meter named "system-disk".
 *
 * Does nothing when `/proc/diskstats` is not readable. Otherwise every
 * collection re-reads the file and reports, per device and direction
 * ("read" / "write"), the byte total (`system.disk.io`) and the completed
 * operation count (`system.disk.operations`).
 *
 * @param meterProvider Provider used to obtain the meter.
 */
export function startDiskIoMetrics(meterProvider: MeterProvider) {
  // Availability probe: bail out quietly when the stats file cannot be read.
  try {
    fs.accessSync("/proc/diskstats", fs.constants.R_OK);
  } catch {
    return;
  }

  const meter = meterProvider.getMeter("system-disk", "1.0.0");

  const bytesInstrument = meter.createObservableCounter("system.disk.io", {
    description: "Disk I/O bytes read and written per device",
    unit: "By",
  });
  const operationsInstrument = meter.createObservableCounter("system.disk.operations", {
    description: "Disk read/write operation counts per device",
    unit: "{operation}",
  });

  meter.addBatchObservableCallback(
    async (result) => {
      try {
        const raw = await fsPromises.readFile("/proc/diskstats", "utf-8");

        parseProcDiskstats(raw).forEach(
          ({ device, readsCompleted, sectorsRead, writesCompleted, sectorsWritten }) => {
            const forReads = { "system.device": device, "disk.io.direction": "read" };
            const forWrites = { "system.device": device, "disk.io.direction": "write" };

            // Sector counts are converted to bytes before being reported.
            result.observe(bytesInstrument, sectorsRead * SECTOR_SIZE, forReads);
            result.observe(bytesInstrument, sectorsWritten * SECTOR_SIZE, forWrites);

            result.observe(operationsInstrument, readsCompleted, forReads);
            result.observe(operationsInstrument, writesCompleted, forWrites);
          }
        );
      } catch {
        // Skip entire cycle on failure
      }
    },
    [bytesInstrument, operationsInstrument]
  );
}
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
import { type MeterProvider } from "@opentelemetry/sdk-metrics";
2+
import * as fs from "node:fs";
3+
import * as fsPromises from "node:fs/promises";
4+
5+
/** Filesystem types that are excluded when parsing /proc/mounts. */
const VIRTUAL_FS_TYPES = new Set<string>([
  "proc", "sysfs", "devpts", "tmpfs", "devtmpfs",
  "cgroup", "cgroup2",
  "squashfs", "autofs",
  "debugfs", "securityfs", "pstore", "bpf", "tracefs",
  "hugetlbfs", "mqueue", "fusectl", "configfs", "binfmt_misc",
]);

/** The first four space-separated columns of one /proc/mounts line. */
type MountEntry = {
  /** First column: the mounted device. */
  device: string;
  /** Second column, after octal escapes have been decoded. */
  mountpoint: string;
  /** Third column: filesystem type. */
  fsType: string;
  /** Fourth column: mount options string. */
  options: string;
};
33+
34+
/**
 * Turns the text of `/proc/mounts` into mount entries.
 *
 * Blank lines, lines with fewer than four space-separated columns, and lines
 * whose filesystem type is listed in `VIRTUAL_FS_TYPES` are dropped.
 *
 * @param content Raw file contents, one mount per line.
 * @returns Accepted mounts in file order, with the mountpoint unescaped.
 */
function parseProcMounts(content: string): MountEntry[] {
  return content.split("\n").flatMap((rawLine): MountEntry[] => {
    if (rawLine.trim() === "") return [];

    const columns = rawLine.split(" ");
    if (columns.length < 4) return [];

    const fsType = columns[2]!;
    if (VIRTUAL_FS_TYPES.has(fsType)) return [];

    return [
      {
        device: columns[0]!,
        mountpoint: unescapeMountPath(columns[1]!),
        fsType,
        options: columns[3]!,
      },
    ];
  });
}
56+
57+
/**
 * Decodes the three-digit octal escapes used in mount-table paths.
 *
 * Mount tables encode space, tab, newline and backslash as `\040`, `\011`,
 * `\012` and `\134` (see getmntent(3)). Every `\ooo` sequence is decoded in a
 * single left-to-right pass, so a decoded backslash is never re-read as the
 * start of another escape.
 *
 * @param path Path exactly as it appears in the mount table.
 * @returns The path with each octal escape replaced by the character it encodes.
 */
function unescapeMountPath(path: string): string {
  // First digit is limited to 0-3 so the decoded value always fits in one byte.
  return path.replace(/\\([0-3][0-7]{2})/g, (_match, octal: string) =>
    String.fromCharCode(Number.parseInt(octal, 8))
  );
}
60+
61+
/**
 * Registers observable filesystem instruments on a meter named
 * "system-filesystem".
 *
 * Does nothing when `/proc/mounts` is not readable or when
 * `fsPromises.statfs` is not a function in the current runtime. Otherwise
 * every collection re-reads the mount table, calls `statfs` on each mount,
 * and reports:
 *  - `system.filesystem.usage` in bytes for the states "used", "free" and "reserved";
 *  - `system.filesystem.utilization` as used / total, only when total > 0.
 *
 * @param meterProvider Provider used to obtain the meter.
 */
export function startFilesystemMetrics(meterProvider: MeterProvider) {
  // Availability probe: bail out quietly when the mount table cannot be read.
  try {
    fs.accessSync("/proc/mounts", fs.constants.R_OK);
  } catch {
    return;
  }

  // Runtime feature check: without statfs there is nothing to measure.
  if (typeof fsPromises.statfs !== "function") {
    return;
  }

  const meter = meterProvider.getMeter("system-filesystem", "1.0.0");

  const usageInstrument = meter.createObservableUpDownCounter("system.filesystem.usage", {
    description: "Filesystem bytes used, free, and reserved per mountpoint",
    unit: "By",
  });
  const utilizationInstrument = meter.createObservableGauge("system.filesystem.utilization", {
    description: "Fraction of filesystem space used (0-1)",
    unit: "1",
  });

  meter.addBatchObservableCallback(
    async (result) => {
      try {
        const rawMounts = await fsPromises.readFile("/proc/mounts", "utf-8");

        for (const { device, mountpoint, fsType, options } of parseProcMounts(rawMounts)) {
          try {
            const { bsize, blocks, bfree, bavail } = await fsPromises.statfs(mountpoint);

            const total = blocks * bsize;
            const free = bavail * bsize;
            // Blocks that are free but not available to the caller.
            const reserved = (bfree - bavail) * bsize;
            const used = total - bfree * bsize;

            const baseAttrs = {
              "system.device": device,
              "system.filesystem.type": fsType,
              "system.filesystem.mountpoint": mountpoint,
              "system.filesystem.mode": options.startsWith("ro") ? "ro" : "rw",
            };

            const bytesByState: Array<[string, number]> = [
              ["used", used],
              ["free", free],
              ["reserved", reserved],
            ];
            for (const [state, bytes] of bytesByState) {
              result.observe(usageInstrument, bytes, {
                ...baseAttrs,
                "system.filesystem.state": state,
              });
            }

            // Avoid dividing by zero for mounts that report no blocks.
            if (total > 0) {
              result.observe(utilizationInstrument, used / total, baseAttrs);
            }
          } catch {
            // Skip this mount on statfs failure
          }
        }
      } catch {
        // Skip entire cycle on failure
      }
    },
    [usageInstrument, utilizationInstrument]
  );
}

packages/core/src/v3/otel/tracingSDK.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ import {
6363
import { traceContext } from "../trace-context-api.js";
6464
import { getEnvVar } from "../utils/getEnv.js";
6565
import { machineId } from "./machineId.js";
66+
import { startDiskIoMetrics } from "./diskIoMetrics.js";
67+
import { startFilesystemMetrics } from "./filesystemMetrics.js";
6668
import { startNodejsRuntimeMetrics } from "./nodejsRuntimeMetrics.js";
6769

6870
export type TracingDiagnosticLogLevel =
@@ -89,6 +91,10 @@ export type TracingSDKConfig = {
8991
hostMetricGroups?: string[];
9092
/** Enable Node.js runtime metrics (event loop utilization, heap usage, etc.) */
9193
nodejsRuntimeMetrics?: boolean;
94+
/** Enable filesystem metrics (Linux only, reads /proc/mounts + fs.statfs) */
95+
filesystemMetrics?: boolean;
96+
/** Enable disk I/O metrics (Linux only, reads /proc/diskstats) */
97+
diskIoMetrics?: boolean;
9298
/** Metric instrument name patterns to drop (supports wildcards, e.g. "system.cpu.*") */
9399
droppedMetrics?: string[];
94100
};
@@ -340,6 +346,14 @@ export class TracingSDK {
340346
startNodejsRuntimeMetrics(meterProvider);
341347
}
342348

349+
if (config.filesystemMetrics) {
350+
startFilesystemMetrics(meterProvider);
351+
}
352+
353+
if (config.diskIoMetrics) {
354+
startDiskIoMetrics(meterProvider);
355+
}
356+
343357
this.getLogger = loggerProvider.getLogger.bind(loggerProvider);
344358
this.getTracer = traceProvider.getTracer.bind(traceProvider);
345359
}

0 commit comments

Comments
 (0)