Skip to content

Commit 180bd6f

Browse files
committed
add pod cleaner and tests
1 parent e96981f commit 180bd6f

File tree

8 files changed

+643
-3
lines changed

8 files changed

+643
-3
lines changed

apps/supervisor/package.json

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,18 @@
66
"type": "module",
77
"scripts": {
88
"build": "tsc",
9-
"dev": "tsx --experimental-sqlite --require dotenv/config --watch src/index.ts || (echo '!! Remember to run: nvm use'; exit 1)",
10-
"start": "node --experimental-sqlite dist/index.js",
9+
"dev": "tsx --require dotenv/config --watch src/index.ts || (echo '!! Remember to run: nvm use'; exit 1)",
10+
"start": "node dist/index.js",
11+
"test": "vitest run",
12+
"test:watch": "vitest",
1113
"typecheck": "tsc --noEmit"
1214
},
1315
"dependencies": {
1416
"@kubernetes/client-node": "^1.0.0",
1517
"@trigger.dev/core": "workspace:*",
1618
"dockerode": "^4.0.3",
1719
"nanoid": "^5.0.9",
20+
"prom-client": "^15.1.0",
1821
"socket.io": "4.7.4",
1922
"std-env": "^3.8.0",
2023
"tinyexec": "^0.3.1",

apps/supervisor/src/clients/kubernetes.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,28 @@
11
import * as k8s from "@kubernetes/client-node";
2+
import { Informer } from "@kubernetes/client-node";
3+
import { ListPromise } from "@kubernetes/client-node";
4+
import { KubernetesObject } from "@kubernetes/client-node";
25
import { assertExhaustive } from "@trigger.dev/core/utils";
36

47
export const RUNTIME_ENV = process.env.KUBERNETES_PORT ? "kubernetes" : "local";
58

69
export function createK8sApi() {
710
const kubeConfig = getKubeConfig();
811

12+
function makeInformer<T extends KubernetesObject>(
13+
path: string,
14+
listPromiseFn: ListPromise<T>,
15+
labelSelector?: string,
16+
fieldSelector?: string
17+
): Informer<T> {
18+
return k8s.makeInformer(kubeConfig, path, listPromiseFn, labelSelector, fieldSelector);
19+
}
20+
921
const api = {
1022
core: kubeConfig.makeApiClient(k8s.CoreV1Api),
1123
batch: kubeConfig.makeApiClient(k8s.BatchV1Api),
1224
apps: kubeConfig.makeApiClient(k8s.AppsV1Api),
25+
makeInformer,
1326
};
1427

1528
return api;

apps/supervisor/src/env.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,14 @@ const Env = z.object({
5151
KUBERNETES_NAMESPACE: z.string().default("default"),
5252
EPHEMERAL_STORAGE_SIZE_LIMIT: z.string().default("10Gi"),
5353
EPHEMERAL_STORAGE_SIZE_REQUEST: z.string().default("2Gi"),
54+
55+
// Metrics
56+
METRICS_COLLECT_DEFAULTS: BoolEnv.default(true),
57+
58+
// Pod cleaner
59+
POD_CLEANER_ENABLED: BoolEnv.default(true),
60+
POD_CLEANER_INTERVAL_MS: z.coerce.number().int().default(10000),
61+
POD_CLEANER_BATCH_SIZE: z.coerce.number().int().default(500),
5462
});
5563

5664
export const env = Env.parse(stdEnv);

apps/supervisor/src/index.ts

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,14 @@ import {
1818
CheckpointClient,
1919
isKubernetesEnvironment,
2020
} from "@trigger.dev/core/v3/serverOnly";
21-
import { createK8sApi, RUNTIME_ENV } from "./clients/kubernetes.js";
21+
import { createK8sApi } from "./clients/kubernetes.js";
22+
import { collectDefaultMetrics } from "prom-client";
23+
import { register } from "./metrics.js";
24+
import { PodCleaner } from "./services/podCleaner.js";
25+
26+
if (env.METRICS_COLLECT_DEFAULTS) {
27+
collectDefaultMetrics({ register });
28+
}
2229

2330
class ManagedSupervisor {
2431
private readonly workerSession: SupervisorSession;
@@ -28,6 +35,7 @@ class ManagedSupervisor {
2835
private readonly logger = new SimpleStructuredLogger("managed-worker");
2936
private readonly resourceMonitor: ResourceMonitor;
3037
private readonly checkpointClient?: CheckpointClient;
38+
private readonly podCleaner?: PodCleaner;
3139

3240
private readonly isKubernetes = isKubernetesEnvironment(env.KUBERNETES_FORCE_ENABLED);
3341
private readonly warmStartUrl = env.TRIGGER_WARM_START_URL;
@@ -37,6 +45,14 @@ class ManagedSupervisor {
3745
const workloadApiDomain = env.TRIGGER_WORKLOAD_API_DOMAIN;
3846
const workloadApiPortExternal = env.TRIGGER_WORKLOAD_API_PORT_EXTERNAL;
3947

48+
if (env.POD_CLEANER_ENABLED) {
49+
this.podCleaner = new PodCleaner({
50+
namespace: env.KUBERNETES_NAMESPACE,
51+
batchSize: env.POD_CLEANER_BATCH_SIZE,
52+
intervalMs: env.POD_CLEANER_INTERVAL_MS,
53+
});
54+
}
55+
4056
if (this.warmStartUrl) {
4157
this.logger.log("[ManagedWorker] 🔥 Warm starts enabled", {
4258
warmStartUrl: this.warmStartUrl,
@@ -273,6 +289,10 @@ class ManagedSupervisor {
273289
async start() {
274290
this.logger.log("[ManagedWorker] Starting up");
275291

292+
if (this.podCleaner) {
293+
await this.podCleaner.start();
294+
}
295+
276296
if (env.TRIGGER_WORKLOAD_API_ENABLED) {
277297
this.logger.log("[ManagedWorker] Workload API enabled", {
278298
protocol: env.TRIGGER_WORKLOAD_API_PROTOCOL,
@@ -292,6 +312,10 @@ class ManagedSupervisor {
292312
async stop() {
293313
this.logger.log("[ManagedWorker] Shutting down");
294314
await this.httpServer.stop();
315+
316+
if (this.podCleaner) {
317+
await this.podCleaner.stop();
318+
}
295319
}
296320
}
297321

apps/supervisor/src/metrics.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
import { Registry } from "prom-client";
2+
3+
export const register = new Registry();

0 commit comments

Comments
 (0)