@@ -22,6 +22,7 @@ import { createK8sApi } from "./clients/kubernetes.js";
2222import { collectDefaultMetrics } from "prom-client" ;
2323import { register } from "./metrics.js" ;
2424import { PodCleaner } from "./services/podCleaner.js" ;
25+ import { FailedPodHandler } from "./services/failedPodHandler.js" ;
2526
2627if ( env . METRICS_COLLECT_DEFAULTS ) {
2728 collectDefaultMetrics ( { register } ) ;
@@ -35,7 +36,9 @@ class ManagedSupervisor {
3536 private readonly logger = new SimpleStructuredLogger ( "managed-worker" ) ;
3637 private readonly resourceMonitor : ResourceMonitor ;
3738 private readonly checkpointClient ?: CheckpointClient ;
39+
3840 private readonly podCleaner ?: PodCleaner ;
41+ private readonly failedPodHandler ?: FailedPodHandler ;
3942
4043 private readonly isKubernetes = isKubernetesEnvironment ( env . KUBERNETES_FORCE_ENABLED ) ;
4144 private readonly warmStartUrl = env . TRIGGER_WARM_START_URL ;
@@ -53,6 +56,13 @@ class ManagedSupervisor {
5356 } ) ;
5457 }
5558
59+ if ( env . FAILED_POD_HANDLER_ENABLED ) {
60+ this . failedPodHandler = new FailedPodHandler ( {
61+ namespace : env . KUBERNETES_NAMESPACE ,
62+ reconnectIntervalMs : env . FAILED_POD_HANDLER_RECONNECT_INTERVAL_MS ,
63+ } ) ;
64+ }
65+
5666 if ( this . warmStartUrl ) {
5767 this . logger . log ( "[ManagedWorker] 🔥 Warm starts enabled" , {
5868 warmStartUrl : this . warmStartUrl ,
@@ -293,6 +303,10 @@ class ManagedSupervisor {
293303 await this . podCleaner . start ( ) ;
294304 }
295305
306+ if ( this . failedPodHandler ) {
307+ await this . failedPodHandler . start ( ) ;
308+ }
309+
296310 if ( env . TRIGGER_WORKLOAD_API_ENABLED ) {
297311 this . logger . log ( "[ManagedWorker] Workload API enabled" , {
298312 protocol : env . TRIGGER_WORKLOAD_API_PROTOCOL ,
@@ -316,6 +330,10 @@ class ManagedSupervisor {
316330 if ( this . podCleaner ) {
317331 await this . podCleaner . stop ( ) ;
318332 }
333+
334+ if ( this . failedPodHandler ) {
335+ await this . failedPodHandler . stop ( ) ;
336+ }
319337 }
320338}
321339
0 commit comments