Skip to content

Commit 8e00344

Browse files
authored
feat(supervisor): project-based scheduling affinity for image cache locality (#2995)
Adds optional pod affinity so that pods from the same project prefer to be scheduled on the same node. This can improve image cache hit rates: subsequent pods benefit from already-pulled image layers, which reduces startup time.

Complements the built-in ImageLocality scheduler plugin by helping during burst scheduling scenarios: pod affinity sees scheduled pods immediately, while ImageLocality only sees images after they are fully pulled.

Configuration:

- `KUBERNETES_PROJECT_AFFINITY_ENABLED` - Enable/disable (default: false)
- `KUBERNETES_PROJECT_AFFINITY_WEIGHT` - Scheduler weight, 1-100 (default: 50)
- `KUBERNETES_PROJECT_AFFINITY_TOPOLOGY_KEY` - Topology key (default: kubernetes.io/hostname)

Uses soft (preferred) affinity, so pods always schedule even if the preferred node is full.

<!-- devin-review-badge-begin -->

---

<a href="https://app.devin.ai/review/triggerdotdev/trigger.dev/pull/2995"> <picture> <source media="(prefers-color-scheme: dark)" srcset="https://static.devin.ai/assets/gh-open-in-devin-review-dark.svg?v=1"> <img src="https://static.devin.ai/assets/gh-open-in-devin-review-light.svg?v=1" alt="Open with Devin"> </picture> </a>

<!-- devin-review-badge-end -->
1 parent b72cacc commit 8e00344

File tree

2 files changed

+67
-26
lines changed

2 files changed

+67
-26
lines changed

apps/supervisor/src/env.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,11 @@ const Env = z.object({
112112
KUBERNETES_SCHEDULER_NAME: z.string().optional(), // Custom scheduler name for pods
113113
KUBERNETES_LARGE_MACHINE_POOL_LABEL: z.string().optional(), // if set, large-* presets affinity for machinepool=<value>
114114

115+
// Project affinity settings - pods from the same project prefer the same node
116+
KUBERNETES_PROJECT_AFFINITY_ENABLED: BoolEnv.default(false),
117+
KUBERNETES_PROJECT_AFFINITY_WEIGHT: z.coerce.number().int().min(1).max(100).default(50),
118+
KUBERNETES_PROJECT_AFFINITY_TOPOLOGY_KEY: z.string().trim().min(1).default("kubernetes.io/hostname"),
119+
115120
// Placement tags settings
116121
PLACEMENT_TAGS_ENABLED: BoolEnv.default(false),
117122
PLACEMENT_TAGS_PREFIX: z.string().default("node.cluster.x-k8s.io"),

apps/supervisor/src/workloadManager/kubernetes.ts

Lines changed: 62 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ export class KubernetesWorkloadManager implements WorkloadManager {
120120
},
121121
spec: {
122122
...this.addPlacementTags(this.#defaultPodSpec, opts.placementTags),
123-
affinity: this.#getNodeAffinity(opts.machine),
123+
affinity: this.#getAffinity(opts.machine, opts.projectId),
124124
terminationGracePeriodSeconds: 60 * 60,
125125
containers: [
126126
{
@@ -390,50 +390,86 @@ export class KubernetesWorkloadManager implements WorkloadManager {
390390
return preset.name.startsWith("large-");
391391
}
392392

393-
#getNodeAffinity(preset: MachinePreset): k8s.V1Affinity | undefined {
393+
/**
 * Builds the pod's affinity by combining the node-affinity rules derived from
 * the machine preset with the project pod-affinity rules.
 *
 * @param preset - Machine preset passed through to `#getNodeAffinityRules`.
 * @param projectId - Project identifier passed through to `#getProjectPodAffinity`.
 * @returns An affinity object containing only the sections that are defined,
 *   or `undefined` when neither helper returns any rules.
 */
#getAffinity(preset: MachinePreset, projectId: string): k8s.V1Affinity | undefined {
394+
const nodeAffinity = this.#getNodeAffinityRules(preset);
395+
const podAffinity = this.#getProjectPodAffinity(projectId);
396+
397+
// Nothing to apply: leave the affinity field unset entirely.
if (!nodeAffinity && !podAffinity) {
398+
return undefined;
399+
}
400+
401+
// Conditional spreads so an absent section is omitted rather than set to undefined.
return {
402+
...(nodeAffinity && { nodeAffinity }),
403+
...(podAffinity && { podAffinity }),
404+
};
405+
}
406+
407+
#getNodeAffinityRules(preset: MachinePreset): k8s.V1NodeAffinity | undefined {
394408
if (!env.KUBERNETES_LARGE_MACHINE_POOL_LABEL) {
395409
return undefined;
396410
}
397411

398412
if (this.#isLargeMachine(preset)) {
399413
// soft preference for the large-machine pool, falls back to standard if unavailable
400414
return {
401-
nodeAffinity: {
402-
preferredDuringSchedulingIgnoredDuringExecution: [
403-
{
404-
weight: 100,
405-
preference: {
406-
matchExpressions: [
407-
{
408-
key: "node.cluster.x-k8s.io/machinepool",
409-
operator: "In",
410-
values: [env.KUBERNETES_LARGE_MACHINE_POOL_LABEL],
411-
},
412-
],
413-
},
415+
preferredDuringSchedulingIgnoredDuringExecution: [
416+
{
417+
weight: 100,
418+
preference: {
419+
matchExpressions: [
420+
{
421+
key: "node.cluster.x-k8s.io/machinepool",
422+
operator: "In",
423+
values: [env.KUBERNETES_LARGE_MACHINE_POOL_LABEL],
424+
},
425+
],
414426
},
415-
],
416-
},
427+
},
428+
],
417429
};
418430
}
419431

420432
// not schedulable in the large-machine pool
421433
return {
422-
nodeAffinity: {
423-
requiredDuringSchedulingIgnoredDuringExecution: {
424-
nodeSelectorTerms: [
425-
{
434+
requiredDuringSchedulingIgnoredDuringExecution: {
435+
nodeSelectorTerms: [
436+
{
437+
matchExpressions: [
438+
{
439+
key: "node.cluster.x-k8s.io/machinepool",
440+
operator: "NotIn",
441+
values: [env.KUBERNETES_LARGE_MACHINE_POOL_LABEL],
442+
},
443+
],
444+
},
445+
],
446+
},
447+
};
448+
}
449+
450+
#getProjectPodAffinity(projectId: string): k8s.V1PodAffinity | undefined {
451+
if (!env.KUBERNETES_PROJECT_AFFINITY_ENABLED) {
452+
return undefined;
453+
}
454+
455+
return {
456+
preferredDuringSchedulingIgnoredDuringExecution: [
457+
{
458+
weight: env.KUBERNETES_PROJECT_AFFINITY_WEIGHT,
459+
podAffinityTerm: {
460+
labelSelector: {
426461
matchExpressions: [
427462
{
428-
key: "node.cluster.x-k8s.io/machinepool",
429-
operator: "NotIn",
430-
values: [env.KUBERNETES_LARGE_MACHINE_POOL_LABEL],
463+
key: "project",
464+
operator: "In",
465+
values: [projectId],
431466
},
432467
],
433468
},
434-
],
469+
topologyKey: env.KUBERNETES_PROJECT_AFFINITY_TOPOLOGY_KEY,
470+
},
435471
},
436-
},
472+
],
437473
};
438474
}
439475
}

0 commit comments

Comments
 (0)