Skip to content

Commit 306cbc8

Browse files
committed
feat(supervisor): project-based scheduling affinity for image cache locality
Adds optional pod affinity so that pods from the same project prefer to be scheduled on the same node. This can help improve image cache hit rates: subsequent pods benefit from already-pulled image layers, which reduces startup time.

Complements the built-in ImageLocality scheduler plugin by helping during burst scheduling scenarios. Pod affinity sees scheduled pods immediately, while ImageLocality only sees images after they're fully pulled.

Configuration:
- `KUBERNETES_PROJECT_AFFINITY_ENABLED` - Enable/disable (default: false)
- `KUBERNETES_PROJECT_AFFINITY_WEIGHT` - Scheduler weight 1-100 (default: 50)
- `KUBERNETES_PROJECT_AFFINITY_TOPOLOGY_KEY` - Topology key (default: kubernetes.io/hostname)

Uses soft (preferred) affinity, so pods always schedule even if the preferred node is full.
1 parent b72cacc commit 306cbc8

File tree

2 files changed

+67
-26
lines changed

2 files changed

+67
-26
lines changed

apps/supervisor/src/env.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,11 @@ const Env = z.object({
112112
KUBERNETES_SCHEDULER_NAME: z.string().optional(), // Custom scheduler name for pods
113113
KUBERNETES_LARGE_MACHINE_POOL_LABEL: z.string().optional(), // if set, large-* presets affinity for machinepool=<value>
114114

115+
// Project affinity settings - pods from the same project prefer the same node
116+
KUBERNETES_PROJECT_AFFINITY_ENABLED: BoolEnv.default(false),
117+
KUBERNETES_PROJECT_AFFINITY_WEIGHT: z.coerce.number().int().min(1).max(100).default(50),
118+
KUBERNETES_PROJECT_AFFINITY_TOPOLOGY_KEY: z.string().default("kubernetes.io/hostname"),
119+
115120
// Placement tags settings
116121
PLACEMENT_TAGS_ENABLED: BoolEnv.default(false),
117122
PLACEMENT_TAGS_PREFIX: z.string().default("node.cluster.x-k8s.io"),

apps/supervisor/src/workloadManager/kubernetes.ts

Lines changed: 62 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ export class KubernetesWorkloadManager implements WorkloadManager {
120120
},
121121
spec: {
122122
...this.addPlacementTags(this.#defaultPodSpec, opts.placementTags),
123-
affinity: this.#getNodeAffinity(opts.machine),
123+
affinity: this.#getAffinity(opts.machine, opts.projectId),
124124
terminationGracePeriodSeconds: 60 * 60,
125125
containers: [
126126
{
@@ -390,50 +390,86 @@ export class KubernetesWorkloadManager implements WorkloadManager {
390390
return preset.name.startsWith("large-");
391391
}
392392

393-
#getNodeAffinity(preset: MachinePreset): k8s.V1Affinity | undefined {
393+
#getAffinity(preset: MachinePreset, projectId: string): k8s.V1Affinity | undefined {
394+
const nodeAffinity = this.#getNodeAffinityRules(preset);
395+
const podAffinity = this.#getProjectPodAffinity(projectId);
396+
397+
if (!nodeAffinity && !podAffinity) {
398+
return undefined;
399+
}
400+
401+
return {
402+
...(nodeAffinity && { nodeAffinity }),
403+
...(podAffinity && { podAffinity }),
404+
};
405+
}
406+
407+
#getNodeAffinityRules(preset: MachinePreset): k8s.V1NodeAffinity | undefined {
394408
if (!env.KUBERNETES_LARGE_MACHINE_POOL_LABEL) {
395409
return undefined;
396410
}
397411

398412
if (this.#isLargeMachine(preset)) {
399413
// soft preference for the large-machine pool, falls back to standard if unavailable
400414
return {
401-
nodeAffinity: {
402-
preferredDuringSchedulingIgnoredDuringExecution: [
403-
{
404-
weight: 100,
405-
preference: {
406-
matchExpressions: [
407-
{
408-
key: "node.cluster.x-k8s.io/machinepool",
409-
operator: "In",
410-
values: [env.KUBERNETES_LARGE_MACHINE_POOL_LABEL],
411-
},
412-
],
413-
},
415+
preferredDuringSchedulingIgnoredDuringExecution: [
416+
{
417+
weight: 100,
418+
preference: {
419+
matchExpressions: [
420+
{
421+
key: "node.cluster.x-k8s.io/machinepool",
422+
operator: "In",
423+
values: [env.KUBERNETES_LARGE_MACHINE_POOL_LABEL],
424+
},
425+
],
414426
},
415-
],
416-
},
427+
},
428+
],
417429
};
418430
}
419431

420432
// not schedulable in the large-machine pool
421433
return {
422-
nodeAffinity: {
423-
requiredDuringSchedulingIgnoredDuringExecution: {
424-
nodeSelectorTerms: [
425-
{
434+
requiredDuringSchedulingIgnoredDuringExecution: {
435+
nodeSelectorTerms: [
436+
{
437+
matchExpressions: [
438+
{
439+
key: "node.cluster.x-k8s.io/machinepool",
440+
operator: "NotIn",
441+
values: [env.KUBERNETES_LARGE_MACHINE_POOL_LABEL],
442+
},
443+
],
444+
},
445+
],
446+
},
447+
};
448+
}
449+
450+
#getProjectPodAffinity(projectId: string): k8s.V1PodAffinity | undefined {
451+
if (!env.KUBERNETES_PROJECT_AFFINITY_ENABLED) {
452+
return undefined;
453+
}
454+
455+
return {
456+
preferredDuringSchedulingIgnoredDuringExecution: [
457+
{
458+
weight: env.KUBERNETES_PROJECT_AFFINITY_WEIGHT,
459+
podAffinityTerm: {
460+
labelSelector: {
426461
matchExpressions: [
427462
{
428-
key: "node.cluster.x-k8s.io/machinepool",
429-
operator: "NotIn",
430-
values: [env.KUBERNETES_LARGE_MACHINE_POOL_LABEL],
463+
key: "project",
464+
operator: "In",
465+
values: [projectId],
431466
},
432467
],
433468
},
434-
],
469+
topologyKey: env.KUBERNETES_PROJECT_AFFINITY_TOPOLOGY_KEY,
470+
},
435471
},
436-
},
472+
],
437473
};
438474
}
439475
}

0 commit comments

Comments
 (0)