From 6fd64400efe2a81ae24a813932e9c31002a5b56f Mon Sep 17 00:00:00 2001 From: Amber Agent Date: Fri, 5 Dec 2025 22:55:52 +0000 Subject: [PATCH 1/3] fix(operator,runner): mount runner token as volume for dynamic refresh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Long-running sessions were failing due to ServiceAccount token expiration because tokens were injected as environment variables at pod startup and never refreshed, even though the operator was refreshing the Secret. Changes: - Operator: Mount runner token Secret as volume instead of env var - Operator: Inject BOT_TOKEN_PATH env var pointing to mounted token file - Runner: Read token from BOT_TOKEN_PATH file on each connection/reconnection - Runner: Fall back to BOT_TOKEN env var for backward compatibility - Runner: Improve error messages for token authentication issues The operator already refreshes tokens every 45 minutes via ensureFreshRunnerToken(). Now the runner can read the refreshed token from the mounted Secret volume without requiring a pod restart. 
Fixes #445 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../operator/internal/handlers/sessions.go | 45 +++++++++++-------- .../runner_shell/core/transport_ws.py | 23 ++++++++-- 2 files changed, 46 insertions(+), 22 deletions(-) diff --git a/components/operator/internal/handlers/sessions.go b/components/operator/internal/handlers/sessions.go index 1059c807c..a066a9cbb 100644 --- a/components/operator/internal/handlers/sessions.go +++ b/components/operator/internal/handlers/sessions.go @@ -931,6 +931,17 @@ func handleAgenticSessionEvent(obj *unstructured.Unstructured) error { } log.Printf("Session %s initiated by user: %s (userId: %s)", name, userName, userID) + // Determine runner token secret name for volume mount + runnerTokenSecretName := "" + if annotations := currentObj.GetAnnotations(); annotations != nil { + if v, ok := annotations["ambient-code.io/runner-token-secret"]; ok && strings.TrimSpace(v) != "" { + runnerTokenSecretName = strings.TrimSpace(v) + } + } + if runnerTokenSecretName == "" { + runnerTokenSecretName = fmt.Sprintf("ambient-runner-token-%s", name) + } + // Create the Job job := &batchv1.Job{ ObjectMeta: v1.ObjectMeta{ @@ -979,6 +990,14 @@ func handleAgenticSessionEvent(obj *unstructured.Unstructured) error { }, }, }, + { + Name: "runner-token", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: runnerTokenSecretName, + }, + }, + }, }, // InitContainer to ensure workspace directory structure exists @@ -1037,6 +1056,8 @@ func handleAgenticSessionEvent(obj *unstructured.Unstructured) error { // Mount .claude directory for session state persistence // This enables SDK's built-in resume functionality {Name: "workspace", MountPath: "/app/.claude", SubPath: fmt.Sprintf("sessions/%s/.claude", name), ReadOnly: false}, + // Mount runner token secret as volume for dynamic token refresh + {Name: "runner-token", MountPath: "/app/runner-token", ReadOnly: true}, }, Env: 
func() []corev1.EnvVar { @@ -1153,26 +1174,12 @@ func handleAgenticSessionEvent(obj *unstructured.Unstructured) error { base = append(base, corev1.EnvVar{Name: "PARENT_SESSION_ID", Value: parentSessionID}) log.Printf("Session %s: passing PARENT_SESSION_ID=%s to runner", name, parentSessionID) } - // If backend annotated the session with a runner token secret, inject only BOT_TOKEN - // Secret contains: 'k8s-token' (for CR updates) - // Prefer annotated secret name; fallback to deterministic name - secretName := "" - if meta, ok := currentObj.Object["metadata"].(map[string]interface{}); ok { - if anns, ok := meta["annotations"].(map[string]interface{}); ok { - if v, ok := anns["ambient-code.io/runner-token-secret"].(string); ok && strings.TrimSpace(v) != "" { - secretName = strings.TrimSpace(v) - } - } - } - if secretName == "" { - secretName = fmt.Sprintf("ambient-runner-token-%s", name) - } + // Inject BOT_TOKEN_PATH pointing to mounted secret volume + // Token is mounted from runnerTokenSecretName at /app/runner-token + // This allows the runner to read refreshed tokens without pod restart base = append(base, corev1.EnvVar{ - Name: "BOT_TOKEN", - ValueFrom: &corev1.EnvVarSource{SecretKeyRef: &corev1.SecretKeySelector{ - LocalObjectReference: corev1.LocalObjectReference{Name: secretName}, - Key: "k8s-token", - }}, + Name: "BOT_TOKEN_PATH", + Value: "/app/runner-token/k8s-token", }) // Add CR-provided envs last (override base when same key) if spec, ok := currentObj.Object["spec"].(map[string]interface{}); ok { diff --git a/components/runners/runner-shell/runner_shell/core/transport_ws.py b/components/runners/runner-shell/runner_shell/core/transport_ws.py index 8c0053bf6..b03c3bc64 100644 --- a/components/runners/runner-shell/runner_shell/core/transport_ws.py +++ b/components/runners/runner-shell/runner_shell/core/transport_ws.py @@ -30,8 +30,19 @@ async def connect(self): """Connect to WebSocket endpoint.""" try: # Forward Authorization header if BOT_TOKEN (runner 
SA token) is present + # Read from file if BOT_TOKEN_PATH is set (for dynamic token refresh) + # Otherwise fall back to BOT_TOKEN env var (backward compatibility) headers: Dict[str, str] = {} - token = (os.getenv("BOT_TOKEN") or "").strip() + token = "" + token_path = (os.getenv("BOT_TOKEN_PATH") or "").strip() + if token_path: + try: + with open(token_path, "r") as f: + token = f.read().strip() + except Exception as e: + logger.warning(f"Failed to read token from {token_path}: {e}") + if not token: + token = (os.getenv("BOT_TOKEN") or "").strip() if token: headers["Authorization"] = f"Bearer {token}" @@ -69,10 +80,16 @@ async def connect(self): ) # Surface a clearer hint when auth is likely missing if status == 401: + token_path = (os.getenv("BOT_TOKEN_PATH") or "").strip() has_token = bool((os.getenv("BOT_TOKEN") or "").strip()) - if not has_token: + has_token_path = bool(token_path) + if not has_token and not has_token_path: logger.error( - "No BOT_TOKEN present; backend project routes require Authorization." + "No BOT_TOKEN or BOT_TOKEN_PATH present; backend project routes require Authorization." + ) + elif has_token_path and not token: + logger.error( + f"BOT_TOKEN_PATH is set to {token_path} but token could not be read." 
) raise except Exception as e: From 4f940d254a6779acdfc257fe6f6bb48b0d72559f Mon Sep 17 00:00:00 2001 From: Bob Gregor Date: Fri, 5 Dec 2025 19:00:28 -0500 Subject: [PATCH 2/3] Update components/runners/runner-shell/runner_shell/core/transport_ws.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../runners/runner-shell/runner_shell/core/transport_ws.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/runners/runner-shell/runner_shell/core/transport_ws.py b/components/runners/runner-shell/runner_shell/core/transport_ws.py index b03c3bc64..aae26ebfc 100644 --- a/components/runners/runner-shell/runner_shell/core/transport_ws.py +++ b/components/runners/runner-shell/runner_shell/core/transport_ws.py @@ -37,7 +37,7 @@ async def connect(self): token_path = (os.getenv("BOT_TOKEN_PATH") or "").strip() if token_path: try: - with open(token_path, "r") as f: + with open(token_path, "r", encoding="utf-8") as f: token = f.read().strip() except Exception as e: logger.warning(f"Failed to read token from {token_path}: {e}") From cb20cd1a58f992dfdf02532285a00c990e89e207 Mon Sep 17 00:00:00 2001 From: Bob Gregor Date: Fri, 5 Dec 2025 19:00:38 -0500 Subject: [PATCH 3/3] Update components/runners/runner-shell/runner_shell/core/transport_ws.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../runners/runner-shell/runner_shell/core/transport_ws.py | 1 + 1 file changed, 1 insertion(+) diff --git a/components/runners/runner-shell/runner_shell/core/transport_ws.py b/components/runners/runner-shell/runner_shell/core/transport_ws.py index aae26ebfc..e22d73c7a 100644 --- a/components/runners/runner-shell/runner_shell/core/transport_ws.py +++ b/components/runners/runner-shell/runner_shell/core/transport_ws.py @@ -39,6 +39,7 @@ async def connect(self): try: with open(token_path, "r", encoding="utf-8") as f: token = f.read().strip() + logger.info(f"Read token from {token_path}") except Exception as e: 
logger.warning(f"Failed to read token from {token_path}: {e}") if not token: