From 65178f193fd314ae5fcf92b63cc705f4991070ac Mon Sep 17 00:00:00 2001 From: Ammar Date: Tue, 28 Oct 2025 17:35:03 +0000 Subject: [PATCH 01/10] =?UTF-8?q?=F0=9F=A4=96=20Encourage=20setting=20fina?= =?UTF-8?q?l=20status=20before=20completing=20response?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Updated status_set tool description to remind agents to set a final completion status before finishing their response. Examples added: '✅ Complete', '🎉 Done', '✓ Finished' This helps users understand when the agent has completed all work and isn't just waiting or still processing. Generated with `cmux` --- src/utils/tools/toolDefinitions.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utils/tools/toolDefinitions.ts b/src/utils/tools/toolDefinitions.ts index 5906cfeded..49967dc628 100644 --- a/src/utils/tools/toolDefinitions.ts +++ b/src/utils/tools/toolDefinitions.ts @@ -186,6 +186,7 @@ export const TOOL_DEFINITIONS = { "Set a status indicator to show what the agent is currently doing. " + "The emoji appears left of the streaming indicator, and the message shows on hover. " + "IMPORTANT: Always set a status at the start of each response and update it as your work progresses. " + + "Set a final status before finishing your response (e.g., '✅ Complete', '🎉 Done', '✓ Finished'). " + "The status is cleared when a new stream starts, so you must set it again for each response. " + "Use this to communicate ongoing activities (e.g., '🔍 Analyzing code', '📝 Writing tests', '🔧 Refactoring logic').", schema: z From f802ab7feb205c54c09b6eafe956a6ee2386a6c5 Mon Sep 17 00:00:00 2001 From: Ammar Date: Tue, 28 Oct 2025 17:36:21 +0000 Subject: [PATCH 02/10] =?UTF-8?q?=F0=9F=A4=96=20Replace=20'stream'=20with?= =?UTF-8?q?=20'response'=20in=20tool=20description?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Models don't understand 'stream' terminology. Changed to: 'The status is cleared at the start of each new response, so you must set it again.' More concise and uses terminology models understand. Generated with `cmux` --- src/utils/tools/toolDefinitions.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/tools/toolDefinitions.ts b/src/utils/tools/toolDefinitions.ts index 49967dc628..b76cfa0dba 100644 --- a/src/utils/tools/toolDefinitions.ts +++ b/src/utils/tools/toolDefinitions.ts @@ -187,7 +187,7 @@ export const TOOL_DEFINITIONS = { "The emoji appears left of the streaming indicator, and the message shows on hover. " + "IMPORTANT: Always set a status at the start of each response and update it as your work progresses. " + "Set a final status before finishing your response (e.g., '✅ Complete', '🎉 Done', '✓ Finished'). " + - "The status is cleared when a new stream starts, so you must set it again for each response. " + + "The status is cleared at the start of each new response, so you must set it again. " + "Use this to communicate ongoing activities (e.g., '🔍 Analyzing code', '📝 Writing tests', '🔧 Refactoring logic').", schema: z .object({ From 9f65d01b80a7f5f88d4561ff3ce3ebfd9cf1b5bd Mon Sep 17 00:00:00 2001 From: Ammar Date: Tue, 28 Oct 2025 17:45:03 +0000 Subject: [PATCH 03/10] =?UTF-8?q?=F0=9F=A4=96=20fix:=20Show=20'failed'=20s?= =?UTF-8?q?tatus=20when=20status=5Fset=20validation=20fails?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem When status_set validation failed (e.g., invalid emoji), the tool showed 'completed' status in the UI even though it failed. This made users think validation was silently failing, especially when the status didn't appear in the sidebar. ## Root Cause Tool status determination only checked part.state === 'output-available', which is true even for failed results. It didn't check result.success. ## Solution 1. Enhanced status determination to check result.success for tools that return { success: boolean } pattern 2. Show 'failed' status when result.success === false 3. Display error message in StatusSetToolCall UI for failed validations ## Testing - Added unit tests for failed/completed status display - All 839 tests pass - Typecheck passes When validation fails now: - Tool shows 'failed' status (not 'completed') - Error message displays in UI - agentStatus NOT updated (correct existing behavior) - Clear feedback to user that validation failed --- src/components/tools/StatusSetToolCall.tsx | 9 +- .../StreamingMessageAggregator.status.test.ts | 120 ++++++++++++++++++ .../messages/StreamingMessageAggregator.ts | 26 ++-- 3 files changed, 146 insertions(+), 9 deletions(-) diff --git a/src/components/tools/StatusSetToolCall.tsx b/src/components/tools/StatusSetToolCall.tsx index 4c313fa5c8..803362a58e 100644 --- a/src/components/tools/StatusSetToolCall.tsx +++ b/src/components/tools/StatusSetToolCall.tsx @@ -12,11 +12,17 @@ interface StatusSetToolCallProps { export const StatusSetToolCall: React.FC = ({ args, - result: _result, + result, status = "pending", }) => { const statusDisplay = getStatusDisplay(status); + // Show error message if validation failed + const errorMessage = + status === "failed" && result && typeof result === "object" && "error" in result + ? String(result.error) + : undefined; + return ( @@ -25,6 +31,7 @@ export const StatusSetToolCall: React.FC = ({ status_set {args.message} + {errorMessage && ({errorMessage})} {statusDisplay} diff --git a/src/utils/messages/StreamingMessageAggregator.status.test.ts b/src/utils/messages/StreamingMessageAggregator.status.test.ts index fac616c4aa..57159ae2b5 100644 --- a/src/utils/messages/StreamingMessageAggregator.status.test.ts +++ b/src/utils/messages/StreamingMessageAggregator.status.test.ts @@ -257,4 +257,124 @@ describe("StreamingMessageAggregator - Agent Status", () => { // Status should be cleared on new stream start expect(aggregator.getAgentStatus()).toBeUndefined(); }); + + it("should show 'failed' status in UI when status_set validation fails", () => { + const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z"); + const messageId = "msg1"; + + // Start a stream + aggregator.handleStreamStart({ + type: "stream-start", + workspaceId: "workspace1", + messageId, + model: "test-model", + historySequence: 1, + }); + + // Add a status_set tool call with invalid emoji + aggregator.handleToolCallStart({ + type: "tool-call-start", + workspaceId: "workspace1", + messageId, + toolCallId: "tool1", + toolName: "status_set", + args: { emoji: "not-an-emoji", message: "test" }, + tokens: 10, + timestamp: Date.now(), + }); + + // Complete with validation failure + aggregator.handleToolCallEnd({ + type: "tool-call-end", + workspaceId: "workspace1", + messageId, + toolCallId: "tool1", + toolName: "status_set", + result: { success: false, error: "emoji must be a single emoji character" }, + }); + + // End the stream to finalize message + aggregator.handleStreamEnd({ + type: "stream-end", + workspaceId: "workspace1", + messageId, + metadata: { model: "test-model" }, + parts: [], + }); + + // Check that the tool message shows 'failed' status in the UI + const displayedMessages = aggregator.getDisplayedMessages(); + const toolMessage = displayedMessages.find((m) => m.type === "tool"); + expect(toolMessage).toBeDefined(); + expect(toolMessage?.type).toBe("tool"); + if (toolMessage?.type === "tool") { + expect(toolMessage.status).toBe("failed"); + expect(toolMessage.toolName).toBe("status_set"); + } + + // And status should NOT be updated in aggregator + expect(aggregator.getAgentStatus()).toBeUndefined(); + }); + + it("should show 'completed' status in UI when status_set validation succeeds", () => { + const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z"); + const messageId = "msg1"; + + // Start a stream + aggregator.handleStreamStart({ + type: "stream-start", + workspaceId: "workspace1", + messageId, + model: "test-model", + historySequence: 1, + }); + + // Add a successful status_set tool call + aggregator.handleToolCallStart({ + type: "tool-call-start", + workspaceId: "workspace1", + messageId, + toolCallId: "tool1", + toolName: "status_set", + args: { emoji: "🔍", message: "Analyzing code" }, + tokens: 10, + timestamp: Date.now(), + }); + + // Complete successfully + aggregator.handleToolCallEnd({ + type: "tool-call-end", + workspaceId: "workspace1", + messageId, + toolCallId: "tool1", + toolName: "status_set", + result: { success: true, emoji: "🔍", message: "Analyzing code" }, + }); + + // End the stream to finalize message + aggregator.handleStreamEnd({ + type: "stream-end", + workspaceId: "workspace1", + messageId, + metadata: { model: "test-model" }, + parts: [], + }); + + // Check that the tool message shows 'completed' status in the UI + const displayedMessages = aggregator.getDisplayedMessages(); + const toolMessage = displayedMessages.find((m) => m.type === "tool"); + expect(toolMessage).toBeDefined(); + expect(toolMessage?.type).toBe("tool"); + if (toolMessage?.type === "tool") { + expect(toolMessage.status).toBe("completed"); + expect(toolMessage.toolName).toBe("status_set"); + } + + // And status SHOULD be updated in aggregator + const status = aggregator.getAgentStatus(); + expect(status).toBeDefined(); + expect(status?.emoji).toBe("🔍"); + expect(status?.message).toBe("Analyzing code"); + }); + }); diff --git a/src/utils/messages/StreamingMessageAggregator.ts b/src/utils/messages/StreamingMessageAggregator.ts index a235c2d111..e264571f87 100644 --- a/src/utils/messages/StreamingMessageAggregator.ts +++ b/src/utils/messages/StreamingMessageAggregator.ts @@ -769,14 +769,24 @@ export class StreamingMessageAggregator { timestamp: part.timestamp ?? baseTimestamp, }); } else if (isDynamicToolPart(part)) { - const status = - part.state === "output-available" - ? "completed" - : part.state === "input-available" && message.metadata?.partial - ? "interrupted" - : part.state === "input-available" - ? "executing" - : "pending"; + // Determine status based on part state and result + let status: "pending" | "executing" | "completed" | "failed" | "interrupted"; + if (part.state === "output-available") { + // Check if result indicates failure (for tools that return { success: boolean }) + const output = part.output as unknown; + const isFailed = + typeof output === "object" && + output !== null && + "success" in output && + output.success === false; + status = isFailed ? "failed" : "completed"; + } else if (part.state === "input-available" && message.metadata?.partial) { + status = "interrupted"; + } else if (part.state === "input-available") { + status = "executing"; + } else { + status = "pending"; + } displayedMessages.push({ type: "tool", From cf1603bdf600b8850ad1a1ad087de1d5805e094d Mon Sep 17 00:00:00 2001 From: Ammar Date: Tue, 28 Oct 2025 17:48:22 +0000 Subject: [PATCH 04/10] =?UTF-8?q?=F0=9F=A4=96=20fix:=20Remove=20unnecessar?= =?UTF-8?q?y=20type=20assertion?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ESLint flagged 'as unknown' as unnecessary since part.output is already typed as unknown. Simplified to use part.output directly. --- .../messages/StreamingMessageAggregator.status.test.ts | 1 - src/utils/messages/StreamingMessageAggregator.ts | 9 ++++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/utils/messages/StreamingMessageAggregator.status.test.ts b/src/utils/messages/StreamingMessageAggregator.status.test.ts index 57159ae2b5..924882234e 100644 --- a/src/utils/messages/StreamingMessageAggregator.status.test.ts +++ b/src/utils/messages/StreamingMessageAggregator.status.test.ts @@ -376,5 +376,4 @@ describe("StreamingMessageAggregator - Agent Status", () => { expect(status?.emoji).toBe("🔍"); expect(status?.message).toBe("Analyzing code"); }); - }); diff --git a/src/utils/messages/StreamingMessageAggregator.ts b/src/utils/messages/StreamingMessageAggregator.ts index e264571f87..5ea41822e8 100644 --- a/src/utils/messages/StreamingMessageAggregator.ts +++ b/src/utils/messages/StreamingMessageAggregator.ts @@ -773,12 +773,11 @@ export class StreamingMessageAggregator { let status: "pending" | "executing" | "completed" | "failed" | "interrupted"; if (part.state === "output-available") { // Check if result indicates failure (for tools that return { success: boolean }) - const output = part.output as unknown; const isFailed = - typeof output === "object" && - output !== null && - "success" in output && - output.success === false; + typeof part.output === "object" && + part.output !== null && + "success" in part.output && + part.output.success === false; status = isFailed ? "failed" : "completed"; } else if (part.state === "input-available" && message.metadata?.partial) { status = "interrupted"; From eaec61ff328eb08665ceabcbb97752504ffc6fe8 Mon Sep 17 00:00:00 2001 From: Ammar Date: Tue, 28 Oct 2025 17:54:35 +0000 Subject: [PATCH 05/10] =?UTF-8?q?=F0=9F=A4=96=20refactor:=20Extract=20comm?= =?UTF-8?q?on=20result=20check=20helpers?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extracted hasSuccessResult() and hasFailureResult() helpers to reduce duplication. Both todo_write and status_set use the same pattern to check result.success, and the display logic now uses the same helper. This makes the code more maintainable and consistent. --- .../messages/StreamingMessageAggregator.ts | 48 +++++++++++-------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/src/utils/messages/StreamingMessageAggregator.ts b/src/utils/messages/StreamingMessageAggregator.ts index 5ea41822e8..71564c6b48 100644 --- a/src/utils/messages/StreamingMessageAggregator.ts +++ b/src/utils/messages/StreamingMessageAggregator.ts @@ -35,6 +35,31 @@ interface StreamingContext { model: string; } +/** + * Check if a tool result indicates success (for tools that return { success: boolean }) + */ +function hasSuccessResult(result: unknown): boolean { + return ( + typeof result === "object" && + result !== null && + "success" in result && + result.success === true + ); +} + +/** + * Check if a tool result indicates failure (for tools that return { success: boolean }) + */ +function hasFailureResult(result: unknown): boolean { + return ( + typeof result === "object" && + result !== null && + "success" in result && + result.success === false + ); +} + + export class StreamingMessageAggregator { private messages = new Map(); private activeStreams = new Map(); @@ -488,13 +513,7 @@ export class StreamingMessageAggregator { (toolPart as DynamicToolPartAvailable).output = data.result; // Update TODO state if this was a successful todo_write - if ( - data.toolName === "todo_write" && - typeof data.result === "object" && - data.result !== null && - "success" in data.result && - data.result.success - ) { + if (data.toolName === "todo_write" && hasSuccessResult(data.result)) { const args = toolPart.input as { todos: TodoItem[] }; // Only update if todos actually changed (prevents flickering from reference changes) if (!this.todosEqual(this.currentTodos, args.todos)) { @@ -503,13 +522,7 @@ export class StreamingMessageAggregator { } // Update agent status if this was a successful status_set - if ( - data.toolName === "status_set" && - typeof data.result === "object" && - data.result !== null && - "success" in data.result && - data.result.success - ) { + if (data.toolName === "status_set" && hasSuccessResult(data.result)) { const args = toolPart.input as { emoji: string; message: string }; this.agentStatus = { emoji: args.emoji, message: args.message }; } @@ -773,12 +786,7 @@ export class StreamingMessageAggregator { let status: "pending" | "executing" | "completed" | "failed" | "interrupted"; if (part.state === "output-available") { // Check if result indicates failure (for tools that return { success: boolean }) - const isFailed = - typeof part.output === "object" && - part.output !== null && - "success" in part.output && - part.output.success === false; - status = isFailed ? "failed" : "completed"; + status = hasFailureResult(part.output) ? "failed" : "completed"; } else if (part.state === "input-available" && message.metadata?.partial) { status = "interrupted"; } else if (part.state === "input-available") { From b9e7228b65e7d5d473ad37f6c65d1b76b047dce3 Mon Sep 17 00:00:00 2001 From: Ammar Date: Tue, 28 Oct 2025 17:57:58 +0000 Subject: [PATCH 06/10] =?UTF-8?q?=F0=9F=A4=96=20refactor:=20Show=20outcome?= =?UTF-8?q?=20variety=20in=20final=20status=20examples?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Updated final status examples to reflect different outcomes: - ✅ Success: 'PR checks pass and ready to merge' - ❌ Failure: 'CreateWorkspace Tests failed' - ⚠️ Warning/blocker: 'Encountered serious issue with design' This encourages agents to communicate the actual outcome, not just that work is complete. --- src/utils/tools/toolDefinitions.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/utils/tools/toolDefinitions.ts b/src/utils/tools/toolDefinitions.ts index b76cfa0dba..24304c2fab 100644 --- a/src/utils/tools/toolDefinitions.ts +++ b/src/utils/tools/toolDefinitions.ts @@ -186,7 +186,10 @@ export const TOOL_DEFINITIONS = { "Set a status indicator to show what the agent is currently doing. " + "The emoji appears left of the streaming indicator, and the message shows on hover. " + "IMPORTANT: Always set a status at the start of each response and update it as your work progresses. " + - "Set a final status before finishing your response (e.g., '✅ Complete', '🎉 Done', '✓ Finished'). " + + "Set a final status before finishing your response that reflects the outcome: " + + "'✅ PR checks pass and ready to merge' (success), " + + "'❌ CreateWorkspace Tests failed' (failure), " + + "'⚠️ Encountered serious issue with design' (warning/blocker). " + "The status is cleared at the start of each new response, so you must set it again. " + "Use this to communicate ongoing activities (e.g., '🔍 Analyzing code', '📝 Writing tests', '🔧 Refactoring logic').", schema: z From 56ccbf60dde8d1a3902f89adf30901a05d7da532 Mon Sep 17 00:00:00 2001 From: Ammar Date: Tue, 28 Oct 2025 18:00:07 +0000 Subject: [PATCH 07/10] =?UTF-8?q?=F0=9F=A4=96=20fix:=20Run=20prettier=20on?= =?UTF-8?q?=20StreamingMessageAggregator.ts?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/utils/messages/StreamingMessageAggregator.ts | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/src/utils/messages/StreamingMessageAggregator.ts b/src/utils/messages/StreamingMessageAggregator.ts index 71564c6b48..627d7c11f4 100644 --- a/src/utils/messages/StreamingMessageAggregator.ts +++ b/src/utils/messages/StreamingMessageAggregator.ts @@ -40,10 +40,7 @@ interface StreamingContext { */ function hasSuccessResult(result: unknown): boolean { return ( - typeof result === "object" && - result !== null && - "success" in result && - result.success === true + typeof result === "object" && result !== null && "success" in result && result.success === true ); } @@ -52,14 +49,10 @@ function hasSuccessResult(result: unknown): boolean { */ function hasFailureResult(result: unknown): boolean { return ( - typeof result === "object" && - result !== null && - "success" in result && - result.success === false + typeof result === "object" && result !== null && "success" in result && result.success === false ); } - export class StreamingMessageAggregator { private messages = new Map(); private activeStreams = new Map(); From 01b72d5b62583ebce7166fceb7ebb14b99e7f820 Mon Sep 17 00:00:00 2001 From: Ammar Date: Tue, 28 Oct 2025 18:16:52 +0000 Subject: [PATCH 08/10] =?UTF-8?q?=F0=9F=A4=96=20refactor:=20Shared=20tool?= =?UTF-8?q?=20result=20processing=20for=20live=20&=20historical?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem Status (and TODOs) weren't persisting across page reloads/workspace switches because loadHistoricalMessages() didn't process tool results to reconstruct derived state. ## Solution Extracted processToolResult() as single source of truth for updating derived state from tool results. Called from: 1. handleToolCallEnd() - live streaming events 2. loadHistoricalMessages() - historical message loading This ensures agentStatus and currentTodos are reconstructed uniformly whether processing live events or historical snapshots. ## Implementation - Extracted 23-line processToolResult() method - Updated handleToolCallEnd() to call shared method (-12 LoC) - Updated loadHistoricalMessages() to process tool parts (+6 LoC) - Added 3 tests for historical message reconstruction ## Benefits - Single source of truth for tool result processing - No special reconstruction logic needed - Easier to extend with new derived state - Leverages existing tool part architecture ## Testing - Added 3 new tests for historical message scenarios - All 842 tests pass - Typecheck and lint pass Net: ~17 LoC --- .../StreamingMessageAggregator.status.test.ts | 118 ++++++++++++++++++ .../messages/StreamingMessageAggregator.ts | 52 +++++--- 2 files changed, 156 insertions(+), 14 deletions(-) diff --git a/src/utils/messages/StreamingMessageAggregator.status.test.ts b/src/utils/messages/StreamingMessageAggregator.status.test.ts index 924882234e..8f655d4b7d 100644 --- a/src/utils/messages/StreamingMessageAggregator.status.test.ts +++ b/src/utils/messages/StreamingMessageAggregator.status.test.ts @@ -376,4 +376,122 @@ describe("StreamingMessageAggregator - Agent Status", () => { expect(status?.emoji).toBe("🔍"); expect(status?.message).toBe("Analyzing code"); }); + + it("should reconstruct agentStatus when loading historical messages", () => { + const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z"); + + // Create historical messages with a completed status_set tool call + const historicalMessages = [ + { + id: "msg1", + role: "user" as const, + parts: [{ type: "text" as const, text: "Hello" }], + metadata: { timestamp: Date.now(), historySequence: 1 }, + }, + { + id: "msg2", + role: "assistant" as const, + parts: [ + { type: "text" as const, text: "Working on it..." }, + { + type: "dynamic-tool" as const, + toolCallId: "tool1", + toolName: "status_set", + state: "output-available" as const, + input: { emoji: "🔍", message: "Analyzing code" }, + output: { success: true, emoji: "🔍", message: "Analyzing code" }, + timestamp: Date.now(), + }, + ], + metadata: { timestamp: Date.now(), historySequence: 2 }, + }, + ]; + + // Load historical messages + aggregator.loadHistoricalMessages(historicalMessages); + + // Status should be reconstructed from the historical tool call + const status = aggregator.getAgentStatus(); + expect(status).toBeDefined(); + expect(status?.emoji).toBe("🔍"); + expect(status?.message).toBe("Analyzing code"); + }); + + it("should use most recent status_set when loading multiple historical messages", () => { + const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z"); + + // Create historical messages with multiple status_set calls + const historicalMessages = [ + { + id: "msg1", + role: "assistant" as const, + parts: [ + { + type: "dynamic-tool" as const, + toolCallId: "tool1", + toolName: "status_set", + state: "output-available" as const, + input: { emoji: "🔍", message: "First status" }, + output: { success: true, emoji: "🔍", message: "First status" }, + timestamp: Date.now(), + }, + ], + metadata: { timestamp: Date.now(), historySequence: 1 }, + }, + { + id: "msg2", + role: "assistant" as const, + parts: [ + { + type: "dynamic-tool" as const, + toolCallId: "tool2", + toolName: "status_set", + state: "output-available" as const, + input: { emoji: "📝", message: "Second status" }, + output: { success: true, emoji: "📝", message: "Second status" }, + timestamp: Date.now(), + }, + ], + metadata: { timestamp: Date.now(), historySequence: 2 }, + }, + ]; + + // Load historical messages + aggregator.loadHistoricalMessages(historicalMessages); + + // Should use the most recent (last processed) status + const status = aggregator.getAgentStatus(); + expect(status?.emoji).toBe("📝"); + expect(status?.message).toBe("Second status"); + }); + + it("should not reconstruct status from failed status_set in historical messages", () => { + const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z"); + + // Create historical message with failed status_set + const historicalMessages = [ + { + id: "msg1", + role: "assistant" as const, + parts: [ + { + type: "dynamic-tool" as const, + toolCallId: "tool1", + toolName: "status_set", + state: "output-available" as const, + input: { emoji: "not-emoji", message: "test" }, + output: { success: false, error: "emoji must be a single emoji character" }, + timestamp: Date.now(), + }, + ], + metadata: { timestamp: Date.now(), historySequence: 1 }, + }, + ]; + + // Load historical messages + aggregator.loadHistoricalMessages(historicalMessages); + + // Status should remain undefined (failed validation) + expect(aggregator.getAgentStatus()).toBeUndefined(); + }); }); diff --git a/src/utils/messages/StreamingMessageAggregator.ts b/src/utils/messages/StreamingMessageAggregator.ts index 627d7c11f4..24e5b3fc74 100644 --- a/src/utils/messages/StreamingMessageAggregator.ts +++ b/src/utils/messages/StreamingMessageAggregator.ts @@ -199,7 +199,18 @@ export class StreamingMessageAggregator { loadHistoricalMessages(messages: CmuxMessage[]): void { for (const message of messages) { this.messages.set(message.id, message); + + // Process completed tool calls to reconstruct derived state (todos, agentStatus) + // This ensures state persists across page reloads and workspace switches + if (message.role === "assistant") { + for (const part of message.parts) { + if (isDynamicToolPart(part) && part.state === "output-available") { + this.processToolResult(part.toolName, part.input, part.output); + } + } + } } + this.invalidateCache(); } @@ -491,6 +502,31 @@ export class StreamingMessageAggregator { // Tool deltas are for display - args are in dynamic-tool part } + + /** + * Process a completed tool call's result to update derived state. + * Called for both live tool-call-end events and historical tool parts. + * + * This is the single source of truth for updating state from tool results, + * ensuring consistency whether processing live events or historical messages. + */ + private processToolResult(toolName: string, input: unknown, output: unknown): void { + // Update TODO state if this was a successful todo_write + if (toolName === "todo_write" && hasSuccessResult(output)) { + const args = input as { todos: TodoItem[] }; + // Only update if todos actually changed (prevents flickering from reference changes) + if (!this.todosEqual(this.currentTodos, args.todos)) { + this.currentTodos = args.todos; + } + } + + // Update agent status if this was a successful status_set + if (toolName === "status_set" && hasSuccessResult(output)) { + const args = input as { emoji: string; message: string }; + this.agentStatus = { emoji: args.emoji, message: args.message }; + } + } + handleToolCallEnd(data: ToolCallEndEvent): void { const message = this.messages.get(data.messageId); if (message) { @@ -505,20 +541,8 @@ export class StreamingMessageAggregator { (toolPart as DynamicToolPartAvailable).state = "output-available"; (toolPart as DynamicToolPartAvailable).output = data.result; - // Update TODO state if this was a successful todo_write - if (data.toolName === "todo_write" && hasSuccessResult(data.result)) { - const args = toolPart.input as { todos: TodoItem[] }; - // Only update if todos actually changed (prevents flickering from reference changes) - if (!this.todosEqual(this.currentTodos, args.todos)) { - this.currentTodos = args.todos; - } - } - - // Update agent status if this was a successful status_set - if (data.toolName === "status_set" && hasSuccessResult(data.result)) { - const args = toolPart.input as { emoji: string; message: string }; - this.agentStatus = { emoji: args.emoji, message: args.message }; - } + // Process tool result to update derived state (todos, agentStatus, etc.) + this.processToolResult(data.toolName, toolPart.input, data.result); } this.invalidateCache(); } From 9eef8aee4d23028bd6583b9297058e99f55ac42c Mon Sep 17 00:00:00 2001 From: Ammar Date: Tue, 28 Oct 2025 18:19:07 +0000 Subject: [PATCH 09/10] =?UTF-8?q?=F0=9F=A4=96=20fix:=20Run=20prettier=20on?= =?UTF-8?q?=20StreamingMessageAggregator.ts?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/utils/messages/StreamingMessageAggregator.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/utils/messages/StreamingMessageAggregator.ts b/src/utils/messages/StreamingMessageAggregator.ts index 24e5b3fc74..da16048a42 100644 --- a/src/utils/messages/StreamingMessageAggregator.ts +++ b/src/utils/messages/StreamingMessageAggregator.ts @@ -502,7 +502,6 @@ export class StreamingMessageAggregator { // Tool deltas are for display - args are in dynamic-tool part } - /** * Process a completed tool call's result to update derived state. * Called for both live tool-call-end events and historical tool parts. From b1093294da09ab14a0c83a715fbac1e24af8d2b6 Mon Sep 17 00:00:00 2001 From: Ammar Date: Tue, 28 Oct 2025 18:25:44 +0000 Subject: [PATCH 10/10] =?UTF-8?q?=F0=9F=A4=96=20refactor:=20Clear=20TODO/s?= =?UTF-8?q?tatus=20on=20user=20message,=20not=20stream=20events?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem Previous implementation cleared state at different times: - TODOs: Cleared on stream-end (via cleanupStreamState) - Status: Cleared on stream-start This created historical/live disparity because stream events aren't persisted in chat.jsonl. After page reload, TODOs would stick around (desirable!) but the implementation was inconsistent. ## Solution Clear BOTH todos and agentStatus when new user message arrives. Why user messages? - ✅ Persisted in chat.jsonl (unlike stream events) - ✅ Consistent live/historical behavior - ✅ Semantic: New question = new task = clear previous state ## Changes 1. Removed TODO clearing from cleanupStreamState() 2. Removed status clearing from handleStreamStart() 3. Added centralized clearing in handleMessage() when user message arrives 4. Updated test: 'stream-start' → 'new user message' ## Benefits - Consistent behavior whether loading from history or processing live - TODOs persist until next question (user preference!) - Simpler: One place to clear state, not scattered across handlers - Architecture: Relies on persisted data, not ephemeral events ## Testing - All 842 tests pass - Updated test reflects new clearing behavior - Typecheck and lint pass --- .../StreamingMessageAggregator.status.test.ts | 20 ++++++++--------- .../messages/StreamingMessageAggregator.ts | 22 ++++++++++++------- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/src/utils/messages/StreamingMessageAggregator.status.test.ts b/src/utils/messages/StreamingMessageAggregator.status.test.ts index 8f655d4b7d..ad1617688a 100644 --- a/src/utils/messages/StreamingMessageAggregator.status.test.ts +++ b/src/utils/messages/StreamingMessageAggregator.status.test.ts @@ -199,7 +199,7 @@ describe("StreamingMessageAggregator - Agent Status", () => { expect(aggregator.getAgentStatus()).toBeUndefined(); }); - it("should clear agent status on stream-start (different from TODO behavior)", () => { + it("should clear agent status when new user message arrives", () => { const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z"); // Start first stream and set status @@ -245,16 +245,16 @@ describe("StreamingMessageAggregator - Agent Status", () => { // Status persists after stream ends expect(aggregator.getAgentStatus()?.message).toBe("First task"); - // Start a NEW stream - status should be cleared - aggregator.handleStreamStart({ - type: "stream-start", - workspaceId: "workspace1", - messageId: "msg2", - model: "test-model", - historySequence: 2, - }); + // User sends a NEW message - status should be cleared + const newUserMessage = { + id: "msg2", + role: "user" as const, + parts: [{ type: "text" as const, text: "What's next?" }], + metadata: { timestamp: Date.now(), historySequence: 2 }, + }; + aggregator.handleMessage(newUserMessage); - // Status should be cleared on new stream start + // Status should be cleared on new user message expect(aggregator.getAgentStatus()).toBeUndefined(); }); diff --git a/src/utils/messages/StreamingMessageAggregator.ts b/src/utils/messages/StreamingMessageAggregator.ts index da16048a42..1af90a9ac1 100644 --- a/src/utils/messages/StreamingMessageAggregator.ts +++ b/src/utils/messages/StreamingMessageAggregator.ts @@ -167,11 +167,12 @@ export class StreamingMessageAggregator { /** * Clean up stream-scoped state when stream ends (normally or abnormally). * Called by handleStreamEnd, handleStreamAbort, and handleStreamError. + * + * NOTE: Does NOT clear todos or agentStatus - those are cleared when a new + * user message arrives (see handleMessage), ensuring consistent behavior + * whether loading from history or processing live events. */ private cleanupStreamState(messageId: string): void { - this.currentTodos = []; - // NOTE: agentStatus is NOT cleared here - it persists after stream completion - // to show the last activity. This is different from todos which are stream-scoped. this.activeStreams.delete(messageId); } @@ -311,10 +312,9 @@ export class StreamingMessageAggregator { // Clear pending stream start timestamp - stream has started this.setPendingStreamStartTime(null); - // Clear agent status on stream start (unlike todos which persist across streams). - // Rationale: Status represents current activity, so it should be cleared and reset - // for each new stream. Todos represent pending work, so they persist until completion. - this.agentStatus = undefined; + // NOTE: We do NOT clear agentStatus or currentTodos here. + // They are cleared when a new user message arrives (see handleMessage), + // ensuring consistent behavior whether loading from history or processing live events. // Detect if this stream is compacting by checking if last user message is a compaction-request const messages = this.getAllMessages(); @@ -649,8 +649,14 @@ export class StreamingMessageAggregator { // Now add the new message this.addMessage(incomingMessage); - // If this is a user message, record timestamp for pending stream detection + // If this is a user message, clear derived state and record timestamp if (incomingMessage.role === "user") { + // Clear derived state (todos, agentStatus) for new conversation turn + // This ensures consistent behavior whether loading from history or processing live events + // since stream-start/stream-end events are not persisted in chat.jsonl + this.currentTodos = []; + this.agentStatus = undefined; + this.setPendingStreamStartTime(Date.now()); } }