diff --git a/src/components/tools/StatusSetToolCall.tsx b/src/components/tools/StatusSetToolCall.tsx index 4c313fa5c8..803362a58e 100644 --- a/src/components/tools/StatusSetToolCall.tsx +++ b/src/components/tools/StatusSetToolCall.tsx @@ -12,11 +12,17 @@ interface StatusSetToolCallProps { export const StatusSetToolCall: React.FC = ({ args, - result: _result, + result, status = "pending", }) => { const statusDisplay = getStatusDisplay(status); + // Show error message if validation failed + const errorMessage = + status === "failed" && result && typeof result === "object" && "error" in result + ? String(result.error) + : undefined; + return ( @@ -25,6 +31,7 @@ export const StatusSetToolCall: React.FC = ({ status_set {args.message} + {errorMessage && ({errorMessage})} {statusDisplay} diff --git a/src/utils/messages/StreamingMessageAggregator.status.test.ts b/src/utils/messages/StreamingMessageAggregator.status.test.ts index fac616c4aa..ad1617688a 100644 --- a/src/utils/messages/StreamingMessageAggregator.status.test.ts +++ b/src/utils/messages/StreamingMessageAggregator.status.test.ts @@ -199,7 +199,7 @@ describe("StreamingMessageAggregator - Agent Status", () => { expect(aggregator.getAgentStatus()).toBeUndefined(); }); - it("should clear agent status on stream-start (different from TODO behavior)", () => { + it("should clear agent status when new user message arrives", () => { const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z"); // Start first stream and set status @@ -245,16 +245,253 @@ describe("StreamingMessageAggregator - Agent Status", () => { // Status persists after stream ends expect(aggregator.getAgentStatus()?.message).toBe("First task"); - // Start a NEW stream - status should be cleared + // User sends a NEW message - status should be cleared + const newUserMessage = { + id: "msg2", + role: "user" as const, + parts: [{ type: "text" as const, text: "What's next?" }], + metadata: { timestamp: Date.now(), historySequence: 2 }, + }; + aggregator.handleMessage(newUserMessage); + + // Status should be cleared on new user message + expect(aggregator.getAgentStatus()).toBeUndefined(); + }); + + it("should show 'failed' status in UI when status_set validation fails", () => { + const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z"); + const messageId = "msg1"; + + // Start a stream + aggregator.handleStreamStart({ + type: "stream-start", + workspaceId: "workspace1", + messageId, + model: "test-model", + historySequence: 1, + }); + + // Add a status_set tool call with invalid emoji + aggregator.handleToolCallStart({ + type: "tool-call-start", + workspaceId: "workspace1", + messageId, + toolCallId: "tool1", + toolName: "status_set", + args: { emoji: "not-an-emoji", message: "test" }, + tokens: 10, + timestamp: Date.now(), + }); + + // Complete with validation failure + aggregator.handleToolCallEnd({ + type: "tool-call-end", + workspaceId: "workspace1", + messageId, + toolCallId: "tool1", + toolName: "status_set", + result: { success: false, error: "emoji must be a single emoji character" }, + }); + + // End the stream to finalize message + aggregator.handleStreamEnd({ + type: "stream-end", + workspaceId: "workspace1", + messageId, + metadata: { model: "test-model" }, + parts: [], + }); + + // Check that the tool message shows 'failed' status in the UI + const displayedMessages = aggregator.getDisplayedMessages(); + const toolMessage = displayedMessages.find((m) => m.type === "tool"); + expect(toolMessage).toBeDefined(); + expect(toolMessage?.type).toBe("tool"); + if (toolMessage?.type === "tool") { + expect(toolMessage.status).toBe("failed"); + expect(toolMessage.toolName).toBe("status_set"); + } + + // And status should NOT be updated in aggregator + expect(aggregator.getAgentStatus()).toBeUndefined(); + }); + + it("should show 'completed' status in UI when status_set validation succeeds", () => { + const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z"); + const messageId = "msg1"; + + // Start a stream aggregator.handleStreamStart({ type: "stream-start", workspaceId: "workspace1", - messageId: "msg2", + messageId, model: "test-model", - historySequence: 2, + historySequence: 1, + }); + + // Add a successful status_set tool call + aggregator.handleToolCallStart({ + type: "tool-call-start", + workspaceId: "workspace1", + messageId, + toolCallId: "tool1", + toolName: "status_set", + args: { emoji: "🔍", message: "Analyzing code" }, + tokens: 10, + timestamp: Date.now(), + }); + + // Complete successfully + aggregator.handleToolCallEnd({ + type: "tool-call-end", + workspaceId: "workspace1", + messageId, + toolCallId: "tool1", + toolName: "status_set", + result: { success: true, emoji: "🔍", message: "Analyzing code" }, + }); + + // End the stream to finalize message + aggregator.handleStreamEnd({ + type: "stream-end", + workspaceId: "workspace1", + messageId, + metadata: { model: "test-model" }, + parts: [], }); - // Status should be cleared on new stream start + // Check that the tool message shows 'completed' status in the UI + const displayedMessages = aggregator.getDisplayedMessages(); + const toolMessage = displayedMessages.find((m) => m.type === "tool"); + expect(toolMessage).toBeDefined(); + expect(toolMessage?.type).toBe("tool"); + if (toolMessage?.type === "tool") { + expect(toolMessage.status).toBe("completed"); + expect(toolMessage.toolName).toBe("status_set"); + } + + // And status SHOULD be updated in aggregator + const status = aggregator.getAgentStatus(); + expect(status).toBeDefined(); + expect(status?.emoji).toBe("🔍"); + expect(status?.message).toBe("Analyzing code"); + }); + + it("should reconstruct agentStatus when loading historical messages", () => { + const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z"); + + // Create historical messages with a completed status_set tool call + const historicalMessages = [ + { + id: "msg1", + role: "user" as const, + parts: [{ type: "text" as const, text: "Hello" }], + metadata: { timestamp: Date.now(), historySequence: 1 }, + }, + { + id: "msg2", + role: "assistant" as const, + parts: [ + { type: "text" as const, text: "Working on it..." }, + { + type: "dynamic-tool" as const, + toolCallId: "tool1", + toolName: "status_set", + state: "output-available" as const, + input: { emoji: "🔍", message: "Analyzing code" }, + output: { success: true, emoji: "🔍", message: "Analyzing code" }, + timestamp: Date.now(), + }, + ], + metadata: { timestamp: Date.now(), historySequence: 2 }, + }, + ]; + + // Load historical messages + aggregator.loadHistoricalMessages(historicalMessages); + + // Status should be reconstructed from the historical tool call + const status = aggregator.getAgentStatus(); + expect(status).toBeDefined(); + expect(status?.emoji).toBe("🔍"); + expect(status?.message).toBe("Analyzing code"); + }); + + it("should use most recent status_set when loading multiple historical messages", () => { + const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z"); + + // Create historical messages with multiple status_set calls + const historicalMessages = [ + { + id: "msg1", + role: "assistant" as const, + parts: [ + { + type: "dynamic-tool" as const, + toolCallId: "tool1", + toolName: "status_set", + state: "output-available" as const, + input: { emoji: "🔍", message: "First status" }, + output: { success: true, emoji: "🔍", message: "First status" }, + timestamp: Date.now(), + }, + ], + metadata: { timestamp: Date.now(), historySequence: 1 }, + }, + { + id: "msg2", + role: "assistant" as const, + parts: [ + { + type: "dynamic-tool" as const, + toolCallId: "tool2", + toolName: "status_set", + state: "output-available" as const, + input: { emoji: "📝", message: "Second status" }, + output: { success: true, emoji: "📝", message: "Second status" }, + timestamp: Date.now(), + }, + ], + metadata: { timestamp: Date.now(), historySequence: 2 }, + }, + ]; + + // Load historical messages + aggregator.loadHistoricalMessages(historicalMessages); + + // Should use the most recent (last processed) status + const status = aggregator.getAgentStatus(); + expect(status?.emoji).toBe("📝"); + expect(status?.message).toBe("Second status"); + }); + + it("should not reconstruct status from failed status_set in historical messages", () => { + const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z"); + + // Create historical message with failed status_set + const historicalMessages = [ + { + id: "msg1", + role: "assistant" as const, + parts: [ + { + type: "dynamic-tool" as const, + toolCallId: "tool1", + toolName: "status_set", + state: "output-available" as const, + input: { emoji: "not-emoji", message: "test" }, + output: { success: false, error: "emoji must be a single emoji character" }, + timestamp: Date.now(), + }, + ], + metadata: { timestamp: Date.now(), historySequence: 1 }, + }, + ]; + + // Load historical messages + aggregator.loadHistoricalMessages(historicalMessages); + + // Status should remain undefined (failed validation) expect(aggregator.getAgentStatus()).toBeUndefined(); }); }); diff --git a/src/utils/messages/StreamingMessageAggregator.ts b/src/utils/messages/StreamingMessageAggregator.ts index a235c2d111..1af90a9ac1 100644 --- a/src/utils/messages/StreamingMessageAggregator.ts +++ b/src/utils/messages/StreamingMessageAggregator.ts @@ -35,6 +35,24 @@ interface StreamingContext { model: string; } +/** + * Check if a tool result indicates success (for tools that return { success: boolean }) + */ +function hasSuccessResult(result: unknown): boolean { + return ( + typeof result === "object" && result !== null && "success" in result && result.success === true + ); +} + +/** + * Check if a tool result indicates failure (for tools that return { success: boolean }) + */ +function hasFailureResult(result: unknown): boolean { + return ( + typeof result === "object" && result !== null && "success" in result && result.success === false + ); +} + export class StreamingMessageAggregator { private messages = new Map(); private activeStreams = new Map(); @@ -149,11 +167,12 @@ export class StreamingMessageAggregator { /** * Clean up stream-scoped state when stream ends (normally or abnormally). * Called by handleStreamEnd, handleStreamAbort, and handleStreamError. + * + * NOTE: Does NOT clear todos or agentStatus - those are cleared when a new + * user message arrives (see handleMessage), ensuring consistent behavior + * whether loading from history or processing live events. */ private cleanupStreamState(messageId: string): void { - this.currentTodos = []; - // NOTE: agentStatus is NOT cleared here - it persists after stream completion - // to show the last activity. This is different from todos which are stream-scoped. this.activeStreams.delete(messageId); } @@ -181,7 +200,18 @@ export class StreamingMessageAggregator { loadHistoricalMessages(messages: CmuxMessage[]): void { for (const message of messages) { this.messages.set(message.id, message); + + // Process completed tool calls to reconstruct derived state (todos, agentStatus) + // This ensures state persists across page reloads and workspace switches + if (message.role === "assistant") { + for (const part of message.parts) { + if (isDynamicToolPart(part) && part.state === "output-available") { + this.processToolResult(part.toolName, part.input, part.output); + } + } + } } + this.invalidateCache(); } @@ -282,10 +312,9 @@ export class StreamingMessageAggregator { // Clear pending stream start timestamp - stream has started this.setPendingStreamStartTime(null); - // Clear agent status on stream start (unlike todos which persist across streams). - // Rationale: Status represents current activity, so it should be cleared and reset - // for each new stream. Todos represent pending work, so they persist until completion. - this.agentStatus = undefined; + // NOTE: We do NOT clear agentStatus or currentTodos here. + // They are cleared when a new user message arrives (see handleMessage), + // ensuring consistent behavior whether loading from history or processing live events. // Detect if this stream is compacting by checking if last user message is a compaction-request const messages = this.getAllMessages(); @@ -473,6 +502,30 @@ export class StreamingMessageAggregator { // Tool deltas are for display - args are in dynamic-tool part } + /** + * Process a completed tool call's result to update derived state. + * Called for both live tool-call-end events and historical tool parts. + * + * This is the single source of truth for updating state from tool results, + * ensuring consistency whether processing live events or historical messages. + */ + private processToolResult(toolName: string, input: unknown, output: unknown): void { + // Update TODO state if this was a successful todo_write + if (toolName === "todo_write" && hasSuccessResult(output)) { + const args = input as { todos: TodoItem[] }; + // Only update if todos actually changed (prevents flickering from reference changes) + if (!this.todosEqual(this.currentTodos, args.todos)) { + this.currentTodos = args.todos; + } + } + + // Update agent status if this was a successful status_set + if (toolName === "status_set" && hasSuccessResult(output)) { + const args = input as { emoji: string; message: string }; + this.agentStatus = { emoji: args.emoji, message: args.message }; + } + } + handleToolCallEnd(data: ToolCallEndEvent): void { const message = this.messages.get(data.messageId); if (message) { @@ -487,32 +540,8 @@ export class StreamingMessageAggregator { (toolPart as DynamicToolPartAvailable).state = "output-available"; (toolPart as DynamicToolPartAvailable).output = data.result; - // Update TODO state if this was a successful todo_write - if ( - data.toolName === "todo_write" && - typeof data.result === "object" && - data.result !== null && - "success" in data.result && - data.result.success - ) { - const args = toolPart.input as { todos: TodoItem[] }; - // Only update if todos actually changed (prevents flickering from reference changes) - if (!this.todosEqual(this.currentTodos, args.todos)) { - this.currentTodos = args.todos; - } - } - - // Update agent status if this was a successful status_set - if ( - data.toolName === "status_set" && - typeof data.result === "object" && - data.result !== null && - "success" in data.result && - data.result.success - ) { - const args = toolPart.input as { emoji: string; message: string }; - this.agentStatus = { emoji: args.emoji, message: args.message }; - } + // Process tool result to update derived state (todos, agentStatus, etc.) + this.processToolResult(data.toolName, toolPart.input, data.result); } this.invalidateCache(); } @@ -620,8 +649,14 @@ export class StreamingMessageAggregator { // Now add the new message this.addMessage(incomingMessage); - // If this is a user message, record timestamp for pending stream detection + // If this is a user message, clear derived state and record timestamp if (incomingMessage.role === "user") { + // Clear derived state (todos, agentStatus) for new conversation turn + // This ensures consistent behavior whether loading from history or processing live events + // since stream-start/stream-end events are not persisted in chat.jsonl + this.currentTodos = []; + this.agentStatus = undefined; + this.setPendingStreamStartTime(Date.now()); } } @@ -769,14 +804,18 @@ export class StreamingMessageAggregator { timestamp: part.timestamp ?? baseTimestamp, }); } else if (isDynamicToolPart(part)) { - const status = - part.state === "output-available" - ? "completed" - : part.state === "input-available" && message.metadata?.partial - ? "interrupted" - : part.state === "input-available" - ? "executing" - : "pending"; + // Determine status based on part state and result + let status: "pending" | "executing" | "completed" | "failed" | "interrupted"; + if (part.state === "output-available") { + // Check if result indicates failure (for tools that return { success: boolean }) + status = hasFailureResult(part.output) ? "failed" : "completed"; + } else if (part.state === "input-available" && message.metadata?.partial) { + status = "interrupted"; + } else if (part.state === "input-available") { + status = "executing"; + } else { + status = "pending"; + } displayedMessages.push({ type: "tool", diff --git a/src/utils/tools/toolDefinitions.ts b/src/utils/tools/toolDefinitions.ts index 5906cfeded..24304c2fab 100644 --- a/src/utils/tools/toolDefinitions.ts +++ b/src/utils/tools/toolDefinitions.ts @@ -186,7 +186,11 @@ export const TOOL_DEFINITIONS = { "Set a status indicator to show what the agent is currently doing. " + "The emoji appears left of the streaming indicator, and the message shows on hover. " + "IMPORTANT: Always set a status at the start of each response and update it as your work progresses. " + - "The status is cleared when a new stream starts, so you must set it again for each response. " + + "Set a final status before finishing your response that reflects the outcome: " + + "'✅ PR checks pass and ready to merge' (success), " + + "'❌ CreateWorkspace Tests failed' (failure), " + + "'⚠️ Encountered serious issue with design' (warning/blocker). " + + "The status is cleared at the start of each new response, so you must set it again. " + "Use this to communicate ongoing activities (e.g., '🔍 Analyzing code', '📝 Writing tests', '🔧 Refactoring logic').", schema: z .object({