coder · ammario · Oct 28, 2025 · Oct 28, 2025 · Oct 28, 2025 · Oct 28, 2025
diff --git a/src/components/tools/StatusSetToolCall.tsx b/src/components/tools/StatusSetToolCall.tsx
@@ -12,11 +12,17 @@ interface StatusSetToolCallProps {
 
 export const StatusSetToolCall: React.FC<StatusSetToolCallProps> = ({
   args,
-  result: _result,
+  result,
   status = "pending",
 }) => {
   const statusDisplay = getStatusDisplay(status);
 
+  // Show error message if validation failed
+  const errorMessage =
+    status === "failed" && result && typeof result === "object" && "error" in result
+      ? String(result.error)
+      : undefined;
+
   return (
     <ToolContainer expanded={false}>
       <ToolHeader>
@@ -25,6 +31,7 @@ export const StatusSetToolCall: React.FC<StatusSetToolCallProps> = ({
           <Tooltip>status_set</Tooltip>
         </TooltipWrapper>
         <span className="text-muted-foreground italic">{args.message}</span>
+        {errorMessage && <span className="text-error-foreground text-sm">({errorMessage})</span>}
         <StatusIndicator status={status}>{statusDisplay}</StatusIndicator>
       </ToolHeader>
     </ToolContainer>

diff --git a/src/utils/messages/StreamingMessageAggregator.status.test.ts b/src/utils/messages/StreamingMessageAggregator.status.test.ts
@@ -257,4 +257,123 @@ describe("StreamingMessageAggregator - Agent Status", () => {
     // Status should be cleared on new stream start
     expect(aggregator.getAgentStatus()).toBeUndefined();
   });
+
+  it("should show 'failed' status in UI when status_set validation fails", () => {
+    const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z");
+    const messageId = "msg1";
+
+    // Open a stream so the tool call below has a message to attach to
+    aggregator.handleStreamStart({
+      type: "stream-start",
+      workspaceId: "workspace1",
+      messageId,
+      model: "test-model",
+      historySequence: 1,
+    });
+
+    // Start a status_set call with a non-emoji `emoji` arg (the failure itself is injected via the result below)
+    aggregator.handleToolCallStart({
+      type: "tool-call-start",
+      workspaceId: "workspace1",
+      messageId,
+      toolCallId: "tool1",
+      toolName: "status_set",
+      args: { emoji: "not-an-emoji", message: "test" },
+      tokens: 10,
+      timestamp: Date.now(),
+    });
+
+    // Finish the tool call with a { success: false } result carrying the validation error
+    aggregator.handleToolCallEnd({
+      type: "tool-call-end",
+      workspaceId: "workspace1",
+      messageId,
+      toolCallId: "tool1",
+      toolName: "status_set",
+      result: { success: false, error: "emoji must be a single emoji character" },
+    });
+
+    // End the stream to finalize the message
+    aggregator.handleStreamEnd({
+      type: "stream-end",
+      workspaceId: "workspace1",
+      messageId,
+      metadata: { model: "test-model" },
+      parts: [],
+    });
+
+    // The first displayed tool message should be the status_set call, reported as 'failed'
+    const displayedMessages = aggregator.getDisplayedMessages();
+    const toolMessage = displayedMessages.find((m) => m.type === "tool");
+    expect(toolMessage).toBeDefined();
+    expect(toolMessage?.type).toBe("tool");
+    if (toolMessage?.type === "tool") {
+      expect(toolMessage.status).toBe("failed");
+      expect(toolMessage.toolName).toBe("status_set");
+    }
+
+    // The failed call must not have set the aggregator's agent status
+    expect(aggregator.getAgentStatus()).toBeUndefined();
+  });
+
+  it("should show 'completed' status in UI when status_set validation succeeds", () => {
+    const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z");
+    const messageId = "msg1";
+
+    // Open a stream so the tool call below has a message to attach to
+    aggregator.handleStreamStart({
+      type: "stream-start",
+      workspaceId: "workspace1",
+      messageId,
+      model: "test-model",
+      historySequence: 1,
+    });
+
+    // Start a status_set call with a real emoji and a message
+    aggregator.handleToolCallStart({
+      type: "tool-call-start",
+      workspaceId: "workspace1",
+      messageId,
+      toolCallId: "tool1",
+      toolName: "status_set",
+      args: { emoji: "🔍", message: "Analyzing code" },
+      tokens: 10,
+      timestamp: Date.now(),
+    });
+
+    // Finish the tool call with a { success: true } result
+    aggregator.handleToolCallEnd({
+      type: "tool-call-end",
+      workspaceId: "workspace1",
+      messageId,
+      toolCallId: "tool1",
+      toolName: "status_set",
+      result: { success: true, emoji: "🔍", message: "Analyzing code" },
+    });
+
+    // End the stream to finalize the message
+    aggregator.handleStreamEnd({
+      type: "stream-end",
+      workspaceId: "workspace1",
+      messageId,
+      metadata: { model: "test-model" },
+      parts: [],
+    });
+
+    // The first displayed tool message should be the status_set call, reported as 'completed'
+    const displayedMessages = aggregator.getDisplayedMessages();
+    const toolMessage = displayedMessages.find((m) => m.type === "tool");
+    expect(toolMessage).toBeDefined();
+    expect(toolMessage?.type).toBe("tool");
+    if (toolMessage?.type === "tool") {
+      expect(toolMessage.status).toBe("completed");
+      expect(toolMessage.toolName).toBe("status_set");
+    }
+
+    // The agent status should now match the emoji and message of the successful call
+    const status = aggregator.getAgentStatus();
+    expect(status).toBeDefined();
+    expect(status?.emoji).toBe("🔍");
+    expect(status?.message).toBe("Analyzing code");
+  });
 });
diff --git a/src/utils/messages/StreamingMessageAggregator.ts b/src/utils/messages/StreamingMessageAggregator.ts
@@ -35,6 +35,31 @@ interface StreamingContext {
   model: string;
 }
 
+/**
+ * True only for a non-null object `result` with `success === true` ({ success: boolean } tools).
+ */
+function hasSuccessResult(result: unknown): boolean {
+  return (
+    typeof result === "object" &&
+    result !== null &&
+    "success" in result &&
+    result.success === true
+  );
+}
+
+/**
+ * True only for a non-null object `result` with `success === false` ({ success: boolean } tools).
+ */
+function hasFailureResult(result: unknown): boolean {
+  return (
+    typeof result === "object" &&
+    result !== null &&
+    "success" in result &&
+    result.success === false
+  );
+}
+
+
 export class StreamingMessageAggregator {
   private messages = new Map<string, CmuxMessage>();
   private activeStreams = new Map<string, StreamingContext>();
@@ -488,13 +513,7 @@ export class StreamingMessageAggregator {
         (toolPart as DynamicToolPartAvailable).output = data.result;
 
         // Update TODO state if this was a successful todo_write
-        if (
-          data.toolName === "todo_write" &&
-          typeof data.result === "object" &&
-          data.result !== null &&
-          "success" in data.result &&
-          data.result.success
-        ) {
+        if (data.toolName === "todo_write" && hasSuccessResult(data.result)) {
           const args = toolPart.input as { todos: TodoItem[] };
           // Only update if todos actually changed (prevents flickering from reference changes)
           if (!this.todosEqual(this.currentTodos, args.todos)) {
@@ -503,13 +522,7 @@ export class StreamingMessageAggregator {
         }
 
         // Update agent status if this was a successful status_set
-        if (
-          data.toolName === "status_set" &&
-          typeof data.result === "object" &&
-          data.result !== null &&
-          "success" in data.result &&
-          data.result.success
-        ) {
+        if (data.toolName === "status_set" && hasSuccessResult(data.result)) {
           const args = toolPart.input as { emoji: string; message: string };
           this.agentStatus = { emoji: args.emoji, message: args.message };
         }
@@ -769,14 +782,18 @@ export class StreamingMessageAggregator {
                 timestamp: part.timestamp ?? baseTimestamp,
               });
             } else if (isDynamicToolPart(part)) {
-              const status =
-                part.state === "output-available"
-                  ? "completed"
-                  : part.state === "input-available" && message.metadata?.partial
-                    ? "interrupted"
-                    : part.state === "input-available"
-                      ? "executing"
-                      : "pending";
+              // Determine status based on part state and result
+              let status: "pending" | "executing" | "completed" | "failed" | "interrupted";
+              if (part.state === "output-available") {
+                // Check if result indicates failure (for tools that return { success: boolean })
+                status = hasFailureResult(part.output) ? "failed" : "completed";
+              } else if (part.state === "input-available" && message.metadata?.partial) {
+                status = "interrupted";
+              } else if (part.state === "input-available") {
+                status = "executing";
+              } else {
+                status = "pending";
+              }
 
               displayedMessages.push({
                 type: "tool",

diff --git a/src/utils/tools/toolDefinitions.ts b/src/utils/tools/toolDefinitions.ts
@@ -186,7 +186,11 @@ export const TOOL_DEFINITIONS = {
       "Set a status indicator to show what the agent is currently doing. " +
       "The emoji appears left of the streaming indicator, and the message shows on hover. " +
       "IMPORTANT: Always set a status at the start of each response and update it as your work progresses. " +
-      "The status is cleared when a new stream starts, so you must set it again for each response. " +
+      "Set a final status before finishing your response that reflects the outcome: " +
+      "'✅ PR checks pass and ready to merge' (success), " +
+      "'❌ CreateWorkspace Tests failed' (failure), " +
+      "'⚠️ Encountered serious issue with design' (warning/blocker). " +
+      "The status is cleared at the start of each new response, so you must set it again. " +
       "Use this to communicate ongoing activities (e.g., '🔍 Analyzing code', '📝 Writing tests', '🔧 Refactoring logic').",
     schema: z
       .object({