Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/components/tools/StatusSetToolCall.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,17 @@ interface StatusSetToolCallProps {

export const StatusSetToolCall: React.FC<StatusSetToolCallProps> = ({
args,
result: _result,
result,
status = "pending",
}) => {
const statusDisplay = getStatusDisplay(status);

// Show error message if validation failed
const errorMessage =
status === "failed" && result && typeof result === "object" && "error" in result
? String(result.error)
: undefined;

return (
<ToolContainer expanded={false}>
<ToolHeader>
Expand All @@ -25,6 +31,7 @@ export const StatusSetToolCall: React.FC<StatusSetToolCallProps> = ({
<Tooltip>status_set</Tooltip>
</TooltipWrapper>
<span className="text-muted-foreground italic">{args.message}</span>
{errorMessage && <span className="text-error-foreground text-sm">({errorMessage})</span>}
<StatusIndicator status={status}>{statusDisplay}</StatusIndicator>
</ToolHeader>
</ToolContainer>
Expand Down
119 changes: 119 additions & 0 deletions src/utils/messages/StreamingMessageAggregator.status.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -257,4 +257,123 @@ describe("StreamingMessageAggregator - Agent Status", () => {
// Status should be cleared on new stream start
expect(aggregator.getAgentStatus()).toBeUndefined();
});

it("should show 'failed' status in UI when status_set validation fails", () => {
  // Identifiers shared by every event in this scenario.
  const workspaceId = "workspace1";
  const messageId = "msg1";
  const toolCallId = "tool1";
  const toolName = "status_set";
  const model = "test-model";

  const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z");

  // 1. Open the stream.
  aggregator.handleStreamStart({
    type: "stream-start",
    workspaceId,
    messageId,
    model,
    historySequence: 1,
  });

  // 2. The agent calls status_set with a value that is not an emoji.
  aggregator.handleToolCallStart({
    type: "tool-call-start",
    workspaceId,
    messageId,
    toolCallId,
    toolName,
    args: { emoji: "not-an-emoji", message: "test" },
    tokens: 10,
    timestamp: Date.now(),
  });

  // 3. The tool reports the validation failure through `success: false`.
  aggregator.handleToolCallEnd({
    type: "tool-call-end",
    workspaceId,
    messageId,
    toolCallId,
    toolName,
    result: { success: false, error: "emoji must be a single emoji character" },
  });

  // 4. Close the stream so the message is finalized.
  aggregator.handleStreamEnd({
    type: "stream-end",
    workspaceId,
    messageId,
    metadata: { model },
    parts: [],
  });

  // The displayed tool entry must carry the 'failed' status.
  const toolEntry = aggregator.getDisplayedMessages().find((entry) => entry.type === "tool");
  expect(toolEntry).toBeDefined();
  expect(toolEntry?.type).toBe("tool");
  if (toolEntry?.type === "tool") {
    expect(toolEntry.status).toBe("failed");
    expect(toolEntry.toolName).toBe("status_set");
  }

  // A rejected status_set must leave the agent status unset.
  expect(aggregator.getAgentStatus()).toBeUndefined();
});

it("should show 'completed' status in UI when status_set validation succeeds", () => {
  const aggregator = new StreamingMessageAggregator("2024-01-01T00:00:00.000Z");
  const messageId = "msg1";

  // Status payload used for the call, the tool result and the final assertions.
  // The emoji is a real single emoji (magnifying glass, U+1F50D) so the scenario
  // represents a status_set call with well-formed input.
  const emoji = "🔍";
  const message = "Analyzing code";

  // Start a stream
  aggregator.handleStreamStart({
    type: "stream-start",
    workspaceId: "workspace1",
    messageId,
    model: "test-model",
    historySequence: 1,
  });

  // Add a successful status_set tool call
  aggregator.handleToolCallStart({
    type: "tool-call-start",
    workspaceId: "workspace1",
    messageId,
    toolCallId: "tool1",
    toolName: "status_set",
    args: { emoji, message },
    tokens: 10,
    timestamp: Date.now(),
  });

  // Complete successfully
  aggregator.handleToolCallEnd({
    type: "tool-call-end",
    workspaceId: "workspace1",
    messageId,
    toolCallId: "tool1",
    toolName: "status_set",
    result: { success: true, emoji, message },
  });

  // End the stream to finalize message
  aggregator.handleStreamEnd({
    type: "stream-end",
    workspaceId: "workspace1",
    messageId,
    metadata: { model: "test-model" },
    parts: [],
  });

  // Check that the tool message shows 'completed' status in the UI
  const displayedMessages = aggregator.getDisplayedMessages();
  const toolMessage = displayedMessages.find((m) => m.type === "tool");
  expect(toolMessage).toBeDefined();
  expect(toolMessage?.type).toBe("tool");
  if (toolMessage?.type === "tool") {
    expect(toolMessage.status).toBe("completed");
    expect(toolMessage.toolName).toBe("status_set");
  }

  // And status SHOULD be updated in aggregator
  const status = aggregator.getAgentStatus();
  expect(status).toBeDefined();
  expect(status?.emoji).toBe(emoji);
  expect(status?.message).toBe(message);
});
});
61 changes: 39 additions & 22 deletions src/utils/messages/StreamingMessageAggregator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,31 @@ interface StreamingContext {
model: string;
}

/**
 * Reports whether a tool result explicitly signals success.
 *
 * Intended for tools whose result has the shape `{ success: boolean }`.
 *
 * @param result - Raw tool output of unknown shape.
 * @returns `true` only when `result` is a non-null object that has a `success`
 *   property strictly equal to `true`; `false` for everything else (primitives,
 *   `null`, objects without `success`, or a `success` that is not exactly `true`).
 */
function hasSuccessResult(result: unknown): boolean {
  // Primitives, undefined and null cannot carry a `success` flag.
  if (typeof result !== "object" || result === null) {
    return false;
  }
  if (!("success" in result)) {
    return false;
  }
  // Strict comparison: truthy values such as 1 or "true" do not count.
  return result.success === true;
}

/**
 * Reports whether a tool result explicitly signals failure.
 *
 * Intended for tools whose result has the shape `{ success: boolean }`.
 *
 * @param result - Raw tool output of unknown shape.
 * @returns `true` only when `result` is a non-null object that has a `success`
 *   property strictly equal to `false`; `false` for everything else. A result
 *   with no `success` property is therefore NOT reported as a failure.
 */
function hasFailureResult(result: unknown): boolean {
  // Anything that is not a real object cannot carry a `success` flag.
  const isObject = typeof result === "object" && result !== null;
  if (!isObject) {
    return false;
  }
  // Strict comparison: falsy values such as 0, "" or undefined do not count.
  return "success" in result && result.success === false;
}


export class StreamingMessageAggregator {
private messages = new Map<string, CmuxMessage>();
private activeStreams = new Map<string, StreamingContext>();
Expand Down Expand Up @@ -488,13 +513,7 @@ export class StreamingMessageAggregator {
(toolPart as DynamicToolPartAvailable).output = data.result;

// Update TODO state if this was a successful todo_write
if (
data.toolName === "todo_write" &&
typeof data.result === "object" &&
data.result !== null &&
"success" in data.result &&
data.result.success
) {
if (data.toolName === "todo_write" && hasSuccessResult(data.result)) {
const args = toolPart.input as { todos: TodoItem[] };
// Only update if todos actually changed (prevents flickering from reference changes)
if (!this.todosEqual(this.currentTodos, args.todos)) {
Expand All @@ -503,13 +522,7 @@ export class StreamingMessageAggregator {
}

// Update agent status if this was a successful status_set
if (
data.toolName === "status_set" &&
typeof data.result === "object" &&
data.result !== null &&
"success" in data.result &&
data.result.success
) {
if (data.toolName === "status_set" && hasSuccessResult(data.result)) {
const args = toolPart.input as { emoji: string; message: string };
this.agentStatus = { emoji: args.emoji, message: args.message };
}
Expand Down Expand Up @@ -769,14 +782,18 @@ export class StreamingMessageAggregator {
timestamp: part.timestamp ?? baseTimestamp,
});
} else if (isDynamicToolPart(part)) {
const status =
part.state === "output-available"
? "completed"
: part.state === "input-available" && message.metadata?.partial
? "interrupted"
: part.state === "input-available"
? "executing"
: "pending";
// Determine status based on part state and result
let status: "pending" | "executing" | "completed" | "failed" | "interrupted";
if (part.state === "output-available") {
// Check if result indicates failure (for tools that return { success: boolean })
status = hasFailureResult(part.output) ? "failed" : "completed";
} else if (part.state === "input-available" && message.metadata?.partial) {
status = "interrupted";
} else if (part.state === "input-available") {
status = "executing";
} else {
status = "pending";
}

displayedMessages.push({
type: "tool",
Expand Down
6 changes: 5 additions & 1 deletion src/utils/tools/toolDefinitions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,11 @@ export const TOOL_DEFINITIONS = {
"Set a status indicator to show what the agent is currently doing. " +
"The emoji appears left of the streaming indicator, and the message shows on hover. " +
"IMPORTANT: Always set a status at the start of each response and update it as your work progresses. " +
"The status is cleared when a new stream starts, so you must set it again for each response. " +
"Set a final status before finishing your response that reflects the outcome: " +
"'βœ… PR checks pass and ready to merge' (success), " +
"'❌ CreateWorkspace Tests failed' (failure), " +
"'⚠️ Encountered serious issue with design' (warning/blocker). " +
"The status is cleared at the start of each new response, so you must set it again. " +
"Use this to communicate ongoing activities (e.g., 'πŸ” Analyzing code', 'πŸ“ Writing tests', 'πŸ”§ Refactoring logic').",
schema: z
.object({
Expand Down
Loading