Skip to content

Commit e10e1c8

Browse files
committed
🤖 Sanitize malformed tool inputs before API calls
Fixes workspace errors when chat history contains corrupted tool inputs.

The httpjail-coder workspace had a message where the AI generated malformed JSON that was stored as a string in the tool input field:

    input: '{"script" timeout_secs="10": "ls"}'

This caused API errors: 'Input should be a valid dictionary'

Solution:
- Created sanitizeToolInputs() to replace invalid inputs (strings, null, arrays) with empty objects before sending to API
- Integrated into AIService message processing pipeline
- Original history remains unchanged, only API request is sanitized
- Comprehensive test coverage including the actual problematic message

Generated with `cmux`
1 parent 611a180 commit e10e1c8

File tree

3 files changed

+361
-1
lines changed

3 files changed

+361
-1
lines changed

src/services/aiService.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import * as os from "os";
33
import { EventEmitter } from "events";
44
import { convertToModelMessages, type LanguageModel } from "ai";
55
import { applyToolOutputRedaction } from "@/utils/messages/applyToolOutputRedaction";
6+
import { sanitizeToolInputs } from "@/utils/messages/sanitizeToolInput";
67
import type { Result } from "@/types/result";
78
import { Ok, Err } from "@/types/result";
89
import type { WorkspaceMetadata } from "@/types/workspace";
@@ -461,10 +462,16 @@ export class AIService extends EventEmitter {
461462
const redactedForProvider = applyToolOutputRedaction(messagesWithModeContext);
462463
log.debug_obj(`${workspaceId}/2a_redacted_messages.json`, redactedForProvider);
463464

465+
// Sanitize tool inputs to ensure they are valid objects (not strings, null, or arrays)
466+
// This fixes cases where corrupted data in history has malformed tool inputs
467+
// that would cause API errors like "Input should be a valid dictionary"
468+
const sanitizedMessages = sanitizeToolInputs(redactedForProvider);
469+
log.debug_obj(`${workspaceId}/2b_sanitized_messages.json`, sanitizedMessages);
470+
464471
// Convert CmuxMessage to ModelMessage format using Vercel AI SDK utility
465472
// Type assertion needed because CmuxMessage has custom tool parts for interrupted tools
466473
// eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-argument
467-
const modelMessages = convertToModelMessages(redactedForProvider as any);
474+
const modelMessages = convertToModelMessages(sanitizedMessages as any);
468475
log.debug_obj(`${workspaceId}/2_model_messages.json`, modelMessages);
469476

470477
// Apply ModelMessage transforms based on provider requirements
Lines changed: 292 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,292 @@
1+
import { describe, it, expect } from "@jest/globals";
2+
import type { CmuxMessage } from "@/types/message";
3+
import { sanitizeToolInputs } from "./sanitizeToolInput";
4+
5+
describe("sanitizeToolInput", () => {
  /**
   * Returns the `input` of the dynamic-tool part at `index`.
   *
   * Throws when the part has a different type, so a wrong fixture or an unexpected
   * part type fails the test loudly instead of silently skipping its assertions.
   */
  function toolInputAt(message: CmuxMessage, index: number): unknown {
    const part = message.parts[index];
    if (part.type !== "dynamic-tool") {
      throw new Error(`Expected a dynamic-tool part at index ${index}, got "${part.type}"`);
    }
    return part.input;
  }

  it("should detect malformed JSON strings in tool input", () => {
    // This reproduces the bug from httpjail-coder workspace:
    // when a tool input is a string instead of an object, the API will reject it.
    const malformedInput = '{"script" timeout_secs="10": "ls"}';
    const message: CmuxMessage = {
      id: "test-1",
      role: "assistant",
      parts: [
        {
          type: "dynamic-tool",
          toolCallId: "toolu_01test",
          toolName: "bash",
          state: "output-available",
          // This is the malformed input from the actual chat - a string instead of object
          input: malformedInput,
          output: {
            error: "Invalid input for tool bash: JSON parsing failed",
          },
        },
      ],
      metadata: {
        timestamp: Date.now(),
        historySequence: 1,
      },
    };

    const sanitized = sanitizeToolInputs([message]);

    // The malformed string is replaced in the sanitized copy...
    expect(toolInputAt(sanitized[0], 0)).toEqual({});
    // ...while the original history entry is left untouched.
    expect(toolInputAt(message, 0)).toBe(malformedInput);
  });

  it("should detect string input (non-object) in tool calls", () => {
    const message: CmuxMessage = {
      id: "test-2",
      role: "assistant",
      parts: [
        {
          type: "dynamic-tool",
          toolCallId: "toolu_02test",
          toolName: "bash",
          state: "output-available",
          // Input is a string instead of an object
          input: "not an object",
          output: {
            error: "Invalid input",
          },
        },
      ],
      metadata: {
        timestamp: Date.now(),
        historySequence: 2,
      },
    };

    const sanitized = sanitizeToolInputs([message]);

    // Only `input` is replaced; every other field of the part is carried over.
    expect(sanitized[0].parts[0]).toEqual({
      type: "dynamic-tool",
      toolCallId: "toolu_02test",
      toolName: "bash",
      state: "output-available",
      input: {},
      output: { error: "Invalid input" },
    });
    expect(sanitized[0].id).toBe("test-2");
    expect(sanitized[0].metadata).toEqual(message.metadata);
  });

  it("should handle valid tool input correctly", () => {
    const message: CmuxMessage = {
      id: "test-3",
      role: "assistant",
      parts: [
        {
          type: "dynamic-tool",
          toolCallId: "toolu_03test",
          toolName: "bash",
          state: "output-available",
          // Valid input
          input: {
            script: "ls",
            timeout_secs: 10,
          },
          output: {
            success: true,
            output: "file1.txt\nfile2.txt",
          },
        },
      ],
      metadata: {
        timestamp: Date.now(),
        historySequence: 3,
      },
    };

    const sanitized = sanitizeToolInputs([message]);

    // A message that needs no sanitization is passed through as the same object.
    expect(sanitized[0]).toBe(message);
    expect(toolInputAt(sanitized[0], 0)).toEqual({ script: "ls", timeout_secs: 10 });
  });

  describe("sanitizeToolInputs", () => {
    it("should handle the actual malformed message from httpjail-coder workspace", () => {
      // This is the actual problematic message that caused the bug
      const problematicMessage: CmuxMessage = {
        id: "assistant-1761527027508-karjrpf3g",
        role: "assistant",
        metadata: {
          historySequence: 1,
          timestamp: 1761527027508,
          partial: true,
        },
        parts: [
          {
            type: "text",
            text: "I'll explore this repository.",
          },
          {
            type: "dynamic-tool",
            toolCallId: "toolu_01DXeXp8oArG4PzT9rk4hz5c",
            toolName: "bash",
            state: "output-available",
            // THIS IS THE MALFORMED INPUT - string instead of object
            input: '{"script" timeout_secs="10": "ls"}',
            output: {
              error: "Invalid input for tool bash: JSON parsing failed",
            },
          },
        ],
      };

      const sanitized = sanitizeToolInputs([problematicMessage]);

      // Should be converted to empty object
      expect(toolInputAt(sanitized[0], 1)).toEqual({});
      // The text part in front of the tool call is carried over as-is
      expect(sanitized[0].parts[0]).toEqual({
        type: "text",
        text: "I'll explore this repository.",
      });
    });

    it("should convert string inputs to empty objects", () => {
      const messages: CmuxMessage[] = [
        {
          id: "test-1",
          role: "assistant",
          parts: [
            {
              type: "dynamic-tool",
              toolCallId: "toolu_01test",
              toolName: "bash",
              state: "output-available",
              input: "not an object",
              output: { error: "Invalid input" },
            },
          ],
          metadata: { timestamp: Date.now(), historySequence: 1 },
        },
      ];

      const sanitized = sanitizeToolInputs(messages);

      // Exact equality: a subset match against `{}` would accept any object.
      expect(toolInputAt(sanitized[0], 0)).toEqual({});
    });

    it("should keep valid object inputs unchanged", () => {
      const messages: CmuxMessage[] = [
        {
          id: "test-2",
          role: "assistant",
          parts: [
            {
              type: "dynamic-tool",
              toolCallId: "toolu_02test",
              toolName: "bash",
              state: "output-available",
              input: { script: "ls", timeout_secs: 10 },
              output: { success: true },
            },
          ],
          metadata: { timestamp: Date.now(), historySequence: 2 },
        },
      ];

      const sanitized = sanitizeToolInputs(messages);

      expect(toolInputAt(sanitized[0], 0)).toEqual({ script: "ls", timeout_secs: 10 });
    });

    it("should not modify non-assistant messages", () => {
      const messages: CmuxMessage[] = [
        {
          id: "test-3",
          role: "user",
          parts: [{ type: "text", text: "Hello" }],
          metadata: { timestamp: Date.now(), historySequence: 3 },
        },
      ];

      const sanitized = sanitizeToolInputs(messages);

      expect(sanitized).toEqual(messages);
      // Non-assistant messages are returned as the very same object.
      expect(sanitized[0]).toBe(messages[0]);
    });

    it("should handle messages with multiple parts", () => {
      const messages: CmuxMessage[] = [
        {
          id: "test-4",
          role: "assistant",
          parts: [
            { type: "text", text: "Let me run this command" },
            {
              type: "dynamic-tool",
              toolCallId: "toolu_04test",
              toolName: "bash",
              state: "output-available",
              input: "malformed",
              output: { error: "bad" },
            },
            { type: "text", text: "Done" },
          ],
          metadata: { timestamp: Date.now(), historySequence: 4 },
        },
      ];

      const sanitized = sanitizeToolInputs(messages);

      expect(sanitized[0].parts).toHaveLength(3);
      expect(toolInputAt(sanitized[0], 1)).toEqual({});
      // Other parts should be unchanged
      expect(sanitized[0].parts[0]).toEqual({ type: "text", text: "Let me run this command" });
      expect(sanitized[0].parts[2]).toEqual({ type: "text", text: "Done" });
    });

    it("should handle null input", () => {
      const messages: CmuxMessage[] = [
        {
          id: "test-null",
          role: "assistant",
          parts: [
            {
              type: "dynamic-tool",
              toolCallId: "toolu_null",
              toolName: "bash",
              state: "output-available",
              input: null as any,
              output: { error: "Invalid" },
            },
          ],
          metadata: { timestamp: Date.now(), historySequence: 1 },
        },
      ];

      const sanitized = sanitizeToolInputs(messages);

      expect(toolInputAt(sanitized[0], 0)).toEqual({});
    });

    it("should handle array input", () => {
      const messages: CmuxMessage[] = [
        {
          id: "test-array",
          role: "assistant",
          parts: [
            {
              type: "dynamic-tool",
              toolCallId: "toolu_array",
              toolName: "bash",
              state: "output-available",
              input: ["not", "valid"] as any,
              output: { error: "Invalid" },
            },
          ],
          metadata: { timestamp: Date.now(), historySequence: 1 },
        },
      ];

      const sanitized = sanitizeToolInputs(messages);

      expect(toolInputAt(sanitized[0], 0)).toEqual({});
    });
  });
});
292+
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import type { CmuxMessage, CmuxToolPart } from "@/types/message";
2+
3+
/**
 * Replaces malformed tool-call inputs with `{}` so the outgoing request is accepted.
 *
 * LLM APIs (e.g. Anthropic) reject a tool input that is not an object/dictionary with
 * errors such as "Input should be a valid dictionary". Corrupted history, or a model
 * that produced malformed JSON, can leave a string (or another non-object value) in
 * the `input` field of a `dynamic-tool` part.
 *
 * Only assistant messages are inspected. A `dynamic-tool` part whose `input` is not a
 * non-null, non-array object is copied with `input` set to an empty object; everything
 * else is passed through by reference. A message that needs no change is returned as
 * the same object.
 *
 * @param messages - Messages to sanitize
 * @returns New array with sanitized messages (original messages are not modified)
 */
export function sanitizeToolInputs(messages: CmuxMessage[]): CmuxMessage[] {
  // A value the API accepts as a tool input: an object that is neither null nor an array.
  const isDictionary = (value: unknown): boolean =>
    typeof value === "object" && value !== null && !Array.isArray(value);

  return messages.map((message) => {
    // Tool calls only live on assistant messages; leave every other role alone.
    if (message.role !== "assistant") {
      return message;
    }

    const parts = message.parts.map((part): typeof part =>
      part.type === "dynamic-tool" && !isDictionary(part.input)
        ? ({ ...part, input: {} } as CmuxToolPart) // Replace with empty object
        : part
    );

    // A sanitized part is always a fresh object, so identical references at every
    // index mean nothing was replaced and the original message can be returned.
    const untouched = parts.every((part, index) => part === message.parts[index]);
    return untouched ? message : { ...message, parts };
  });
}
61+

0 commit comments

Comments
 (0)