@@ -14,6 +14,64 @@ import { execBuffered } from "@/node/utils/runtime/helpers";
1414
1515const USER_AGENT = "Mux/1.0 (https://github.com/coder/mux; web-fetch tool)" ;
1616
/**
 * Parse `curl -i` output into the final response's headers, body and status code.
 *
 * With `-L`, curl prints one header block per response it received (redirect
 * hops, and any other block that starts with an HTTP status line), each
 * terminated by a blank line, followed by the body of the last response.
 * This walks those blocks from the start of the output so that `headers`,
 * `statusCode` and `body` all describe the same (final) response.
 *
 * @param output Raw stdout produced by `curl -i`.
 * @returns
 *  - `headers`: the final header block, lower-cased, including its blank-line
 *    terminator; `""` when the output does not start with an HTTP status line.
 *  - `body`: everything after the final header block (the whole output when no
 *    header block is present, `""` when the final header block is truncated).
 *  - `statusCode`: three-digit status of the final header block, or `""`.
 */
function parseResponse(output: string): { headers: string; body: string; statusCode: string } {
  // Sticky: matches only when a status line starts exactly at `lastIndex`, so
  // status-line-looking text inside the body is never mistaken for the status.
  const statusLinePattern = /HTTP\/[\d.]+ (\d{3})[^\r\n]*/y;
  // Blank line ending a header block; accepts CRLF, bare LF, or a mix of both.
  const blankLinePattern = /\r?\n\r?\n/g;

  let cursor = 0;
  let headers = "";
  let statusCode = "";

  // Each iteration consumes one complete header block, so `cursor` strictly advances.
  for (;;) {
    statusLinePattern.lastIndex = cursor;
    const statusMatch = statusLinePattern.exec(output);
    if (!statusMatch) break; // what follows is the body, not another header block

    statusCode = statusMatch[1] ?? "";

    // Look for the terminator after the status line: the earliest blank line wins,
    // regardless of which line-ending style the server used.
    blankLinePattern.lastIndex = statusLinePattern.lastIndex;
    const terminator = blankLinePattern.exec(output);
    if (!terminator) {
      // Header block was cut off before its terminator: headers only, no body.
      headers = output.slice(cursor).toLowerCase();
      cursor = output.length;
      break;
    }

    const blockEnd = terminator.index + terminator[0].length;
    headers = output.slice(cursor, blockEnd).toLowerCase();
    cursor = blockEnd;
  }

  return { headers, body: output.slice(cursor), statusCode };
}
40+
/**
 * Report whether a response looks like a Cloudflare challenge page.
 *
 * @param headers Response headers text; matched against the lower-case name
 *   `cf-mitigated`, so callers should pass lower-cased headers.
 * @param body Response body text.
 * @returns `true` when the headers mention `cf-mitigated`, or when the body
 *   contains both challenge marker phrases; `false` otherwise.
 */
function isCloudflareChallenge(headers: string, body: string): boolean {
  if (headers.includes("cf-mitigated")) {
    return true;
  }

  // Both phrases must be present; either one alone is not treated as a challenge.
  const challengeMarkers = ["Just a moment", "Enable JavaScript"];
  return challengeMarkers.every((marker) => body.includes(marker));
}
48+
/**
 * Best-effort conversion of an HTML document into markdown.
 *
 * The HTML is loaded into a JSDOM document, passed through Readability, and
 * the resulting article HTML is converted with Turndown (ATX headings, fenced
 * code blocks).
 *
 * @param body HTML text to extract from.
 * @param url URL handed to JSDOM as the document URL.
 * @param maxBytes Upper bound applied to the markdown before a truncation
 *   marker is appended. NOTE(review): the bound is compared against
 *   `String.length` (UTF-16 code units), not encoded bytes — confirm this
 *   matches what callers expect.
 * @returns The article title (`"Untitled"` when the title is null/undefined)
 *   and markdown content, or `null` when Readability yields no content or any
 *   step throws.
 */
function tryExtractContent(
  body: string,
  url: string,
  maxBytes: number
): { title: string; content: string } | null {
  try {
    const { document } = new JSDOM(body, { url }).window;
    const article = new Readability(document).parse();
    if (!article?.content) {
      return null;
    }

    const converter = new TurndownService({ headingStyle: "atx", codeBlockStyle: "fenced" });
    const markdown = converter.turndown(article.content);
    const content =
      markdown.length > maxBytes
        ? markdown.slice(0, maxBytes) + "\n\n[Content truncated]"
        : markdown;

    return { title: article.title ?? "Untitled", content };
  } catch {
    // Extraction is best-effort: any parsing/conversion failure means "no content".
    return null;
  }
}
74+
1775/**
1876 * Web fetch tool factory for AI assistant
1977 * Creates a tool that fetches web pages and extracts readable content as markdown
@@ -62,12 +120,41 @@ export const createWebFetchTool: ToolFactory = (config: ToolConfiguration) => {
62120 const exitCodeMessages : Record < number , string > = {
63121 6 : "Could not resolve host" ,
64122 7 : "Failed to connect" ,
65- 22 : "HTTP error (4xx/5xx)" ,
66123 28 : "Operation timed out" ,
67124 35 : "SSL/TLS handshake failed" ,
68125 56 : "Network data receive error" ,
69126 63 : "Maximum file size exceeded" ,
70127 } ;
128+
129+ // For HTTP errors (exit 22), try to parse and include the error body
130+ if ( result . exitCode === 22 && result . stdout ) {
131+ const { headers, body, statusCode } = parseResponse ( result . stdout ) ;
132+ const statusText = statusCode ? `HTTP ${ statusCode } ` : "HTTP error" ;
133+
134+ // Detect Cloudflare challenge pages
135+ if ( isCloudflareChallenge ( headers , body ) ) {
136+ return {
137+ success : false ,
138+ error : `${ statusText } : Cloudflare security challenge (page requires JavaScript)` ,
139+ } ;
140+ }
141+
142+ // Try to extract readable content from error page
143+ const extracted = tryExtractContent ( body , url , WEB_FETCH_MAX_OUTPUT_BYTES ) ;
144+ if ( extracted ) {
145+ return {
146+ success : false ,
147+ error : statusText ,
148+ content : extracted . content ,
149+ } ;
150+ }
151+
152+ return {
153+ success : false ,
154+ error : statusText ,
155+ } ;
156+ }
157+
71158 const reason = exitCodeMessages [ result . exitCode ] || result . stderr || "Unknown error" ;
72159 return {
73160 success : false ,
@@ -76,19 +163,7 @@ export const createWebFetchTool: ToolFactory = (config: ToolConfiguration) => {
76163 }
77164
78165 // Parse headers and body from curl -i output
79- // Headers end with \r\n\r\n (or \n\n for some servers)
80- const output = result . stdout ;
81- const headerEndIndex = output . indexOf ( "\r\n\r\n" ) ;
82- const altHeaderEndIndex = output . indexOf ( "\n\n" ) ;
83- const splitIndex =
84- headerEndIndex !== - 1
85- ? headerEndIndex + 4
86- : altHeaderEndIndex !== - 1
87- ? altHeaderEndIndex + 2
88- : 0 ;
89-
90- const headers = splitIndex > 0 ? output . slice ( 0 , splitIndex ) . toLowerCase ( ) : "" ;
91- const body = splitIndex > 0 ? output . slice ( splitIndex ) : output ;
166+ const { headers, body } = parseResponse ( result . stdout ) ;
92167
93168 if ( ! body || body . trim ( ) . length === 0 ) {
94169 return {
0 commit comments