Skip to content

Commit 1f1f015

Browse files
improvement(files): update execution for passing base64 strings (#2906)
* progress
* improvement(execution): update execution for passing base64 strings
* fix types
* cleanup comments
* path security vuln
* reject promise correctly
* fix redirect case
* remove proxy routes
* fix tests
* use ipaddr
1 parent 4afb245 commit 1f1f015

File tree

87 files changed

+1399
-964
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

87 files changed

+1399
-964
lines changed

apps/sim/app/api/copilot/execute-tool/route.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -224,7 +224,7 @@ export async function POST(req: NextRequest) {
224224
hasApiKey: !!executionParams.apiKey,
225225
})
226226

227-
const result = await executeTool(resolvedToolName, executionParams, true)
227+
const result = await executeTool(resolvedToolName, executionParams)
228228

229229
logger.info(`[${tracker.requestId}] Tool execution complete`, {
230230
toolName,

apps/sim/app/api/files/parse/route.ts

Lines changed: 140 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -6,9 +6,10 @@ import { createLogger } from '@sim/logger'
66
import binaryExtensionsList from 'binary-extensions'
77
import { type NextRequest, NextResponse } from 'next/server'
88
import { checkHybridAuth } from '@/lib/auth/hybrid'
9-
import { createPinnedUrl, validateUrlWithDNS } from '@/lib/core/security/input-validation'
9+
import { secureFetchWithPinnedIP, validateUrlWithDNS } from '@/lib/core/security/input-validation'
1010
import { isSupportedFileType, parseFile } from '@/lib/file-parsers'
1111
import { isUsingCloudStorage, type StorageContext, StorageService } from '@/lib/uploads'
12+
import { uploadExecutionFile } from '@/lib/uploads/contexts/execution'
1213
import { UPLOAD_DIR_SERVER } from '@/lib/uploads/core/setup.server'
1314
import { getFileMetadataByKey } from '@/lib/uploads/server/metadata'
1415
import {
@@ -21,6 +22,7 @@ import {
2122
} from '@/lib/uploads/utils/file-utils'
2223
import { getUserEntityPermissions } from '@/lib/workspaces/permissions/utils'
2324
import { verifyFileAccess } from '@/app/api/files/authorization'
25+
import type { UserFile } from '@/executor/types'
2426
import '@/lib/uploads/core/setup.server'
2527

2628
export const dynamic = 'force-dynamic'
@@ -30,13 +32,20 @@ const logger = createLogger('FilesParseAPI')
3032
const MAX_DOWNLOAD_SIZE_BYTES = 100 * 1024 * 1024 // 100 MB
3133
const DOWNLOAD_TIMEOUT_MS = 30000 // 30 seconds
3234

35+
interface ExecutionContext {
36+
workspaceId: string
37+
workflowId: string
38+
executionId: string
39+
}
40+
3341
interface ParseResult {
3442
success: boolean
3543
content?: string
3644
error?: string
3745
filePath: string
3846
originalName?: string // Original filename from database (for workspace files)
3947
viewerUrl?: string | null // Viewer URL for the file if available
48+
userFile?: UserFile // UserFile object for the raw file
4049
metadata?: {
4150
fileType: string
4251
size: number
@@ -70,27 +79,45 @@ export async function POST(request: NextRequest) {
7079

7180
const userId = authResult.userId
7281
const requestData = await request.json()
73-
const { filePath, fileType, workspaceId } = requestData
82+
const { filePath, fileType, workspaceId, workflowId, executionId } = requestData
7483

7584
if (!filePath || (typeof filePath === 'string' && filePath.trim() === '')) {
7685
return NextResponse.json({ success: false, error: 'No file path provided' }, { status: 400 })
7786
}
7887

79-
logger.info('File parse request received:', { filePath, fileType, workspaceId, userId })
88+
// Build execution context if all required fields are present
89+
const executionContext: ExecutionContext | undefined =
90+
workspaceId && workflowId && executionId
91+
? { workspaceId, workflowId, executionId }
92+
: undefined
93+
94+
logger.info('File parse request received:', {
95+
filePath,
96+
fileType,
97+
workspaceId,
98+
userId,
99+
hasExecutionContext: !!executionContext,
100+
})
80101

81102
if (Array.isArray(filePath)) {
82103
const results = []
83-
for (const path of filePath) {
84-
if (!path || (typeof path === 'string' && path.trim() === '')) {
104+
for (const singlePath of filePath) {
105+
if (!singlePath || (typeof singlePath === 'string' && singlePath.trim() === '')) {
85106
results.push({
86107
success: false,
87108
error: 'Empty file path in array',
88-
filePath: path || '',
109+
filePath: singlePath || '',
89110
})
90111
continue
91112
}
92113

93-
const result = await parseFileSingle(path, fileType, workspaceId, userId)
114+
const result = await parseFileSingle(
115+
singlePath,
116+
fileType,
117+
workspaceId,
118+
userId,
119+
executionContext
120+
)
94121
if (result.metadata) {
95122
result.metadata.processingTime = Date.now() - startTime
96123
}
@@ -106,6 +133,7 @@ export async function POST(request: NextRequest) {
106133
fileType: result.metadata?.fileType || 'application/octet-stream',
107134
size: result.metadata?.size || 0,
108135
binary: false,
136+
file: result.userFile,
109137
},
110138
filePath: result.filePath,
111139
viewerUrl: result.viewerUrl,
@@ -121,7 +149,7 @@ export async function POST(request: NextRequest) {
121149
})
122150
}
123151

124-
const result = await parseFileSingle(filePath, fileType, workspaceId, userId)
152+
const result = await parseFileSingle(filePath, fileType, workspaceId, userId, executionContext)
125153

126154
if (result.metadata) {
127155
result.metadata.processingTime = Date.now() - startTime
@@ -137,6 +165,7 @@ export async function POST(request: NextRequest) {
137165
fileType: result.metadata?.fileType || 'application/octet-stream',
138166
size: result.metadata?.size || 0,
139167
binary: false,
168+
file: result.userFile,
140169
},
141170
filePath: result.filePath,
142171
viewerUrl: result.viewerUrl,
@@ -164,7 +193,8 @@ async function parseFileSingle(
164193
filePath: string,
165194
fileType: string,
166195
workspaceId: string,
167-
userId: string
196+
userId: string,
197+
executionContext?: ExecutionContext
168198
): Promise<ParseResult> {
169199
logger.info('Parsing file:', filePath)
170200

@@ -186,18 +216,18 @@ async function parseFileSingle(
186216
}
187217

188218
if (filePath.includes('/api/files/serve/')) {
189-
return handleCloudFile(filePath, fileType, undefined, userId)
219+
return handleCloudFile(filePath, fileType, undefined, userId, executionContext)
190220
}
191221

192222
if (filePath.startsWith('http://') || filePath.startsWith('https://')) {
193-
return handleExternalUrl(filePath, fileType, workspaceId, userId)
223+
return handleExternalUrl(filePath, fileType, workspaceId, userId, executionContext)
194224
}
195225

196226
if (isUsingCloudStorage()) {
197-
return handleCloudFile(filePath, fileType, undefined, userId)
227+
return handleCloudFile(filePath, fileType, undefined, userId, executionContext)
198228
}
199229

200-
return handleLocalFile(filePath, fileType, userId)
230+
return handleLocalFile(filePath, fileType, userId, executionContext)
201231
}
202232

203233
/**
@@ -230,12 +260,14 @@ function validateFilePath(filePath: string): { isValid: boolean; error?: string
230260
/**
231261
* Handle external URL
232262
* If workspaceId is provided, checks if file already exists and saves to workspace if not
263+
* If executionContext is provided, also stores the file in execution storage and returns UserFile
233264
*/
234265
async function handleExternalUrl(
235266
url: string,
236267
fileType: string,
237268
workspaceId: string,
238-
userId: string
269+
userId: string,
270+
executionContext?: ExecutionContext
239271
): Promise<ParseResult> {
240272
try {
241273
logger.info('Fetching external URL:', url)
@@ -312,17 +344,13 @@ async function handleExternalUrl(
312344

313345
if (existingFile) {
314346
const storageFilePath = `/api/files/serve/${existingFile.key}`
315-
return handleCloudFile(storageFilePath, fileType, 'workspace', userId)
347+
return handleCloudFile(storageFilePath, fileType, 'workspace', userId, executionContext)
316348
}
317349
}
318350
}
319351

320-
const pinnedUrl = createPinnedUrl(url, urlValidation.resolvedIP!)
321-
const response = await fetch(pinnedUrl, {
322-
signal: AbortSignal.timeout(DOWNLOAD_TIMEOUT_MS),
323-
headers: {
324-
Host: urlValidation.originalHostname!,
325-
},
352+
const response = await secureFetchWithPinnedIP(url, urlValidation.resolvedIP!, {
353+
timeout: DOWNLOAD_TIMEOUT_MS,
326354
})
327355
if (!response.ok) {
328356
throw new Error(`Failed to fetch URL: ${response.status} ${response.statusText}`)
@@ -341,6 +369,19 @@ async function handleExternalUrl(
341369

342370
logger.info(`Downloaded file from URL: ${url}, size: ${buffer.length} bytes`)
343371

372+
let userFile: UserFile | undefined
373+
const mimeType = response.headers.get('content-type') || getMimeTypeFromExtension(extension)
374+
375+
if (executionContext) {
376+
try {
377+
userFile = await uploadExecutionFile(executionContext, buffer, filename, mimeType, userId)
378+
logger.info(`Stored file in execution storage: ${filename}`, { key: userFile.key })
379+
} catch (uploadError) {
380+
logger.warn(`Failed to store file in execution storage:`, uploadError)
381+
// Continue without userFile - parsing can still work
382+
}
383+
}
384+
344385
if (shouldCheckWorkspace) {
345386
try {
346387
const permission = await getUserEntityPermissions(userId, 'workspace', workspaceId)
@@ -353,8 +394,6 @@ async function handleExternalUrl(
353394
})
354395
} else {
355396
const { uploadWorkspaceFile } = await import('@/lib/uploads/contexts/workspace')
356-
const mimeType =
357-
response.headers.get('content-type') || getMimeTypeFromExtension(extension)
358397
await uploadWorkspaceFile(workspaceId, userId, buffer, filename, mimeType)
359398
logger.info(`Saved URL file to workspace storage: ${filename}`)
360399
}
@@ -363,17 +402,23 @@ async function handleExternalUrl(
363402
}
364403
}
365404

405+
let parseResult: ParseResult
366406
if (extension === 'pdf') {
367-
return await handlePdfBuffer(buffer, filename, fileType, url)
368-
}
369-
if (extension === 'csv') {
370-
return await handleCsvBuffer(buffer, filename, fileType, url)
407+
parseResult = await handlePdfBuffer(buffer, filename, fileType, url)
408+
} else if (extension === 'csv') {
409+
parseResult = await handleCsvBuffer(buffer, filename, fileType, url)
410+
} else if (isSupportedFileType(extension)) {
411+
parseResult = await handleGenericTextBuffer(buffer, filename, extension, fileType, url)
412+
} else {
413+
parseResult = handleGenericBuffer(buffer, filename, extension, fileType)
371414
}
372-
if (isSupportedFileType(extension)) {
373-
return await handleGenericTextBuffer(buffer, filename, extension, fileType, url)
415+
416+
// Attach userFile to the result
417+
if (userFile) {
418+
parseResult.userFile = userFile
374419
}
375420

376-
return handleGenericBuffer(buffer, filename, extension, fileType)
421+
return parseResult
377422
} catch (error) {
378423
logger.error(`Error handling external URL ${url}:`, error)
379424
return {
@@ -386,12 +431,15 @@ async function handleExternalUrl(
386431

387432
/**
388433
* Handle file stored in cloud storage
434+
* If executionContext is provided and file is not already from execution storage,
435+
* copies the file to execution storage and returns UserFile
389436
*/
390437
async function handleCloudFile(
391438
filePath: string,
392439
fileType: string,
393440
explicitContext: string | undefined,
394-
userId: string
441+
userId: string,
442+
executionContext?: ExecutionContext
395443
): Promise<ParseResult> {
396444
try {
397445
const cloudKey = extractStorageKey(filePath)
@@ -438,6 +486,7 @@ async function handleCloudFile(
438486

439487
const filename = originalFilename || cloudKey.split('/').pop() || cloudKey
440488
const extension = path.extname(filename).toLowerCase().substring(1)
489+
const mimeType = getMimeTypeFromExtension(extension)
441490

442491
const normalizedFilePath = `/api/files/serve/${encodeURIComponent(cloudKey)}?context=${context}`
443492
let workspaceIdFromKey: string | undefined
@@ -453,6 +502,39 @@ async function handleCloudFile(
453502

454503
const viewerUrl = getViewerUrl(cloudKey, workspaceIdFromKey)
455504

505+
// Store file in execution storage if executionContext is provided
506+
let userFile: UserFile | undefined
507+
508+
if (executionContext) {
509+
// If file is already from execution context, create UserFile reference without re-uploading
510+
if (context === 'execution') {
511+
userFile = {
512+
id: `file_${Date.now()}_${Math.random().toString(36).substring(2, 9)}`,
513+
name: filename,
514+
url: normalizedFilePath,
515+
size: fileBuffer.length,
516+
type: mimeType,
517+
key: cloudKey,
518+
context: 'execution',
519+
}
520+
logger.info(`Created UserFile reference for existing execution file: ${filename}`)
521+
} else {
522+
// Copy from workspace/other storage to execution storage
523+
try {
524+
userFile = await uploadExecutionFile(
525+
executionContext,
526+
fileBuffer,
527+
filename,
528+
mimeType,
529+
userId
530+
)
531+
logger.info(`Copied file to execution storage: ${filename}`, { key: userFile.key })
532+
} catch (uploadError) {
533+
logger.warn(`Failed to copy file to execution storage:`, uploadError)
534+
}
535+
}
536+
}
537+
456538
let parseResult: ParseResult
457539
if (extension === 'pdf') {
458540
parseResult = await handlePdfBuffer(fileBuffer, filename, fileType, normalizedFilePath)
@@ -477,6 +559,11 @@ async function handleCloudFile(
477559

478560
parseResult.viewerUrl = viewerUrl
479561

562+
// Attach userFile to the result
563+
if (userFile) {
564+
parseResult.userFile = userFile
565+
}
566+
480567
return parseResult
481568
} catch (error) {
482569
logger.error(`Error handling cloud file ${filePath}:`, error)
@@ -500,7 +587,8 @@ async function handleCloudFile(
500587
async function handleLocalFile(
501588
filePath: string,
502589
fileType: string,
503-
userId: string
590+
userId: string,
591+
executionContext?: ExecutionContext
504592
): Promise<ParseResult> {
505593
try {
506594
const filename = filePath.split('/').pop() || filePath
@@ -540,13 +628,32 @@ async function handleLocalFile(
540628
const hash = createHash('md5').update(fileBuffer).digest('hex')
541629

542630
const extension = path.extname(filename).toLowerCase().substring(1)
631+
const mimeType = fileType || getMimeTypeFromExtension(extension)
632+
633+
// Store file in execution storage if executionContext is provided
634+
let userFile: UserFile | undefined
635+
if (executionContext) {
636+
try {
637+
userFile = await uploadExecutionFile(
638+
executionContext,
639+
fileBuffer,
640+
filename,
641+
mimeType,
642+
userId
643+
)
644+
logger.info(`Stored local file in execution storage: ${filename}`, { key: userFile.key })
645+
} catch (uploadError) {
646+
logger.warn(`Failed to store local file in execution storage:`, uploadError)
647+
}
648+
}
543649

544650
return {
545651
success: true,
546652
content: result.content,
547653
filePath,
654+
userFile,
548655
metadata: {
549-
fileType: fileType || getMimeTypeFromExtension(extension),
656+
fileType: mimeType,
550657
size: stats.size,
551658
hash,
552659
processingTime: 0,

0 commit comments

Comments
 (0)