@@ -6,9 +6,10 @@ import { createLogger } from '@sim/logger'
66import binaryExtensionsList from 'binary-extensions'
77import { type NextRequest , NextResponse } from 'next/server'
88import { checkHybridAuth } from '@/lib/auth/hybrid'
9- import { createPinnedUrl , validateUrlWithDNS } from '@/lib/core/security/input-validation'
9+ import { secureFetchWithPinnedIP , validateUrlWithDNS } from '@/lib/core/security/input-validation'
1010import { isSupportedFileType , parseFile } from '@/lib/file-parsers'
1111import { isUsingCloudStorage , type StorageContext , StorageService } from '@/lib/uploads'
12+ import { uploadExecutionFile } from '@/lib/uploads/contexts/execution'
1213import { UPLOAD_DIR_SERVER } from '@/lib/uploads/core/setup.server'
1314import { getFileMetadataByKey } from '@/lib/uploads/server/metadata'
1415import {
@@ -21,6 +22,7 @@ import {
2122} from '@/lib/uploads/utils/file-utils'
2223import { getUserEntityPermissions } from '@/lib/workspaces/permissions/utils'
2324import { verifyFileAccess } from '@/app/api/files/authorization'
25+ import type { UserFile } from '@/executor/types'
2426import '@/lib/uploads/core/setup.server'
2527
2628export const dynamic = 'force-dynamic'
@@ -30,13 +32,20 @@ const logger = createLogger('FilesParseAPI')
3032const MAX_DOWNLOAD_SIZE_BYTES = 100 * 1024 * 1024 // 100 MB
3133const DOWNLOAD_TIMEOUT_MS = 30000 // 30 seconds
3234
35+ interface ExecutionContext { // IDs identifying one workflow execution; POST builds this only when workspaceId, workflowId and executionId are all truthy in the request body, and the file handlers pass it to uploadExecutionFile
36+ workspaceId : string
37+ workflowId : string
38+ executionId : string
39+ }
40+
3341interface ParseResult {
3442 success : boolean
3543 content ?: string
3644 error ?: string
3745 filePath : string
3846 originalName ?: string // Original filename from database (for workspace files)
3947 viewerUrl ?: string | null // Viewer URL for the file if available
48+ userFile ?: UserFile // UserFile object for the raw file
4049 metadata ?: {
4150 fileType : string
4251 size : number
@@ -70,27 +79,45 @@ export async function POST(request: NextRequest) {
7079
7180 const userId = authResult . userId
7281 const requestData = await request . json ( )
73- const { filePath, fileType, workspaceId } = requestData
82+ const { filePath, fileType, workspaceId, workflowId , executionId } = requestData
7483
7584 if ( ! filePath || ( typeof filePath === 'string' && filePath . trim ( ) === '' ) ) {
7685 return NextResponse . json ( { success : false , error : 'No file path provided' } , { status : 400 } )
7786 }
7887
79- logger . info ( 'File parse request received:' , { filePath, fileType, workspaceId, userId } )
88+ // Build execution context if all required fields are present
89+ const executionContext : ExecutionContext | undefined =
90+ workspaceId && workflowId && executionId
91+ ? { workspaceId, workflowId, executionId }
92+ : undefined
93+
94+ logger . info ( 'File parse request received:' , {
95+ filePath,
96+ fileType,
97+ workspaceId,
98+ userId,
99+ hasExecutionContext : ! ! executionContext ,
100+ } )
80101
81102 if ( Array . isArray ( filePath ) ) {
82103 const results = [ ]
83- for ( const path of filePath ) {
84- if ( ! path || ( typeof path === 'string' && path . trim ( ) === '' ) ) {
104+ for ( const singlePath of filePath ) {
105+ if ( ! singlePath || ( typeof singlePath === 'string' && singlePath . trim ( ) === '' ) ) {
85106 results . push ( {
86107 success : false ,
87108 error : 'Empty file path in array' ,
88- filePath : path || '' ,
109+ filePath : singlePath || '' ,
89110 } )
90111 continue
91112 }
92113
93- const result = await parseFileSingle ( path , fileType , workspaceId , userId )
114+ const result = await parseFileSingle (
115+ singlePath ,
116+ fileType ,
117+ workspaceId ,
118+ userId ,
119+ executionContext
120+ )
94121 if ( result . metadata ) {
95122 result . metadata . processingTime = Date . now ( ) - startTime
96123 }
@@ -106,6 +133,7 @@ export async function POST(request: NextRequest) {
106133 fileType : result . metadata ?. fileType || 'application/octet-stream' ,
107134 size : result . metadata ?. size || 0 ,
108135 binary : false ,
136+ file : result . userFile ,
109137 } ,
110138 filePath : result . filePath ,
111139 viewerUrl : result . viewerUrl ,
@@ -121,7 +149,7 @@ export async function POST(request: NextRequest) {
121149 } )
122150 }
123151
124- const result = await parseFileSingle ( filePath , fileType , workspaceId , userId )
152+ const result = await parseFileSingle ( filePath , fileType , workspaceId , userId , executionContext )
125153
126154 if ( result . metadata ) {
127155 result . metadata . processingTime = Date . now ( ) - startTime
@@ -137,6 +165,7 @@ export async function POST(request: NextRequest) {
137165 fileType : result . metadata ?. fileType || 'application/octet-stream' ,
138166 size : result . metadata ?. size || 0 ,
139167 binary : false ,
168+ file : result . userFile ,
140169 } ,
141170 filePath : result . filePath ,
142171 viewerUrl : result . viewerUrl ,
@@ -164,7 +193,8 @@ async function parseFileSingle(
164193 filePath : string ,
165194 fileType : string ,
166195 workspaceId : string ,
167- userId : string
196+ userId : string ,
197+ executionContext ?: ExecutionContext
168198) : Promise < ParseResult > {
169199 logger . info ( 'Parsing file:' , filePath )
170200
@@ -186,18 +216,18 @@ async function parseFileSingle(
186216 }
187217
188218 if ( filePath . includes ( '/api/files/serve/' ) ) {
189- return handleCloudFile ( filePath , fileType , undefined , userId )
219+ return handleCloudFile ( filePath , fileType , undefined , userId , executionContext )
190220 }
191221
192222 if ( filePath . startsWith ( 'http://' ) || filePath . startsWith ( 'https://' ) ) {
193- return handleExternalUrl ( filePath , fileType , workspaceId , userId )
223+ return handleExternalUrl ( filePath , fileType , workspaceId , userId , executionContext )
194224 }
195225
196226 if ( isUsingCloudStorage ( ) ) {
197- return handleCloudFile ( filePath , fileType , undefined , userId )
227+ return handleCloudFile ( filePath , fileType , undefined , userId , executionContext )
198228 }
199229
200- return handleLocalFile ( filePath , fileType , userId )
230+ return handleLocalFile ( filePath , fileType , userId , executionContext )
201231}
202232
203233/**
@@ -230,12 +260,14 @@ function validateFilePath(filePath: string): { isValid: boolean; error?: string
230260/**
231261 * Handle external URL
232262 * If workspaceId is provided, checks if file already exists and saves to workspace if not
263+ * If executionContext is provided, also stores the file in execution storage and returns UserFile
233264 */
234265async function handleExternalUrl (
235266 url : string ,
236267 fileType : string ,
237268 workspaceId : string ,
238- userId : string
269+ userId : string ,
270+ executionContext ?: ExecutionContext
239271) : Promise < ParseResult > {
240272 try {
241273 logger . info ( 'Fetching external URL:' , url )
@@ -312,17 +344,13 @@ async function handleExternalUrl(
312344
313345 if ( existingFile ) {
314346 const storageFilePath = `/api/files/serve/${ existingFile . key } `
315- return handleCloudFile ( storageFilePath , fileType , 'workspace' , userId )
347+ return handleCloudFile ( storageFilePath , fileType , 'workspace' , userId , executionContext )
316348 }
317349 }
318350 }
319351
320- const pinnedUrl = createPinnedUrl ( url , urlValidation . resolvedIP ! )
321- const response = await fetch ( pinnedUrl , {
322- signal : AbortSignal . timeout ( DOWNLOAD_TIMEOUT_MS ) ,
323- headers : {
324- Host : urlValidation . originalHostname ! ,
325- } ,
352+ const response = await secureFetchWithPinnedIP ( url , urlValidation . resolvedIP ! , {
353+ timeout : DOWNLOAD_TIMEOUT_MS ,
326354 } )
327355 if ( ! response . ok ) {
328356 throw new Error ( `Failed to fetch URL: ${ response . status } ${ response . statusText } ` )
@@ -341,6 +369,19 @@ async function handleExternalUrl(
341369
342370 logger . info ( `Downloaded file from URL: ${ url } , size: ${ buffer . length } bytes` )
343371
372+ let userFile : UserFile | undefined
373+ const mimeType = response . headers . get ( 'content-type' ) || getMimeTypeFromExtension ( extension )
374+
375+ if ( executionContext ) {
376+ try {
377+ userFile = await uploadExecutionFile ( executionContext , buffer , filename , mimeType , userId )
378+ logger . info ( `Stored file in execution storage: ${ filename } ` , { key : userFile . key } )
379+ } catch ( uploadError ) {
380+ logger . warn ( `Failed to store file in execution storage:` , uploadError )
381+ // Continue without userFile - parsing can still work
382+ }
383+ }
384+
344385 if ( shouldCheckWorkspace ) {
345386 try {
346387 const permission = await getUserEntityPermissions ( userId , 'workspace' , workspaceId )
@@ -353,8 +394,6 @@ async function handleExternalUrl(
353394 } )
354395 } else {
355396 const { uploadWorkspaceFile } = await import ( '@/lib/uploads/contexts/workspace' )
356- const mimeType =
357- response . headers . get ( 'content-type' ) || getMimeTypeFromExtension ( extension )
358397 await uploadWorkspaceFile ( workspaceId , userId , buffer , filename , mimeType )
359398 logger . info ( `Saved URL file to workspace storage: ${ filename } ` )
360399 }
@@ -363,17 +402,23 @@ async function handleExternalUrl(
363402 }
364403 }
365404
405+ let parseResult : ParseResult
366406 if ( extension === 'pdf' ) {
367- return await handlePdfBuffer ( buffer , filename , fileType , url )
368- }
369- if ( extension === 'csv' ) {
370- return await handleCsvBuffer ( buffer , filename , fileType , url )
407+ parseResult = await handlePdfBuffer ( buffer , filename , fileType , url )
408+ } else if ( extension === 'csv' ) {
409+ parseResult = await handleCsvBuffer ( buffer , filename , fileType , url )
410+ } else if ( isSupportedFileType ( extension ) ) {
411+ parseResult = await handleGenericTextBuffer ( buffer , filename , extension , fileType , url )
412+ } else {
413+ parseResult = handleGenericBuffer ( buffer , filename , extension , fileType )
371414 }
372- if ( isSupportedFileType ( extension ) ) {
373- return await handleGenericTextBuffer ( buffer , filename , extension , fileType , url )
415+
416+ // Attach userFile to the result
417+ if ( userFile ) {
418+ parseResult . userFile = userFile
374419 }
375420
376- return handleGenericBuffer ( buffer , filename , extension , fileType )
421+ return parseResult
377422 } catch ( error ) {
378423 logger . error ( `Error handling external URL ${ url } :` , error )
379424 return {
@@ -386,12 +431,15 @@ async function handleExternalUrl(
386431
387432/**
388433 * Handle file stored in cloud storage
434+ * If executionContext is provided and file is not already from execution storage,
435+ * copies the file to execution storage and returns UserFile
389436 */
390437async function handleCloudFile (
391438 filePath : string ,
392439 fileType : string ,
393440 explicitContext : string | undefined ,
394- userId : string
441+ userId : string ,
442+ executionContext ?: ExecutionContext
395443) : Promise < ParseResult > {
396444 try {
397445 const cloudKey = extractStorageKey ( filePath )
@@ -438,6 +486,7 @@ async function handleCloudFile(
438486
439487 const filename = originalFilename || cloudKey . split ( '/' ) . pop ( ) || cloudKey
440488 const extension = path . extname ( filename ) . toLowerCase ( ) . substring ( 1 )
489+ const mimeType = getMimeTypeFromExtension ( extension )
441490
442491 const normalizedFilePath = `/api/files/serve/${ encodeURIComponent ( cloudKey ) } ?context=${ context } `
443492 let workspaceIdFromKey : string | undefined
@@ -453,6 +502,39 @@ async function handleCloudFile(
453502
454503 const viewerUrl = getViewerUrl ( cloudKey , workspaceIdFromKey )
455504
505+ // Store file in execution storage if executionContext is provided
506+ let userFile : UserFile | undefined
507+
508+ if ( executionContext ) {
509+ // If file is already from execution context, create UserFile reference without re-uploading
510+ if ( context === 'execution' ) {
511+ userFile = {
512+ id : `file_${ Date . now ( ) } _${ Math . random ( ) . toString ( 36 ) . substring ( 2 , 9 ) } ` ,
513+ name : filename ,
514+ url : normalizedFilePath ,
515+ size : fileBuffer . length ,
516+ type : mimeType ,
517+ key : cloudKey ,
518+ context : 'execution' ,
519+ }
520+ logger . info ( `Created UserFile reference for existing execution file: ${ filename } ` )
521+ } else {
522+ // Copy from workspace/other storage to execution storage
523+ try {
524+ userFile = await uploadExecutionFile (
525+ executionContext ,
526+ fileBuffer ,
527+ filename ,
528+ mimeType ,
529+ userId
530+ )
531+ logger . info ( `Copied file to execution storage: ${ filename } ` , { key : userFile . key } )
532+ } catch ( uploadError ) {
533+ logger . warn ( `Failed to copy file to execution storage:` , uploadError )
534+ }
535+ }
536+ }
537+
456538 let parseResult : ParseResult
457539 if ( extension === 'pdf' ) {
458540 parseResult = await handlePdfBuffer ( fileBuffer , filename , fileType , normalizedFilePath )
@@ -477,6 +559,11 @@ async function handleCloudFile(
477559
478560 parseResult . viewerUrl = viewerUrl
479561
562+ // Attach userFile to the result
563+ if ( userFile ) {
564+ parseResult . userFile = userFile
565+ }
566+
480567 return parseResult
481568 } catch ( error ) {
482569 logger . error ( `Error handling cloud file ${ filePath } :` , error )
@@ -500,7 +587,8 @@ async function handleCloudFile(
500587async function handleLocalFile (
501588 filePath : string ,
502589 fileType : string ,
503- userId : string
590+ userId : string ,
591+ executionContext ?: ExecutionContext
504592) : Promise < ParseResult > {
505593 try {
506594 const filename = filePath . split ( '/' ) . pop ( ) || filePath
@@ -540,13 +628,32 @@ async function handleLocalFile(
540628 const hash = createHash ( 'md5' ) . update ( fileBuffer ) . digest ( 'hex' )
541629
542630 const extension = path . extname ( filename ) . toLowerCase ( ) . substring ( 1 )
631+ const mimeType = fileType || getMimeTypeFromExtension ( extension )
632+
633+ // Store file in execution storage if executionContext is provided
634+ let userFile : UserFile | undefined
635+ if ( executionContext ) {
636+ try {
637+ userFile = await uploadExecutionFile (
638+ executionContext ,
639+ fileBuffer ,
640+ filename ,
641+ mimeType ,
642+ userId
643+ )
644+ logger . info ( `Stored local file in execution storage: ${ filename } ` , { key : userFile . key } )
645+ } catch ( uploadError ) {
646+ logger . warn ( `Failed to store local file in execution storage:` , uploadError )
647+ }
648+ }
543649
544650 return {
545651 success : true ,
546652 content : result . content ,
547653 filePath,
654+ userFile,
548655 metadata : {
549- fileType : fileType || getMimeTypeFromExtension ( extension ) ,
656+ fileType : mimeType ,
550657 size : stats . size ,
551658 hash,
552659 processingTime : 0 ,
0 commit comments