Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apps/sim/app/api/copilot/execute-tool/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ export async function POST(req: NextRequest) {
hasApiKey: !!executionParams.apiKey,
})

const result = await executeTool(resolvedToolName, executionParams, true)
const result = await executeTool(resolvedToolName, executionParams)

logger.info(`[${tracker.requestId}] Tool execution complete`, {
toolName,
Expand Down
173 changes: 140 additions & 33 deletions apps/sim/app/api/files/parse/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@ import { createLogger } from '@sim/logger'
import binaryExtensionsList from 'binary-extensions'
import { type NextRequest, NextResponse } from 'next/server'
import { checkHybridAuth } from '@/lib/auth/hybrid'
import { createPinnedUrl, validateUrlWithDNS } from '@/lib/core/security/input-validation'
import { secureFetchWithPinnedIP, validateUrlWithDNS } from '@/lib/core/security/input-validation'
import { isSupportedFileType, parseFile } from '@/lib/file-parsers'
import { isUsingCloudStorage, type StorageContext, StorageService } from '@/lib/uploads'
import { uploadExecutionFile } from '@/lib/uploads/contexts/execution'
import { UPLOAD_DIR_SERVER } from '@/lib/uploads/core/setup.server'
import { getFileMetadataByKey } from '@/lib/uploads/server/metadata'
import {
Expand All @@ -21,6 +22,7 @@ import {
} from '@/lib/uploads/utils/file-utils'
import { getUserEntityPermissions } from '@/lib/workspaces/permissions/utils'
import { verifyFileAccess } from '@/app/api/files/authorization'
import type { UserFile } from '@/executor/types'
import '@/lib/uploads/core/setup.server'

export const dynamic = 'force-dynamic'
Expand All @@ -30,13 +32,20 @@ const logger = createLogger('FilesParseAPI')
const MAX_DOWNLOAD_SIZE_BYTES = 100 * 1024 * 1024 // 100 MB
const DOWNLOAD_TIMEOUT_MS = 30000 // 30 seconds

interface ExecutionContext {
workspaceId: string
workflowId: string
executionId: string
}

interface ParseResult {
success: boolean
content?: string
error?: string
filePath: string
originalName?: string // Original filename from database (for workspace files)
viewerUrl?: string | null // Viewer URL for the file if available
userFile?: UserFile // UserFile object for the raw file
metadata?: {
fileType: string
size: number
Expand Down Expand Up @@ -70,27 +79,45 @@ export async function POST(request: NextRequest) {

const userId = authResult.userId
const requestData = await request.json()
const { filePath, fileType, workspaceId } = requestData
const { filePath, fileType, workspaceId, workflowId, executionId } = requestData

if (!filePath || (typeof filePath === 'string' && filePath.trim() === '')) {
return NextResponse.json({ success: false, error: 'No file path provided' }, { status: 400 })
}

logger.info('File parse request received:', { filePath, fileType, workspaceId, userId })
// Build execution context if all required fields are present
const executionContext: ExecutionContext | undefined =
workspaceId && workflowId && executionId
? { workspaceId, workflowId, executionId }
: undefined

logger.info('File parse request received:', {
filePath,
fileType,
workspaceId,
userId,
hasExecutionContext: !!executionContext,
})

if (Array.isArray(filePath)) {
const results = []
for (const path of filePath) {
if (!path || (typeof path === 'string' && path.trim() === '')) {
for (const singlePath of filePath) {
if (!singlePath || (typeof singlePath === 'string' && singlePath.trim() === '')) {
results.push({
success: false,
error: 'Empty file path in array',
filePath: path || '',
filePath: singlePath || '',
})
continue
}

const result = await parseFileSingle(path, fileType, workspaceId, userId)
const result = await parseFileSingle(
singlePath,
fileType,
workspaceId,
userId,
executionContext
)
if (result.metadata) {
result.metadata.processingTime = Date.now() - startTime
}
Expand All @@ -106,6 +133,7 @@ export async function POST(request: NextRequest) {
fileType: result.metadata?.fileType || 'application/octet-stream',
size: result.metadata?.size || 0,
binary: false,
file: result.userFile,
},
filePath: result.filePath,
viewerUrl: result.viewerUrl,
Expand All @@ -121,7 +149,7 @@ export async function POST(request: NextRequest) {
})
}

const result = await parseFileSingle(filePath, fileType, workspaceId, userId)
const result = await parseFileSingle(filePath, fileType, workspaceId, userId, executionContext)

if (result.metadata) {
result.metadata.processingTime = Date.now() - startTime
Expand All @@ -137,6 +165,7 @@ export async function POST(request: NextRequest) {
fileType: result.metadata?.fileType || 'application/octet-stream',
size: result.metadata?.size || 0,
binary: false,
file: result.userFile,
},
filePath: result.filePath,
viewerUrl: result.viewerUrl,
Expand Down Expand Up @@ -164,7 +193,8 @@ async function parseFileSingle(
filePath: string,
fileType: string,
workspaceId: string,
userId: string
userId: string,
executionContext?: ExecutionContext
): Promise<ParseResult> {
logger.info('Parsing file:', filePath)

Expand All @@ -186,18 +216,18 @@ async function parseFileSingle(
}

if (filePath.includes('/api/files/serve/')) {
return handleCloudFile(filePath, fileType, undefined, userId)
return handleCloudFile(filePath, fileType, undefined, userId, executionContext)
}

if (filePath.startsWith('http://') || filePath.startsWith('https://')) {
return handleExternalUrl(filePath, fileType, workspaceId, userId)
return handleExternalUrl(filePath, fileType, workspaceId, userId, executionContext)
}

if (isUsingCloudStorage()) {
return handleCloudFile(filePath, fileType, undefined, userId)
return handleCloudFile(filePath, fileType, undefined, userId, executionContext)
}

return handleLocalFile(filePath, fileType, userId)
return handleLocalFile(filePath, fileType, userId, executionContext)
}

/**
Expand Down Expand Up @@ -230,12 +260,14 @@ function validateFilePath(filePath: string): { isValid: boolean; error?: string
/**
* Handle external URL
* If workspaceId is provided, checks if file already exists and saves to workspace if not
* If executionContext is provided, also stores the file in execution storage and returns UserFile
*/
async function handleExternalUrl(
url: string,
fileType: string,
workspaceId: string,
userId: string
userId: string,
executionContext?: ExecutionContext
): Promise<ParseResult> {
try {
logger.info('Fetching external URL:', url)
Expand Down Expand Up @@ -312,17 +344,13 @@ async function handleExternalUrl(

if (existingFile) {
const storageFilePath = `/api/files/serve/${existingFile.key}`
return handleCloudFile(storageFilePath, fileType, 'workspace', userId)
return handleCloudFile(storageFilePath, fileType, 'workspace', userId, executionContext)
}
}
}

const pinnedUrl = createPinnedUrl(url, urlValidation.resolvedIP!)
const response = await fetch(pinnedUrl, {
signal: AbortSignal.timeout(DOWNLOAD_TIMEOUT_MS),
headers: {
Host: urlValidation.originalHostname!,
},
const response = await secureFetchWithPinnedIP(url, urlValidation.resolvedIP!, {
timeout: DOWNLOAD_TIMEOUT_MS,
})
if (!response.ok) {
throw new Error(`Failed to fetch URL: ${response.status} ${response.statusText}`)
Expand All @@ -341,6 +369,19 @@ async function handleExternalUrl(

logger.info(`Downloaded file from URL: ${url}, size: ${buffer.length} bytes`)

let userFile: UserFile | undefined
const mimeType = response.headers.get('content-type') || getMimeTypeFromExtension(extension)

if (executionContext) {
try {
userFile = await uploadExecutionFile(executionContext, buffer, filename, mimeType, userId)
logger.info(`Stored file in execution storage: ${filename}`, { key: userFile.key })
} catch (uploadError) {
logger.warn(`Failed to store file in execution storage:`, uploadError)
// Continue without userFile - parsing can still work
}
}

if (shouldCheckWorkspace) {
try {
const permission = await getUserEntityPermissions(userId, 'workspace', workspaceId)
Expand All @@ -353,8 +394,6 @@ async function handleExternalUrl(
})
} else {
const { uploadWorkspaceFile } = await import('@/lib/uploads/contexts/workspace')
const mimeType =
response.headers.get('content-type') || getMimeTypeFromExtension(extension)
await uploadWorkspaceFile(workspaceId, userId, buffer, filename, mimeType)
logger.info(`Saved URL file to workspace storage: ${filename}`)
}
Expand All @@ -363,17 +402,23 @@ async function handleExternalUrl(
}
}

let parseResult: ParseResult
if (extension === 'pdf') {
return await handlePdfBuffer(buffer, filename, fileType, url)
}
if (extension === 'csv') {
return await handleCsvBuffer(buffer, filename, fileType, url)
parseResult = await handlePdfBuffer(buffer, filename, fileType, url)
} else if (extension === 'csv') {
parseResult = await handleCsvBuffer(buffer, filename, fileType, url)
} else if (isSupportedFileType(extension)) {
parseResult = await handleGenericTextBuffer(buffer, filename, extension, fileType, url)
} else {
parseResult = handleGenericBuffer(buffer, filename, extension, fileType)
}
if (isSupportedFileType(extension)) {
return await handleGenericTextBuffer(buffer, filename, extension, fileType, url)

// Attach userFile to the result
if (userFile) {
parseResult.userFile = userFile
}

return handleGenericBuffer(buffer, filename, extension, fileType)
return parseResult
} catch (error) {
logger.error(`Error handling external URL ${url}:`, error)
return {
Expand All @@ -386,12 +431,15 @@ async function handleExternalUrl(

/**
* Handle file stored in cloud storage
* If executionContext is provided and file is not already from execution storage,
* copies the file to execution storage and returns UserFile
*/
async function handleCloudFile(
filePath: string,
fileType: string,
explicitContext: string | undefined,
userId: string
userId: string,
executionContext?: ExecutionContext
): Promise<ParseResult> {
try {
const cloudKey = extractStorageKey(filePath)
Expand Down Expand Up @@ -438,6 +486,7 @@ async function handleCloudFile(

const filename = originalFilename || cloudKey.split('/').pop() || cloudKey
const extension = path.extname(filename).toLowerCase().substring(1)
const mimeType = getMimeTypeFromExtension(extension)

const normalizedFilePath = `/api/files/serve/${encodeURIComponent(cloudKey)}?context=${context}`
let workspaceIdFromKey: string | undefined
Expand All @@ -453,6 +502,39 @@ async function handleCloudFile(

const viewerUrl = getViewerUrl(cloudKey, workspaceIdFromKey)

// Store file in execution storage if executionContext is provided
let userFile: UserFile | undefined

if (executionContext) {
// If file is already from execution context, create UserFile reference without re-uploading
if (context === 'execution') {
userFile = {
id: `file_${Date.now()}_${Math.random().toString(36).substring(2, 9)}`,
name: filename,
url: normalizedFilePath,
size: fileBuffer.length,
type: mimeType,
key: cloudKey,
context: 'execution',
}
logger.info(`Created UserFile reference for existing execution file: ${filename}`)
} else {
// Copy from workspace/other storage to execution storage
try {
userFile = await uploadExecutionFile(
executionContext,
fileBuffer,
filename,
mimeType,
userId
)
logger.info(`Copied file to execution storage: ${filename}`, { key: userFile.key })
} catch (uploadError) {
logger.warn(`Failed to copy file to execution storage:`, uploadError)
}
}
}

let parseResult: ParseResult
if (extension === 'pdf') {
parseResult = await handlePdfBuffer(fileBuffer, filename, fileType, normalizedFilePath)
Expand All @@ -477,6 +559,11 @@ async function handleCloudFile(

parseResult.viewerUrl = viewerUrl

// Attach userFile to the result
if (userFile) {
parseResult.userFile = userFile
}

return parseResult
} catch (error) {
logger.error(`Error handling cloud file ${filePath}:`, error)
Expand All @@ -500,7 +587,8 @@ async function handleCloudFile(
async function handleLocalFile(
filePath: string,
fileType: string,
userId: string
userId: string,
executionContext?: ExecutionContext
): Promise<ParseResult> {
try {
const filename = filePath.split('/').pop() || filePath
Expand Down Expand Up @@ -540,13 +628,32 @@ async function handleLocalFile(
const hash = createHash('md5').update(fileBuffer).digest('hex')

const extension = path.extname(filename).toLowerCase().substring(1)
const mimeType = fileType || getMimeTypeFromExtension(extension)

// Store file in execution storage if executionContext is provided
let userFile: UserFile | undefined
if (executionContext) {
try {
userFile = await uploadExecutionFile(
executionContext,
fileBuffer,
filename,
mimeType,
userId
)
logger.info(`Stored local file in execution storage: ${filename}`, { key: userFile.key })
} catch (uploadError) {
logger.warn(`Failed to store local file in execution storage:`, uploadError)
}
}

return {
success: true,
content: result.content,
filePath,
userFile,
metadata: {
fileType: fileType || getMimeTypeFromExtension(extension),
fileType: mimeType,
size: stats.size,
hash,
processingTime: 0,
Expand Down
Loading