From 4012bb7eb3127ccedffc5d70dbae936e843c989a Mon Sep 17 00:00:00 2001 From: Abimael Martell Date: Fri, 30 Jan 2026 18:17:17 -0800 Subject: [PATCH 01/11] Add agent command for AI-powered web data extraction Implement the agent endpoint that allows users to extract data from the web using natural language prompts. Supports structured output via JSON schemas, URL targeting, credit limits, and async job handling with polling. Co-Authored-By: Claude Opus 4.5 --- src/commands/agent.ts | 293 ++++++++++++++++++++++++++++++++++++++++++ src/index.ts | 102 ++++++++++++++- src/types/agent.ts | 63 +++++++++ 3 files changed, 457 insertions(+), 1 deletion(-) create mode 100644 src/commands/agent.ts create mode 100644 src/types/agent.ts diff --git a/src/commands/agent.ts b/src/commands/agent.ts new file mode 100644 index 0000000..4689e45 --- /dev/null +++ b/src/commands/agent.ts @@ -0,0 +1,293 @@ +/** + * Agent command implementation + */ + +import type { + AgentOptions, + AgentResult, + AgentStatusResult, +} from '../types/agent'; +import { getClient } from '../utils/client'; +import { isJobId } from '../utils/job'; +import { writeOutput } from '../utils/output'; +import { readFileSync } from 'fs'; + +/** + * Load schema from file + */ +function loadSchemaFromFile(filePath: string): Record { + try { + const content = readFileSync(filePath, 'utf-8'); + return JSON.parse(content); + } catch (error) { + if ((error as NodeJS.ErrnoException).code === 'ENOENT') { + throw new Error(`Schema file not found: ${filePath}`); + } + if (error instanceof SyntaxError) { + throw new Error(`Invalid JSON in schema file: ${filePath}`); + } + throw error; + } +} + +/** + * Execute agent status check + */ +async function checkAgentStatus( + jobId: string, + options: AgentOptions +): Promise { + try { + const app = getClient({ apiKey: options.apiKey, apiUrl: options.apiUrl }); + const status = await app.getAgentStatus(jobId); + + return { + success: status.success, + data: { + id: jobId, + status: 
status.status, + data: status.data, + creditsUsed: status.creditsUsed, + expiresAt: status.expiresAt, + }, + }; + } catch (error) { + return { + success: false, + error: error instanceof Error ? error.message : 'Unknown error occurred', + }; + } +} + +/** + * Execute agent command + */ +export async function executeAgent( + options: AgentOptions +): Promise { + try { + const app = getClient({ apiKey: options.apiKey, apiUrl: options.apiUrl }); + const { prompt, status, wait, pollInterval, timeout } = options; + + // If status flag is set or input looks like a job ID, check status + if (status || isJobId(prompt)) { + return await checkAgentStatus(prompt, options); + } + + // Load schema from file if specified + let schema: Record | undefined = options.schema as + | Record + | undefined; + if (options.schemaFile) { + schema = loadSchemaFromFile(options.schemaFile); + } + + // Build agent options + const agentParams: { + prompt: string; + urls?: string[]; + schema?: Record; + maxCredits?: number; + pollInterval?: number; + timeout?: number; + } = { + prompt, + }; + + if (options.urls && options.urls.length > 0) { + agentParams.urls = options.urls; + } + if (schema) { + agentParams.schema = schema; + } + if (options.maxCredits !== undefined) { + agentParams.maxCredits = options.maxCredits; + } + + // If wait mode, use the convenience agent method with polling + if (wait) { + // Set polling options + if (pollInterval !== undefined) { + agentParams.pollInterval = pollInterval * 1000; // Convert to milliseconds + } else { + agentParams.pollInterval = 5000; // Default: 5 seconds + } + if (timeout !== undefined) { + agentParams.timeout = timeout * 1000; // Convert to milliseconds + } + + // Show progress if requested - use custom polling for better UX + if (options.progress) { + // Start agent first + const response = await app.startAgent(agentParams); + const jobId = response.id; + + process.stderr.write(`Starting agent...\n`); + process.stderr.write(`Job ID: ${jobId}\n`); 
+ + // Poll for status with progress updates + const pollMs = agentParams.pollInterval || 5000; + const startTime = Date.now(); + const timeoutMs = timeout ? timeout * 1000 : undefined; + + while (true) { + await new Promise((resolve) => setTimeout(resolve, pollMs)); + + const agentStatus = await app.getAgentStatus(jobId); + + // Show progress + process.stderr.write(`\rStatus: ${agentStatus.status}`); + + if ( + agentStatus.status === 'completed' || + agentStatus.status === 'failed' + ) { + process.stderr.write('\n'); + return { + success: agentStatus.success, + data: { + id: jobId, + status: agentStatus.status, + data: agentStatus.data, + creditsUsed: agentStatus.creditsUsed, + expiresAt: agentStatus.expiresAt, + }, + }; + } + + // Check timeout + if (timeoutMs && Date.now() - startTime > timeoutMs) { + process.stderr.write('\n'); + return { + success: false, + error: `Timeout after ${timeout} seconds. Agent still processing.`, + }; + } + } + } else { + // Use SDK's built-in polling (no progress display) + const agentResponse = await app.agent(agentParams); + return { + success: agentResponse.success, + data: { + id: '', + status: agentResponse.status, + data: agentResponse.data, + creditsUsed: agentResponse.creditsUsed, + expiresAt: agentResponse.expiresAt, + }, + }; + } + } + + // Otherwise, start agent and return job ID + const response = await app.startAgent(agentParams); + + return { + success: response.success, + data: { + jobId: response.id, + status: 'processing', + }, + }; + } catch (error) { + return { + success: false, + error: error instanceof Error ? 
error.message : 'Unknown error occurred', + }; + } +} + +/** + * Format agent status in human-readable way + */ +function formatAgentStatus(data: AgentStatusResult['data']): string { + if (!data) return ''; + + const lines: string[] = []; + lines.push(`Job ID: ${data.id}`); + lines.push(`Status: ${data.status}`); + + if (data.creditsUsed !== undefined) { + lines.push(`Credits Used: ${data.creditsUsed}`); + } + + if (data.expiresAt) { + const expiresDate = new Date(data.expiresAt); + lines.push( + `Expires: ${expiresDate.toLocaleString('en-US', { + year: 'numeric', + month: 'short', + day: 'numeric', + hour: '2-digit', + minute: '2-digit', + })}` + ); + } + + if (data.data) { + lines.push(''); + lines.push('Result:'); + lines.push(JSON.stringify(data.data, null, 2)); + } + + return lines.join('\n') + '\n'; +} + +/** + * Handle agent command output + */ +export async function handleAgentCommand(options: AgentOptions): Promise { + const result = await executeAgent(options); + + if (!result.success) { + console.error('Error:', result.error); + process.exit(1); + } + + // Handle status result (completed agent job with data) + if ('data' in result && result.data && 'data' in result.data) { + const statusResult = result as AgentStatusResult; + if (statusResult.data) { + let outputContent: string; + + if (options.json) { + // JSON format + outputContent = options.pretty + ? 
JSON.stringify({ success: true, ...statusResult.data }, null, 2) + : JSON.stringify({ success: true, ...statusResult.data }); + } else { + // Human-readable format + outputContent = formatAgentStatus(statusResult.data); + } + + writeOutput(outputContent, options.output, !!options.output); + return; + } + } + + // Handle agent start result (job ID) + const agentResult = result as AgentResult; + if (!agentResult.data) { + return; + } + + let outputContent: string; + + if ('jobId' in agentResult.data) { + const jobData = { + jobId: agentResult.data.jobId, + status: agentResult.data.status, + }; + + outputContent = options.pretty + ? JSON.stringify({ success: true, data: jobData }, null, 2) + : JSON.stringify({ success: true, data: jobData }); + } else { + outputContent = options.pretty + ? JSON.stringify(agentResult.data, null, 2) + : JSON.stringify(agentResult.data); + } + + writeOutput(outputContent, options.output, !!options.output); +} diff --git a/src/index.ts b/src/index.ts index 17f40ed..a377cc5 100644 --- a/src/index.ts +++ b/src/index.ts @@ -14,6 +14,7 @@ import { handleCreditUsageCommand } from './commands/credit-usage'; import { handleCrawlCommand } from './commands/crawl'; import { handleMapCommand } from './commands/map'; import { handleSearchCommand } from './commands/search'; +import { handleAgentCommand } from './commands/agent'; import { handleVersionCommand } from './commands/version'; import { handleLoginCommand } from './commands/login'; import { handleLogoutCommand } from './commands/logout'; @@ -35,6 +36,7 @@ const AUTH_REQUIRED_COMMANDS = [ 'crawl', 'map', 'search', + 'agent', 'credit-usage', ]; @@ -460,10 +462,108 @@ function createSearchCommand(): Command { return searchCmd; } -// Add crawl, map, and search commands to main program +/** + * Create and configure the agent command + */ +function createAgentCommand(): Command { + const agentCmd = new Command('agent') + .description('Run an AI agent to extract data from the web') + .argument( + 
'', + 'Natural language prompt describing data to extract, or job ID to check status' + ) + .option('--urls ', 'Comma-separated URLs to focus extraction on') + .option( + '--schema ', + 'JSON schema for structured output (inline JSON string)' + ) + .option( + '--schema-file ', + 'Path to JSON schema file for structured output' + ) + .option( + '--max-credits ', + 'Maximum credits to spend (job fails if exceeded)', + parseInt + ) + .option('--status', 'Check status of existing agent job', false) + .option( + '--wait', + 'Wait for agent to complete before returning results', + false + ) + .option( + '--poll-interval ', + 'Polling interval in seconds when waiting (default: 5)', + parseFloat + ) + .option( + '--timeout ', + 'Timeout in seconds when waiting (default: no timeout)', + parseFloat + ) + .option('--progress', 'Show progress while waiting', false) + .option( + '-k, --api-key ', + 'Firecrawl API key (overrides global --api-key)' + ) + .option('--api-url ', 'API URL (overrides global --api-url)') + .option('-o, --output ', 'Output file path (default: stdout)') + .option('--json', 'Output as JSON format', false) + .option('--pretty', 'Pretty print JSON output', false) + .action(async (promptOrJobId, options) => { + // Auto-detect if it's a job ID (UUID format) + const isStatusCheck = options.status || isJobId(promptOrJobId); + + // Parse URLs + let urls: string[] | undefined; + if (options.urls) { + urls = options.urls + .split(',') + .map((u: string) => u.trim()) + .filter((u: string) => u.length > 0); + } + + // Parse inline schema + let schema: Record | undefined; + if (options.schema) { + try { + schema = JSON.parse(options.schema) as Record; + } catch { + console.error('Error: Invalid JSON in --schema option'); + process.exit(1); + } + } + + const agentOptions = { + prompt: promptOrJobId, + urls, + schema, + schemaFile: options.schemaFile, + maxCredits: options.maxCredits, + status: isStatusCheck, + wait: options.wait, + pollInterval: options.pollInterval, 
+ timeout: options.timeout, + progress: options.progress, + apiKey: options.apiKey, + apiUrl: options.apiUrl, + output: options.output, + json: options.json, + pretty: options.pretty, + }; + + await handleAgentCommand(agentOptions); + }); + + return agentCmd; +} + +// Add crawl, map, search, and agent commands to main program program.addCommand(createCrawlCommand()); program.addCommand(createMapCommand()); program.addCommand(createSearchCommand()); +program.addCommand(createAgentCommand()); program .command('config') diff --git a/src/types/agent.ts b/src/types/agent.ts new file mode 100644 index 0000000..a41e639 --- /dev/null +++ b/src/types/agent.ts @@ -0,0 +1,63 @@ +/** + * Types and interfaces for the agent command + */ + +export type AgentModel = 'spark-1-mini' | 'spark-1-pro'; + +export type AgentStatus = 'processing' | 'completed' | 'failed'; + +export interface AgentOptions { + /** Natural language prompt describing the data to extract */ + prompt: string; + /** Model to use: spark-1-mini (default, cheaper) or spark-1-pro (higher accuracy) */ + model?: AgentModel; + /** Specific URLs to focus extraction on */ + urls?: string[]; + /** JSON schema for structured output */ + schema?: Record; + /** Path to JSON schema file */ + schemaFile?: string; + /** Maximum credits to spend (job fails if exceeded) */ + maxCredits?: number; + /** Check status of existing agent job */ + status?: boolean; + /** Wait for agent to complete before returning results */ + wait?: boolean; + /** Polling interval in seconds when waiting */ + pollInterval?: number; + /** Timeout in seconds when waiting */ + timeout?: number; + /** Show progress while waiting */ + progress?: boolean; + /** API key for Firecrawl */ + apiKey?: string; + /** API URL for Firecrawl */ + apiUrl?: string; + /** Output file path */ + output?: string; + /** Pretty print JSON output */ + pretty?: boolean; + /** Force JSON output */ + json?: boolean; +} + +export interface AgentResult { + success: boolean; + 
data?: { + jobId: string; + status: AgentStatus; + }; + error?: string; +} + +export interface AgentStatusResult { + success: boolean; + data?: { + id: string; + status: AgentStatus; + data?: any; + creditsUsed?: number; + expiresAt?: string; + }; + error?: string; +} From ede7c03185a078dfa6870c92979e3e0e8cd05467 Mon Sep 17 00:00:00 2001 From: Abimael Martell Date: Fri, 30 Jan 2026 18:24:24 -0800 Subject: [PATCH 02/11] Add --model option to agent command Support selecting between fire-1 (default) and fire-1-mini (faster, cheaper) models for agent extraction tasks. Co-Authored-By: Claude Opus 4.5 --- src/commands/agent.ts | 4 ++++ src/index.ts | 5 +++++ src/types/agent.ts | 4 ++-- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/commands/agent.ts b/src/commands/agent.ts index 4689e45..6d42e7d 100644 --- a/src/commands/agent.ts +++ b/src/commands/agent.ts @@ -87,6 +87,7 @@ export async function executeAgent( prompt: string; urls?: string[]; schema?: Record; + model?: string; maxCredits?: number; pollInterval?: number; timeout?: number; @@ -100,6 +101,9 @@ export async function executeAgent( if (schema) { agentParams.schema = schema; } + if (options.model) { + agentParams.model = options.model; + } if (options.maxCredits !== undefined) { agentParams.maxCredits = options.maxCredits; } diff --git a/src/index.ts b/src/index.ts index a377cc5..0c54b95 100644 --- a/src/index.ts +++ b/src/index.ts @@ -473,6 +473,10 @@ function createAgentCommand(): Command { 'Natural language prompt describing data to extract, or job ID to check status' ) .option('--urls ', 'Comma-separated URLs to focus extraction on') + .option( + '--model ', + 'Model to use: fire-1 (default) or fire-1-mini (faster, cheaper)' + ) .option( '--schema ', 'JSON schema for structured output (inline JSON string)' @@ -540,6 +544,7 @@ function createAgentCommand(): Command { urls, schema, schemaFile: options.schemaFile, + model: options.model, maxCredits: options.maxCredits, status: 
isStatusCheck, wait: options.wait, diff --git a/src/types/agent.ts b/src/types/agent.ts index a41e639..fde236f 100644 --- a/src/types/agent.ts +++ b/src/types/agent.ts @@ -2,14 +2,14 @@ * Types and interfaces for the agent command */ -export type AgentModel = 'spark-1-mini' | 'spark-1-pro'; +export type AgentModel = 'fire-1' | 'fire-1-mini'; export type AgentStatus = 'processing' | 'completed' | 'failed'; export interface AgentOptions { /** Natural language prompt describing the data to extract */ prompt: string; - /** Model to use: spark-1-mini (default, cheaper) or spark-1-pro (higher accuracy) */ + /** Model to use: fire-1 (default) or fire-1-mini (faster, cheaper) */ model?: AgentModel; /** Specific URLs to focus extraction on */ urls?: string[]; From 818b550eca4d51c9edf3a735496bb8d9bdaf85cd Mon Sep 17 00:00:00 2001 From: Abimael Martell Date: Fri, 30 Jan 2026 18:27:36 -0800 Subject: [PATCH 03/11] Update firecrawl-js SDK to 4.12.0 with model support - Upgrade @mendable/firecrawl-js from 4.10.0 to 4.12.0 - Update model types to match SDK: spark-1-mini and spark-1-pro Co-Authored-By: Claude Opus 4.5 --- package.json | 2 +- pnpm-lock.yaml | 10 +++++----- src/commands/agent.ts | 4 ++-- src/index.ts | 2 +- src/types/agent.ts | 4 ++-- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/package.json b/package.json index 4d15272..641469d 100644 --- a/package.json +++ b/package.json @@ -64,7 +64,7 @@ "vitest": "^4.0.0" }, "dependencies": { - "@mendable/firecrawl-js": "^4.10.0", + "@mendable/firecrawl-js": "^4.12.0", "commander": "^14.0.2" } } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index e3e21b6..f61e2fd 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -9,8 +9,8 @@ importers: .: dependencies: '@mendable/firecrawl-js': - specifier: ^4.10.0 - version: 4.10.0 + specifier: ^4.12.0 + version: 4.12.0 commander: specifier: ^14.0.2 version: 14.0.2 @@ -195,8 +195,8 @@ packages: '@jridgewell/sourcemap-codec@1.5.5': resolution: {integrity: 
sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==} - '@mendable/firecrawl-js@4.10.0': - resolution: {integrity: sha512-40qtKCVY3a1A4Y6t/m5Ar10HbzrWuyCNt7vR3uBh+j14GZC0JoxEkjaFRC00wBmPD9N5JMT4gmTXvzM/SI9enw==} + '@mendable/firecrawl-js@4.12.0': + resolution: {integrity: sha512-Jjawuumet+3htp39PwwhkZhSj2ORR8Tz/HqORoFGngrB3HadMBKoX6SSPIhayRTXwNnDkaU0PWm1SUtRvPdoPw==} engines: {node: '>=22.0.0'} '@rollup/rollup-android-arm-eabi@4.55.1': @@ -972,7 +972,7 @@ snapshots: '@jridgewell/sourcemap-codec@1.5.5': {} - '@mendable/firecrawl-js@4.10.0': + '@mendable/firecrawl-js@4.12.0': dependencies: axios: 1.13.2 typescript-event-target: 1.1.2 diff --git a/src/commands/agent.ts b/src/commands/agent.ts index 6d42e7d..fc8f1ea 100644 --- a/src/commands/agent.ts +++ b/src/commands/agent.ts @@ -87,7 +87,7 @@ export async function executeAgent( prompt: string; urls?: string[]; schema?: Record; - model?: string; + model?: 'spark-1-pro' | 'spark-1-mini'; maxCredits?: number; pollInterval?: number; timeout?: number; @@ -102,7 +102,7 @@ export async function executeAgent( agentParams.schema = schema; } if (options.model) { - agentParams.model = options.model; + agentParams.model = options.model as 'spark-1-pro' | 'spark-1-mini'; } if (options.maxCredits !== undefined) { agentParams.maxCredits = options.maxCredits; diff --git a/src/index.ts b/src/index.ts index 0c54b95..927c115 100644 --- a/src/index.ts +++ b/src/index.ts @@ -475,7 +475,7 @@ function createAgentCommand(): Command { .option('--urls ', 'Comma-separated URLs to focus extraction on') .option( '--model ', - 'Model to use: fire-1 (default) or fire-1-mini (faster, cheaper)' + 'Model to use: spark-1-mini (default, cheaper) or spark-1-pro (higher accuracy)' ) .option( '--schema ', diff --git a/src/types/agent.ts b/src/types/agent.ts index fde236f..f206623 100644 --- a/src/types/agent.ts +++ b/src/types/agent.ts @@ -2,14 +2,14 @@ * Types and interfaces for the agent command */ -export 
type AgentModel = 'fire-1' | 'fire-1-mini'; +export type AgentModel = 'spark-1-pro' | 'spark-1-mini'; export type AgentStatus = 'processing' | 'completed' | 'failed'; export interface AgentOptions { /** Natural language prompt describing the data to extract */ prompt: string; - /** Model to use: fire-1 (default) or fire-1-mini (faster, cheaper) */ + /** Model to use: spark-1-mini (default, cheaper) or spark-1-pro (higher accuracy) */ model?: AgentModel; /** Specific URLs to focus extraction on */ urls?: string[]; From c1a1ca09627a8dd4dd926c9edd56dfd5cfecf051 Mon Sep 17 00:00:00 2001 From: Abimael Martell Date: Fri, 30 Jan 2026 18:34:20 -0800 Subject: [PATCH 04/11] Add spinner animation for agent command - Add spinner utility for CLI feedback - Show animated spinner while agent is starting and processing - Display success/fail indicators when complete - Remove --progress flag (spinner now always shown) Co-Authored-By: Claude Opus 4.5 --- src/commands/agent.ts | 131 ++++++++++++++++++++---------------------- src/index.ts | 2 - src/types/agent.ts | 2 - src/utils/spinner.ts | 62 ++++++++++++++++++++ 4 files changed, 123 insertions(+), 74 deletions(-) create mode 100644 src/utils/spinner.ts diff --git a/src/commands/agent.ts b/src/commands/agent.ts index fc8f1ea..495cf9d 100644 --- a/src/commands/agent.ts +++ b/src/commands/agent.ts @@ -10,6 +10,7 @@ import type { import { getClient } from '../utils/client'; import { isJobId } from '../utils/job'; import { writeOutput } from '../utils/output'; +import { createSpinner } from '../utils/spinner'; import { readFileSync } from 'fs'; /** @@ -108,85 +109,75 @@ export async function executeAgent( agentParams.maxCredits = options.maxCredits; } - // If wait mode, use the convenience agent method with polling + // If wait mode, use polling with spinner if (wait) { - // Set polling options - if (pollInterval !== undefined) { - agentParams.pollInterval = pollInterval * 1000; // Convert to milliseconds - } else { - 
agentParams.pollInterval = 5000; // Default: 5 seconds - } - if (timeout !== undefined) { - agentParams.timeout = timeout * 1000; // Convert to milliseconds - } + const spinner = createSpinner('Starting agent...'); + spinner.start(); + + // Start agent first + const response = await app.startAgent(agentParams); + const jobId = response.id; + + spinner.update(`Agent running... (Job ID: ${jobId})`); + + // Poll for status + const pollMs = pollInterval ? pollInterval * 1000 : 5000; + const startTime = Date.now(); + const timeoutMs = timeout ? timeout * 1000 : undefined; + + while (true) { + await new Promise((resolve) => setTimeout(resolve, pollMs)); + + const agentStatus = await app.getAgentStatus(jobId); + + if (agentStatus.status === 'completed') { + spinner.succeed('Agent completed'); + return { + success: agentStatus.success, + data: { + id: jobId, + status: agentStatus.status, + data: agentStatus.data, + creditsUsed: agentStatus.creditsUsed, + expiresAt: agentStatus.expiresAt, + }, + }; + } - // Show progress if requested - use custom polling for better UX - if (options.progress) { - // Start agent first - const response = await app.startAgent(agentParams); - const jobId = response.id; - - process.stderr.write(`Starting agent...\n`); - process.stderr.write(`Job ID: ${jobId}\n`); - - // Poll for status with progress updates - const pollMs = agentParams.pollInterval || 5000; - const startTime = Date.now(); - const timeoutMs = timeout ? 
timeout * 1000 : undefined; - - while (true) { - await new Promise((resolve) => setTimeout(resolve, pollMs)); - - const agentStatus = await app.getAgentStatus(jobId); - - // Show progress - process.stderr.write(`\rStatus: ${agentStatus.status}`); - - if ( - agentStatus.status === 'completed' || - agentStatus.status === 'failed' - ) { - process.stderr.write('\n'); - return { - success: agentStatus.success, - data: { - id: jobId, - status: agentStatus.status, - data: agentStatus.data, - creditsUsed: agentStatus.creditsUsed, - expiresAt: agentStatus.expiresAt, - }, - }; - } - - // Check timeout - if (timeoutMs && Date.now() - startTime > timeoutMs) { - process.stderr.write('\n'); - return { - success: false, - error: `Timeout after ${timeout} seconds. Agent still processing.`, - }; - } + if (agentStatus.status === 'failed') { + spinner.fail('Agent failed'); + return { + success: false, + data: { + id: jobId, + status: agentStatus.status, + data: agentStatus.data, + creditsUsed: agentStatus.creditsUsed, + expiresAt: agentStatus.expiresAt, + }, + error: agentStatus.error, + }; + } + + // Check timeout + if (timeoutMs && Date.now() - startTime > timeoutMs) { + spinner.fail(`Timeout after ${timeout}s (Job ID: ${jobId})`); + return { + success: false, + error: `Timeout after ${timeout} seconds. Agent still processing. 
Job ID: ${jobId}`, + }; } - } else { - // Use SDK's built-in polling (no progress display) - const agentResponse = await app.agent(agentParams); - return { - success: agentResponse.success, - data: { - id: '', - status: agentResponse.status, - data: agentResponse.data, - creditsUsed: agentResponse.creditsUsed, - expiresAt: agentResponse.expiresAt, - }, - }; } } // Otherwise, start agent and return job ID + const spinner = createSpinner('Starting agent...'); + spinner.start(); + const response = await app.startAgent(agentParams); + spinner.succeed(`Agent started (Job ID: ${response.id})`); + return { success: response.success, data: { diff --git a/src/index.ts b/src/index.ts index 927c115..180f9c1 100644 --- a/src/index.ts +++ b/src/index.ts @@ -506,7 +506,6 @@ function createAgentCommand(): Command { 'Timeout in seconds when waiting (default: no timeout)', parseFloat ) - .option('--progress', 'Show progress while waiting', false) .option( '-k, --api-key ', 'Firecrawl API key (overrides global --api-key)' @@ -550,7 +549,6 @@ function createAgentCommand(): Command { wait: options.wait, pollInterval: options.pollInterval, timeout: options.timeout, - progress: options.progress, apiKey: options.apiKey, apiUrl: options.apiUrl, output: options.output, diff --git a/src/types/agent.ts b/src/types/agent.ts index f206623..e2a5c9a 100644 --- a/src/types/agent.ts +++ b/src/types/agent.ts @@ -27,8 +27,6 @@ export interface AgentOptions { pollInterval?: number; /** Timeout in seconds when waiting */ timeout?: number; - /** Show progress while waiting */ - progress?: boolean; /** API key for Firecrawl */ apiKey?: string; /** API URL for Firecrawl */ diff --git a/src/utils/spinner.ts b/src/utils/spinner.ts new file mode 100644 index 0000000..f6c07e7 --- /dev/null +++ b/src/utils/spinner.ts @@ -0,0 +1,62 @@ +/** + * Simple spinner utility for CLI feedback + */ + +const SPINNER_FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']; + +export interface Spinner { + start: 
(message?: string) => void; + update: (message: string) => void; + stop: (finalMessage?: string) => void; + succeed: (message?: string) => void; + fail: (message?: string) => void; +} + +export function createSpinner(initialMessage: string = ''): Spinner { + let frameIndex = 0; + let interval: ReturnType | null = null; + let currentMessage = initialMessage; + + const clearLine = () => { + process.stderr.write('\r\x1b[K'); + }; + + const render = () => { + const frame = SPINNER_FRAMES[frameIndex]; + clearLine(); + process.stderr.write(`${frame} ${currentMessage}`); + frameIndex = (frameIndex + 1) % SPINNER_FRAMES.length; + }; + + return { + start(message?: string) { + if (message) currentMessage = message; + if (interval) return; + render(); + interval = setInterval(render, 80); + }, + + update(message: string) { + currentMessage = message; + }, + + stop(finalMessage?: string) { + if (interval) { + clearInterval(interval); + interval = null; + } + clearLine(); + if (finalMessage) { + process.stderr.write(`${finalMessage}\n`); + } + }, + + succeed(message?: string) { + this.stop(`✓ ${message || currentMessage}`); + }, + + fail(message?: string) { + this.stop(`✗ ${message || currentMessage}`); + }, + }; +} From 1710269aa15a4fa51a79a550d5a7133ff810e1d7 Mon Sep 17 00:00:00 2001 From: Abimael Martell Date: Fri, 30 Jan 2026 18:44:55 -0800 Subject: [PATCH 05/11] Handle Ctrl+C gracefully in agent command When user interrupts with Ctrl+C while waiting for agent completion, show helpful message with the command to check job status later. 
Co-Authored-By: Claude Opus 4.5 --- src/commands/agent.ts | 98 +++++++++++++++++++++++++------------------ 1 file changed, 57 insertions(+), 41 deletions(-) diff --git a/src/commands/agent.ts b/src/commands/agent.ts index 495cf9d..354ebbe 100644 --- a/src/commands/agent.ts +++ b/src/commands/agent.ts @@ -118,6 +118,15 @@ export async function executeAgent( const response = await app.startAgent(agentParams); const jobId = response.id; + // Handle Ctrl+C gracefully + const handleInterrupt = () => { + spinner.stop(); + process.stderr.write('\n\nInterrupted. Agent is still running.\n'); + process.stderr.write(`Check status with: firecrawl agent ${jobId}\n\n`); + process.exit(0); + }; + process.on('SIGINT', handleInterrupt); + spinner.update(`Agent running... (Job ID: ${jobId})`); // Poll for status @@ -125,48 +134,55 @@ export async function executeAgent( const startTime = Date.now(); const timeoutMs = timeout ? timeout * 1000 : undefined; - while (true) { - await new Promise((resolve) => setTimeout(resolve, pollMs)); - - const agentStatus = await app.getAgentStatus(jobId); - - if (agentStatus.status === 'completed') { - spinner.succeed('Agent completed'); - return { - success: agentStatus.success, - data: { - id: jobId, - status: agentStatus.status, - data: agentStatus.data, - creditsUsed: agentStatus.creditsUsed, - expiresAt: agentStatus.expiresAt, - }, - }; - } - - if (agentStatus.status === 'failed') { - spinner.fail('Agent failed'); - return { - success: false, - data: { - id: jobId, - status: agentStatus.status, - data: agentStatus.data, - creditsUsed: agentStatus.creditsUsed, - expiresAt: agentStatus.expiresAt, - }, - error: agentStatus.error, - }; - } - - // Check timeout - if (timeoutMs && Date.now() - startTime > timeoutMs) { - spinner.fail(`Timeout after ${timeout}s (Job ID: ${jobId})`); - return { - success: false, - error: `Timeout after ${timeout} seconds. Agent still processing. 
Job ID: ${jobId}`, - }; + try { + while (true) { + await new Promise((resolve) => setTimeout(resolve, pollMs)); + + const agentStatus = await app.getAgentStatus(jobId); + + if (agentStatus.status === 'completed') { + process.removeListener('SIGINT', handleInterrupt); + spinner.succeed('Agent completed'); + return { + success: agentStatus.success, + data: { + id: jobId, + status: agentStatus.status, + data: agentStatus.data, + creditsUsed: agentStatus.creditsUsed, + expiresAt: agentStatus.expiresAt, + }, + }; + } + + if (agentStatus.status === 'failed') { + process.removeListener('SIGINT', handleInterrupt); + spinner.fail('Agent failed'); + return { + success: false, + data: { + id: jobId, + status: agentStatus.status, + data: agentStatus.data, + creditsUsed: agentStatus.creditsUsed, + expiresAt: agentStatus.expiresAt, + }, + error: agentStatus.error, + }; + } + + // Check timeout + if (timeoutMs && Date.now() - startTime > timeoutMs) { + process.removeListener('SIGINT', handleInterrupt); + spinner.fail(`Timeout after ${timeout}s (Job ID: ${jobId})`); + return { + success: false, + error: `Timeout after ${timeout} seconds. Agent still processing. Job ID: ${jobId}`, + }; + } } + } finally { + process.removeListener('SIGINT', handleInterrupt); } } From a8600a9231e89c79d4cbccaddf91e1b25647f29a Mon Sep 17 00:00:00 2001 From: Abimael Martell Date: Fri, 30 Jan 2026 18:46:39 -0800 Subject: [PATCH 06/11] Allow --wait when checking agent job status Now you can use `firecrawl agent <job-id> --wait` to continuously poll an existing job until it completes, with the same spinner animation and Ctrl+C handling. 
Co-Authored-By: Claude Opus 4.5 --- src/commands/agent.ts | 97 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 86 insertions(+), 11 deletions(-) diff --git a/src/commands/agent.ts b/src/commands/agent.ts index 354ebbe..fbec7d3 100644 --- a/src/commands/agent.ts +++ b/src/commands/agent.ts @@ -32,7 +32,7 @@ function loadSchemaFromFile(filePath: string): Record { } /** - * Execute agent status check + * Execute agent status check (with optional wait/polling) */ async function checkAgentStatus( jobId: string, @@ -40,18 +40,93 @@ async function checkAgentStatus( ): Promise { try { const app = getClient({ apiKey: options.apiKey, apiUrl: options.apiUrl }); - const status = await app.getAgentStatus(jobId); - return { - success: status.success, - data: { - id: jobId, - status: status.status, - data: status.data, - creditsUsed: status.creditsUsed, - expiresAt: status.expiresAt, - }, + // If not waiting, just return current status + if (!options.wait) { + const status = await app.getAgentStatus(jobId); + return { + success: status.success, + data: { + id: jobId, + status: status.status, + data: status.data, + creditsUsed: status.creditsUsed, + expiresAt: status.expiresAt, + }, + }; + } + + // Wait mode: poll until completion + const spinner = createSpinner(`Checking agent status...`); + spinner.start(); + + // Handle Ctrl+C gracefully + const handleInterrupt = () => { + spinner.stop(); + process.stderr.write('\n\nInterrupted. Agent may still be running.\n'); + process.stderr.write(`Check status with: firecrawl agent ${jobId}\n\n`); + process.exit(0); }; + process.on('SIGINT', handleInterrupt); + + const pollMs = options.pollInterval ? options.pollInterval * 1000 : 5000; + const startTime = Date.now(); + const timeoutMs = options.timeout ? options.timeout * 1000 : undefined; + + try { + // Check initial status + let agentStatus = await app.getAgentStatus(jobId); + spinner.update(`Agent ${agentStatus.status}... 
(Job ID: ${jobId})`); + + while (true) { + if (agentStatus.status === 'completed') { + process.removeListener('SIGINT', handleInterrupt); + spinner.succeed('Agent completed'); + return { + success: agentStatus.success, + data: { + id: jobId, + status: agentStatus.status, + data: agentStatus.data, + creditsUsed: agentStatus.creditsUsed, + expiresAt: agentStatus.expiresAt, + }, + }; + } + + if (agentStatus.status === 'failed') { + process.removeListener('SIGINT', handleInterrupt); + spinner.fail('Agent failed'); + return { + success: false, + data: { + id: jobId, + status: agentStatus.status, + data: agentStatus.data, + creditsUsed: agentStatus.creditsUsed, + expiresAt: agentStatus.expiresAt, + }, + error: agentStatus.error, + }; + } + + // Check timeout + if (timeoutMs && Date.now() - startTime > timeoutMs) { + process.removeListener('SIGINT', handleInterrupt); + spinner.fail(`Timeout after ${options.timeout}s`); + return { + success: false, + error: `Timeout after ${options.timeout} seconds. Agent still processing.`, + }; + } + + await new Promise((resolve) => setTimeout(resolve, pollMs)); + agentStatus = await app.getAgentStatus(jobId); + spinner.update(`Agent ${agentStatus.status}... 
(Job ID: ${jobId})`); + } + } finally { + process.removeListener('SIGINT', handleInterrupt); + } } catch (error) { return { success: false, From c2ec84fed1b2acc539034d30de8c898386df1915 Mon Sep 17 00:00:00 2001 From: Abimael Martell Date: Fri, 30 Jan 2026 18:55:17 -0800 Subject: [PATCH 07/11] Improve error handling in agent command - Stop spinner before showing error messages - Extract detailed error messages from API responses - Show full error details to help debug issues Co-Authored-By: Claude Opus 4.5 --- src/commands/agent.ts | 190 ++++++++++++++++++++++++++---------------- 1 file changed, 118 insertions(+), 72 deletions(-) diff --git a/src/commands/agent.ts b/src/commands/agent.ts index fbec7d3..f319421 100644 --- a/src/commands/agent.ts +++ b/src/commands/agent.ts @@ -13,6 +13,31 @@ import { writeOutput } from '../utils/output'; import { createSpinner } from '../utils/spinner'; import { readFileSync } from 'fs'; +/** + * Extract detailed error message from API errors + */ +function extractErrorMessage(error: unknown): string { + if (error instanceof Error) { + // Check for response data in the error (common in axios/fetch errors) + const anyError = error as any; + if (anyError.response?.data?.error) { + return anyError.response.data.error; + } + if (anyError.response?.data?.message) { + return anyError.response.data.message; + } + if (anyError.response?.data) { + return JSON.stringify(anyError.response.data); + } + // Check for cause + if (anyError.cause) { + return `${error.message}: ${JSON.stringify(anyError.cause)}`; + } + return error.message; + } + return 'Unknown error occurred'; +} + /** * Load schema from file */ @@ -38,11 +63,11 @@ async function checkAgentStatus( jobId: string, options: AgentOptions ): Promise { - try { - const app = getClient({ apiKey: options.apiKey, apiUrl: options.apiUrl }); + const app = getClient({ apiKey: options.apiKey, apiUrl: options.apiUrl }); - // If not waiting, just return current status - if (!options.wait) { + // If 
not waiting, just return current status + if (!options.wait) { + try { const status = await app.getAgentStatus(jobId); return { success: status.success, @@ -54,84 +79,87 @@ async function checkAgentStatus( expiresAt: status.expiresAt, }, }; + } catch (error) { + return { + success: false, + error: extractErrorMessage(error), + }; } + } - // Wait mode: poll until completion - const spinner = createSpinner(`Checking agent status...`); - spinner.start(); - - // Handle Ctrl+C gracefully - const handleInterrupt = () => { - spinner.stop(); - process.stderr.write('\n\nInterrupted. Agent may still be running.\n'); - process.stderr.write(`Check status with: firecrawl agent ${jobId}\n\n`); - process.exit(0); - }; - process.on('SIGINT', handleInterrupt); - - const pollMs = options.pollInterval ? options.pollInterval * 1000 : 5000; - const startTime = Date.now(); - const timeoutMs = options.timeout ? options.timeout * 1000 : undefined; + // Wait mode: poll until completion + const spinner = createSpinner(`Checking agent status...`); + spinner.start(); - try { - // Check initial status - let agentStatus = await app.getAgentStatus(jobId); - spinner.update(`Agent ${agentStatus.status}... (Job ID: ${jobId})`); + // Handle Ctrl+C gracefully + const handleInterrupt = () => { + spinner.stop(); + process.stderr.write('\n\nInterrupted. Agent may still be running.\n'); + process.stderr.write(`Check status with: firecrawl agent ${jobId}\n\n`); + process.exit(0); + }; + process.on('SIGINT', handleInterrupt); - while (true) { - if (agentStatus.status === 'completed') { - process.removeListener('SIGINT', handleInterrupt); - spinner.succeed('Agent completed'); - return { - success: agentStatus.success, - data: { - id: jobId, - status: agentStatus.status, - data: agentStatus.data, - creditsUsed: agentStatus.creditsUsed, - expiresAt: agentStatus.expiresAt, - }, - }; - } + const pollMs = options.pollInterval ? 
options.pollInterval * 1000 : 5000; + const startTime = Date.now(); + const timeoutMs = options.timeout ? options.timeout * 1000 : undefined; - if (agentStatus.status === 'failed') { - process.removeListener('SIGINT', handleInterrupt); - spinner.fail('Agent failed'); - return { - success: false, - data: { - id: jobId, - status: agentStatus.status, - data: agentStatus.data, - creditsUsed: agentStatus.creditsUsed, - expiresAt: agentStatus.expiresAt, - }, - error: agentStatus.error, - }; - } + try { + // Check initial status + let agentStatus = await app.getAgentStatus(jobId); + spinner.update(`Agent ${agentStatus.status}... (Job ID: ${jobId})`); + + while (true) { + if (agentStatus.status === 'completed') { + spinner.succeed('Agent completed'); + return { + success: agentStatus.success, + data: { + id: jobId, + status: agentStatus.status, + data: agentStatus.data, + creditsUsed: agentStatus.creditsUsed, + expiresAt: agentStatus.expiresAt, + }, + }; + } - // Check timeout - if (timeoutMs && Date.now() - startTime > timeoutMs) { - process.removeListener('SIGINT', handleInterrupt); - spinner.fail(`Timeout after ${options.timeout}s`); - return { - success: false, - error: `Timeout after ${options.timeout} seconds. Agent still processing.`, - }; - } + if (agentStatus.status === 'failed') { + spinner.fail('Agent failed'); + return { + success: false, + data: { + id: jobId, + status: agentStatus.status, + data: agentStatus.data, + creditsUsed: agentStatus.creditsUsed, + expiresAt: agentStatus.expiresAt, + }, + error: agentStatus.error, + }; + } - await new Promise((resolve) => setTimeout(resolve, pollMs)); - agentStatus = await app.getAgentStatus(jobId); - spinner.update(`Agent ${agentStatus.status}... (Job ID: ${jobId})`); + // Check timeout + if (timeoutMs && Date.now() - startTime > timeoutMs) { + spinner.fail(`Timeout after ${options.timeout}s`); + return { + success: false, + error: `Timeout after ${options.timeout} seconds. 
Agent still processing.`, + }; } - } finally { - process.removeListener('SIGINT', handleInterrupt); + + await new Promise((resolve) => setTimeout(resolve, pollMs)); + agentStatus = await app.getAgentStatus(jobId); + spinner.update(`Agent ${agentStatus.status}... (Job ID: ${jobId})`); } } catch (error) { + spinner.fail('Failed to check agent status'); return { success: false, - error: error instanceof Error ? error.message : 'Unknown error occurred', + error: extractErrorMessage(error), }; + } finally { + process.removeListener('SIGINT', handleInterrupt); } } @@ -190,7 +218,16 @@ export async function executeAgent( spinner.start(); // Start agent first - const response = await app.startAgent(agentParams); + let response; + try { + response = await app.startAgent(agentParams); + } catch (error) { + spinner.fail('Failed to start agent'); + return { + success: false, + error: extractErrorMessage(error), + }; + } const jobId = response.id; // Handle Ctrl+C gracefully @@ -265,7 +302,16 @@ export async function executeAgent( const spinner = createSpinner('Starting agent...'); spinner.start(); - const response = await app.startAgent(agentParams); + let response; + try { + response = await app.startAgent(agentParams); + } catch (error) { + spinner.fail('Failed to start agent'); + return { + success: false, + error: extractErrorMessage(error), + }; + } spinner.succeed(`Agent started (Job ID: ${response.id})`); @@ -279,7 +325,7 @@ export async function executeAgent( } catch (error) { return { success: false, - error: error instanceof Error ? error.message : 'Unknown error occurred', + error: extractErrorMessage(error), }; } } From 523eba36b7286de016d6b8b319df812ec39e9455 Mon Sep 17 00:00:00 2001 From: Abimael Martell Date: Fri, 30 Jan 2026 19:00:04 -0800 Subject: [PATCH 08/11] Extract error details from Firecrawl SDK errors Show helpful error messages like "Invalid option: expected one of spark-1-pro|spark-1-mini" instead of just "Bad Request". 
Co-Authored-By: Claude Opus 4.5 --- src/commands/agent.ts | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/commands/agent.ts b/src/commands/agent.ts index f319421..e6e5217 100644 --- a/src/commands/agent.ts +++ b/src/commands/agent.ts @@ -18,8 +18,17 @@ import { readFileSync } from 'fs'; */ function extractErrorMessage(error: unknown): string { if (error instanceof Error) { - // Check for response data in the error (common in axios/fetch errors) const anyError = error as any; + + // Handle Firecrawl SDK errors with details array + if (anyError.details && Array.isArray(anyError.details)) { + const messages = anyError.details + .map((d: any) => d.message || JSON.stringify(d)) + .join('; '); + return messages || error.message; + } + + // Check for response data in the error (common in axios/fetch errors) if (anyError.response?.data?.error) { return anyError.response.data.error; } @@ -29,10 +38,7 @@ function extractErrorMessage(error: unknown): string { if (anyError.response?.data) { return JSON.stringify(anyError.response.data); } - // Check for cause - if (anyError.cause) { - return `${error.message}: ${JSON.stringify(anyError.cause)}`; - } + return error.message; } return 'Unknown error occurred'; From f16e86786d1b2e548ab8020355c3b8994000edc0 Mon Sep 17 00:00:00 2001 From: Abimael Martell Date: Fri, 30 Jan 2026 19:00:54 -0800 Subject: [PATCH 09/11] Validate model option before API call Show clear error message for invalid models instead of waiting for API rejection. Co-Authored-By: Claude Opus 4.5 --- src/index.ts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/index.ts b/src/index.ts index 180f9c1..b427ecd 100644 --- a/src/index.ts +++ b/src/index.ts @@ -538,6 +538,15 @@ function createAgentCommand(): Command { } } + // Validate model + const validModels = ['spark-1-pro', 'spark-1-mini']; + if (options.model && !validModels.includes(options.model)) { + console.error( + `Error: Invalid model "${options.model}". 
Valid models: ${validModels.join(', ')}` + ); + process.exit(1); + } + const agentOptions = { prompt: promptOrJobId, urls, From 5c46a8194fe8efa362d3d81af8e7ee914ae1d447 Mon Sep 17 00:00:00 2001 From: Abimael Martell Date: Fri, 30 Jan 2026 19:18:34 -0800 Subject: [PATCH 10/11] add agent to skill --- skills/firecrawl-cli/SKILL.md | 63 +++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/skills/firecrawl-cli/SKILL.md b/skills/firecrawl-cli/SKILL.md index 46eba9e..bd8c515 100644 --- a/skills/firecrawl-cli/SKILL.md +++ b/skills/firecrawl-cli/SKILL.md @@ -288,3 +288,66 @@ For many URLs, use xargs with `-P` for parallel execution: ```bash cat urls.txt | xargs -P 10 -I {} sh -c 'firecrawl scrape "{}" -o ".firecrawl/$(echo {} | md5).md"' ``` + +### Agent - AI-powered data extraction (use sparingly) + +**IMPORTANT:** Only use `agent` for complex multi-site data enrichment tasks. It takes 1-5 minutes to complete. For most tasks, use `scrape`, `search`, or `map` instead. 
+ +**When to use agent:** + +- Gathering data about entities (companies, products, people) from multiple unknown sources +- Competitive analysis comparing features/pricing across several websites +- Research requiring data synthesis from various sources +- Building lists of entities matching specific criteria +- Single page with very specific extraction needs (e.g., "find the CEO's email and LinkedIn") +- Single page discovery requiring navigation (e.g., "find the pricing for enterprise plan" when buried in subpages) + +**When NOT to use agent:** + +- Basic single page scraping → use `scrape` +- Known website crawling → use `crawl` +- URL discovery → use `map` +- Web search → use `search` +- Any time-sensitive task +- Simple content extraction that `scrape` can handle + +```bash +# Multi-site company research +firecrawl agent "Find Series A fintech startups from YC W24 with funding amounts" --wait -o .firecrawl/yc-fintech.json + +# Competitive pricing analysis +firecrawl agent "Compare pricing plans for Vercel, Netlify, and Cloudflare Pages" --wait -o .firecrawl/pricing.json + +# Focused extraction from specific URLs +firecrawl agent "Extract feature comparison" --urls https://a.com,https://b.com --wait -o .firecrawl/features.json + +# Structured output with schema +firecrawl agent "Find top 10 headless CMS options with pricing" --schema-file schema.json --wait -o .firecrawl/cms.json + +# Higher accuracy for complex tasks +firecrawl agent "Research AI coding assistants market" --model spark-1-pro --wait -o .firecrawl/research.json +``` + +**Agent Options:** + +- `--wait` - Wait for completion (recommended, otherwise returns job ID) +- `--urls <urls>` - Comma-separated URLs to focus extraction on +- `--model <model>` - spark-1-mini (default, faster) or spark-1-pro (higher accuracy) +- `--schema <json>` - Inline JSON schema for structured output +- `--schema-file <path>` - Path to JSON schema file +- `--max-credits <number>` - Maximum credits to spend +- `--timeout <seconds>` - Timeout when waiting +- `-o, 
--output <path>` - Save to file + +**Checking job status (if not using --wait):** + +```bash +# Start agent (returns job ID immediately) +firecrawl agent "Find competitors" -o .firecrawl/job.json + +# Check status later +firecrawl agent <job-id> + +# Wait for existing job to complete +firecrawl agent <job-id> --wait -o .firecrawl/result.json +``` From ea56ade5dd05006b16008f25a24e51888d2248b9 Mon Sep 17 00:00:00 2001 From: Abimael Martell Date: Tue, 3 Feb 2026 16:35:26 -0800 Subject: [PATCH 11/11] only command for now --- skills/firecrawl-cli/SKILL.md | 63 ----------------------------------- 1 file changed, 63 deletions(-) diff --git a/skills/firecrawl-cli/SKILL.md b/skills/firecrawl-cli/SKILL.md index bd8c515..46eba9e 100644 --- a/skills/firecrawl-cli/SKILL.md +++ b/skills/firecrawl-cli/SKILL.md @@ -288,66 +288,3 @@ For many URLs, use xargs with `-P` for parallel execution: ```bash cat urls.txt | xargs -P 10 -I {} sh -c 'firecrawl scrape "{}" -o ".firecrawl/$(echo {} | md5).md"' ``` - -### Agent - AI-powered data extraction (use sparingly) - -**IMPORTANT:** Only use `agent` for complex multi-site data enrichment tasks. It takes 1-5 minutes to complete. For most tasks, use `scrape`, `search`, or `map` instead. 
- -**When to use agent:** - -- Gathering data about entities (companies, products, people) from multiple unknown sources -- Competitive analysis comparing features/pricing across several websites -- Research requiring data synthesis from various sources -- Building lists of entities matching specific criteria -- Single page with very specific extraction needs (e.g., "find the CEO's email and LinkedIn") -- Single page discovery requiring navigation (e.g., "find the pricing for enterprise plan" when buried in subpages) - -**When NOT to use agent:** - -- Basic single page scraping → use `scrape` -- Known website crawling → use `crawl` -- URL discovery → use `map` -- Web search → use `search` -- Any time-sensitive task -- Simple content extraction that `scrape` can handle - -```bash -# Multi-site company research -firecrawl agent "Find Series A fintech startups from YC W24 with funding amounts" --wait -o .firecrawl/yc-fintech.json - -# Competitive pricing analysis -firecrawl agent "Compare pricing plans for Vercel, Netlify, and Cloudflare Pages" --wait -o .firecrawl/pricing.json - -# Focused extraction from specific URLs -firecrawl agent "Extract feature comparison" --urls https://a.com,https://b.com --wait -o .firecrawl/features.json - -# Structured output with schema -firecrawl agent "Find top 10 headless CMS options with pricing" --schema-file schema.json --wait -o .firecrawl/cms.json - -# Higher accuracy for complex tasks -firecrawl agent "Research AI coding assistants market" --model spark-1-pro --wait -o .firecrawl/research.json -``` - -**Agent Options:** - -- `--wait` - Wait for completion (recommended, otherwise returns job ID) -- `--urls <urls>` - Comma-separated URLs to focus extraction on -- `--model <model>` - spark-1-mini (default, faster) or spark-1-pro (higher accuracy) -- `--schema <json>` - Inline JSON schema for structured output -- `--schema-file <path>` - Path to JSON schema file -- `--max-credits <number>` - Maximum credits to spend -- `--timeout <seconds>` - 
Timeout when waiting -- `-o, 
--output <path>` - Save to file - -**Checking job status (if not using --wait):** - -```bash -# Start agent (returns job ID immediately) -firecrawl agent "Find competitors" -o .firecrawl/job.json - -# Check status later -firecrawl agent <job-id> - -# Wait for existing job to complete -firecrawl agent <job-id> --wait -o .firecrawl/result.json -```