diff --git a/README.md b/README.md index 389deac..1630a95 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,9 @@ The official Node.js SDK for [Maxun](https://maxun.dev) - turn any website into Works with both Maxun Cloud and Maxun Open Source - automatically handles the differences for you. +https://github.com/user-attachments/assets/71a6f10b-5b2a-45dd-9ef7-53d0bcf2b76d + + ## What can you do with Maxun SDK? - **Extract structured data** from any website diff --git a/examples/llm-extraction.ts b/examples/llm-extraction.ts index 7314402..f639512 100644 --- a/examples/llm-extraction.ts +++ b/examples/llm-extraction.ts @@ -6,6 +6,7 @@ * - LLM automatically generates the extraction workflow * - Support for multiple LLM providers (Ollama, Anthropic, OpenAI) * - Creates a reusable robot that can be executed anytime + * - Auto-search: When no URL is provided, the system searches for the website automatically * * Site: Y Combinator Companies (https://www.ycombinator.com/companies) */ @@ -20,34 +21,51 @@ async function main() { }); try { - console.log('Creating robot using LLM extraction...\n'); - - // Use natural language to describe what to extract - // The LLM will automatically generate the workflow - const robot = await extractor.extract( - 'https://www.ycombinator.com/companies', - { - prompt: 'Extract the first 15 YC company names, their descriptions, and batch information', - llmProvider: 'ollama', // or 'anthropic', 'openai' - llmModel: 'llama3.2-vision', // default for ollama - llmBaseUrl: 'http://localhost:11434', // ollama default - robotName: 'YC Companies LLM Extractor' - } - ); - - console.log(`✓ Robot created: ${robot.id}`); + console.log('Example 1: Creating robot with configured URL...\n'); + + const robot = await extractor.extract({ + url: 'https://www.ycombinator.com/companies', + prompt: 'Extract the first 15 YC company names, their descriptions, and batch information', + llmProvider: 'ollama', + llmModel: 'llama3.2-vision', + llmBaseUrl: 
'http://localhost:11434', + robotName: 'YC Companies LLM Extractor' + }); + + console.log(`Robot created: ${robot.id}`); // Execute the generated robot console.log('Executing robot...\n'); const result = await robot.run(); - console.log(`✓ Extraction completed!`); + console.log(`Extraction completed!`); console.log(` Status: ${result.status}`); console.log(` Companies extracted: ${result.data.listData?.length || 0}\n`); console.log('First 3 companies:'); console.log(JSON.stringify(result.data.listData?.slice(0, 3), null, 2)); + console.log('\n\nExample 2: Creating robot without configured URL...\n'); + + const autoSearchRobot = await extractor.extract({ + prompt: 'Extract company names and descriptions from the YCombinator Companies page', + llmProvider: 'ollama', + robotName: 'YC Auto-Search Extractor' + }); + + console.log(`Auto-search robot created: ${autoSearchRobot.id}`); + + // Execute the generated robot + console.log('Executing robot...\n'); + const autoSearchResult = await autoSearchRobot.run(); + + console.log(`Extraction completed!`); + console.log(` Status: ${autoSearchResult.status}`); + console.log(` Companies extracted: ${autoSearchResult.data.listData?.length || 0}\n`); + + console.log('First 3 companies:'); + console.log(JSON.stringify(autoSearchResult.data.listData?.slice(0, 3), null, 2)); + // Note: For Anthropic (recommended for best results): // llmProvider: 'anthropic', // llmModel: 'claude-3-5-sonnet-20241022', @@ -73,3 +91,4 @@ if (!process.env.MAXUN_API_KEY) { } main(); + diff --git a/src/client/maxun-client.ts b/src/client/maxun-client.ts index 4fc6709..0933c03 100644 --- a/src/client/maxun-client.ts +++ b/src/client/maxun-client.ts @@ -238,8 +238,10 @@ export class Client { /** * LLM-based extraction - extract data using natural language prompt + * URL is optional - if not provided, the server will search for the target website based on the prompt */ - async extractWithLLM(url: string, options: { + async extractWithLLM(options: { + url?: 
string; prompt: string; llmProvider?: 'anthropic' | 'openai' | 'ollama'; llmModel?: string; @@ -250,7 +252,7 @@ export class Client { const response = await this.axios.post<ApiResponse<{ robotId: string }>>( '/extract/llm', { - url, + url: options.url || undefined, prompt: options.prompt, llmProvider: options.llmProvider, llmModel: options.llmModel, diff --git a/src/extract.ts b/src/extract.ts index 8408822..abe893f 100644 --- a/src/extract.ts +++ b/src/extract.ts @@ -75,8 +75,8 @@ export class Extract { * LLM-based extraction - create a robot using natural language prompt * The robot is saved and can be executed anytime by the user * - * @param url - The URL to extract data from * @param options - Extraction options + * @param options.url - (Optional) The URL to extract data from. If not provided, the system will automatically search for the target website based on the prompt. * @param options.prompt - Natural language prompt describing what to extract * @param options.llmProvider - LLM provider to use: 'anthropic', 'openai', or 'ollama' (default: 'ollama') * @param options.llmModel - Model name (default: 'llama3.2-vision' for ollama, 'claude-3-5-sonnet-20241022' for anthropic, 'gpt-4-vision-preview' for openai) @@ -85,7 +85,8 @@ export class Extract { * @param options.robotName - Optional custom name for the robot * @returns Robot instance that can be executed */ - async extract(url: string, options: { + async extract(options: { + url?: string; prompt: string; llmProvider?: 'anthropic' | 'openai' | 'ollama'; llmModel?: string; @@ -93,7 +94,7 @@ export class Extract { llmBaseUrl?: string; robotName?: string; }): Promise<Robot> { - const robotData = await this.client.extractWithLLM(url, options); + const robotData = await this.client.extractWithLLM(options); const robot = await this.client.getRobot(robotData.robotId); return new Robot(this.client, robot); }