From 444ccc28599153879287df36a121e78edf650f80 Mon Sep 17 00:00:00 2001
From: Karishma Shukla
Date: Fri, 12 Dec 2025 03:09:31 +0530
Subject: [PATCH 1/2] chore: add llm extract demo

---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index c1c567f..23aac35 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,9 @@ The official Node.js SDK for [Maxun](https://maxun.dev) - turn any website into
 
 Works with both Maxun Cloud and Maxun Open Source - automatically handles the differences for you.
 
+https://github.com/user-attachments/assets/71a6f10b-5b2a-45dd-9ef7-53d0bcf2b76d
+
+
 ## What can you do with Maxun SDK?
 
 - **Extract structured data** from any website

From 45617a895c095266586df38bc2226c9dee21e5a9 Mon Sep 17 00:00:00 2001
From: Rohit Rajan
Date: Wed, 14 Jan 2026 14:49:07 +0530
Subject: [PATCH 2/2] feat: add auto-search logic for LLM extraction

---
 examples/llm-extraction.ts | 53 ++++++++++++++++++++++++++------------
 src/client/maxun-client.ts |  6 +++--
 src/extract.ts             |  7 ++---
 3 files changed, 44 insertions(+), 22 deletions(-)

diff --git a/examples/llm-extraction.ts b/examples/llm-extraction.ts
index 7314402..f639512 100644
--- a/examples/llm-extraction.ts
+++ b/examples/llm-extraction.ts
@@ -6,6 +6,7 @@
  * - LLM automatically generates the extraction workflow
  * - Support for multiple LLM providers (Ollama, Anthropic, OpenAI)
  * - Creates a reusable robot that can be executed anytime
+ * - Auto-search: When no URL is provided, the system searches for the website automatically
  *
  * Site: Y Combinator Companies (https://www.ycombinator.com/companies)
  */
@@ -20,34 +21,51 @@ async function main() {
   });
 
   try {
-    console.log('Creating robot using LLM extraction...\n');
-
-    // Use natural language to describe what to extract
-    // The LLM will automatically generate the workflow
-    const robot = await extractor.extract(
-      'https://www.ycombinator.com/companies',
-      {
-        prompt: 'Extract the first 15 YC company names, their descriptions, and batch information',
-        llmProvider: 'ollama', // or 'anthropic', 'openai'
-        llmModel: 'llama3.2-vision', // default for ollama
-        llmBaseUrl: 'http://localhost:11434', // ollama default
-        robotName: 'YC Companies LLM Extractor'
-      }
-    );
-
-    console.log(`✓ Robot created: ${robot.id}`);
+    console.log('Example 1: Creating robot with configured URL...\n');
+
+    const robot = await extractor.extract({
+      url: 'https://www.ycombinator.com/companies',
+      prompt: 'Extract the first 15 YC company names, their descriptions, and batch information',
+      llmProvider: 'ollama',
+      llmModel: 'llama3.2-vision',
+      llmBaseUrl: 'http://localhost:11434',
+      robotName: 'YC Companies LLM Extractor'
+    });
+
+    console.log(`Robot created: ${robot.id}`);
 
     // Execute the generated robot
     console.log('Executing robot...\n');
     const result = await robot.run();
 
-    console.log(`✓ Extraction completed!`);
+    console.log(`Extraction completed!`);
     console.log(`  Status: ${result.status}`);
     console.log(`  Companies extracted: ${result.data.listData?.length || 0}\n`);
 
     console.log('First 3 companies:');
     console.log(JSON.stringify(result.data.listData?.slice(0, 3), null, 2));
 
+    console.log('\n\nExample 2: Creating robot without configured URL...\n');
+
+    const autoSearchRobot = await extractor.extract({
+      prompt: 'Extract company names and descriptions from the YCombinator Companies page',
+      llmProvider: 'ollama',
+      robotName: 'YC Auto-Search Extractor'
+    });
+
+    console.log(`Auto-search robot created: ${autoSearchRobot.id}`);
+
+    // Execute the generated robot
+    console.log('Executing robot...\n');
+    const autoSearchResult = await autoSearchRobot.run();
+
+    console.log(`Extraction completed!`);
+    console.log(`  Status: ${autoSearchResult.status}`);
+    console.log(`  Companies extracted: ${autoSearchResult.data.listData?.length || 0}\n`);
+
+    console.log('First 3 companies:');
+    console.log(JSON.stringify(autoSearchResult.data.listData?.slice(0, 3), null, 2));
+
     // Note: For Anthropic (recommended for best results):
     //   llmProvider: 'anthropic',
     //   llmModel: 'claude-3-5-sonnet-20241022',
@@ -73,3 +91,4 @@
 
 
 main();
+

diff --git a/src/client/maxun-client.ts b/src/client/maxun-client.ts
index 4fc6709..0933c03 100644
--- a/src/client/maxun-client.ts
+++ b/src/client/maxun-client.ts
@@ -238,8 +238,10 @@ export class Client {
 
   /**
    * LLM-based extraction - extract data using natural language prompt
+   * URL is optional - if not provided, the server will search for the target website based on the prompt
    */
-  async extractWithLLM(url: string, options: {
+  async extractWithLLM(options: {
+    url?: string;
     prompt: string;
     llmProvider?: 'anthropic' | 'openai' | 'ollama';
     llmModel?: string;
@@ -250,7 +252,7 @@
     const response = await this.axios.post>(
       '/extract/llm',
       {
-        url,
+        url: options.url || undefined,
         prompt: options.prompt,
         llmProvider: options.llmProvider,
         llmModel: options.llmModel,
diff --git a/src/extract.ts b/src/extract.ts
index 8408822..abe893f 100644
--- a/src/extract.ts
+++ b/src/extract.ts
@@ -75,8 +75,8 @@
    * LLM-based extraction - create a robot using natural language prompt
    * The robot is saved and can be executed anytime by the user
    *
-   * @param url - The URL to extract data from
    * @param options - Extraction options
+   * @param options.url - (Optional) The URL to extract data from. If not provided, the system will automatically search for the target website based on the prompt.
    * @param options.prompt - Natural language prompt describing what to extract
    * @param options.llmProvider - LLM provider to use: 'anthropic', 'openai', or 'ollama' (default: 'ollama')
    * @param options.llmModel - Model name (default: 'llama3.2-vision' for ollama, 'claude-3-5-sonnet-20241022' for anthropic, 'gpt-4-vision-preview' for openai)
@@ -85,7 +85,8 @@
    * @param options.robotName - Optional custom name for the robot
    * @returns Robot instance that can be executed
    */
-  async extract(url: string, options: {
+  async extract(options: {
+    url?: string;
     prompt: string;
     llmProvider?: 'anthropic' | 'openai' | 'ollama';
     llmModel?: string;
@@ -93,7 +94,7 @@
     llmBaseUrl?: string;
     robotName?: string;
   }): Promise<Robot> {
-    const robotData = await this.client.extractWithLLM(url, options);
+    const robotData = await this.client.extractWithLLM(options);
     const robot = await this.client.getRobot(robotData.robotId);
     return new Robot(this.client, robot);
   }