Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ The official Node.js SDK for [Maxun](https://maxun.dev) - turn any website into

Works with both Maxun Cloud and Maxun Open Source - automatically handles the differences for you.

https://github.com/user-attachments/assets/71a6f10b-5b2a-45dd-9ef7-53d0bcf2b76d


## What can you do with Maxun SDK?

- **Extract structured data** from any website
Expand Down
53 changes: 36 additions & 17 deletions examples/llm-extraction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
* - LLM automatically generates the extraction workflow
* - Support for multiple LLM providers (Ollama, Anthropic, OpenAI)
* - Creates a reusable robot that can be executed anytime
* - Auto-search: When no URL is provided, the system searches for the website automatically
*
* Site: Y Combinator Companies (https://www.ycombinator.com/companies)
*/
Expand All @@ -20,34 +21,51 @@ async function main() {
});

try {
console.log('Creating robot using LLM extraction...\n');

// Use natural language to describe what to extract
// The LLM will automatically generate the workflow
const robot = await extractor.extract(
'https://www.ycombinator.com/companies',
{
prompt: 'Extract the first 15 YC company names, their descriptions, and batch information',
llmProvider: 'ollama', // or 'anthropic', 'openai'
llmModel: 'llama3.2-vision', // default for ollama
llmBaseUrl: 'http://localhost:11434', // ollama default
robotName: 'YC Companies LLM Extractor'
}
);

console.log(`✓ Robot created: ${robot.id}`);
console.log('Example 1: Creating robot with configured URL...\n');

const robot = await extractor.extract({
url: 'https://www.ycombinator.com/companies',
prompt: 'Extract the first 15 YC company names, their descriptions, and batch information',
llmProvider: 'ollama',
llmModel: 'llama3.2-vision',
llmBaseUrl: 'http://localhost:11434',
robotName: 'YC Companies LLM Extractor'
});

console.log(`Robot created: ${robot.id}`);

// Execute the generated robot
console.log('Executing robot...\n');
const result = await robot.run();

console.log(`Extraction completed!`);
console.log(`Extraction completed!`);
console.log(` Status: ${result.status}`);
console.log(` Companies extracted: ${result.data.listData?.length || 0}\n`);

console.log('First 3 companies:');
console.log(JSON.stringify(result.data.listData?.slice(0, 3), null, 2));

console.log('\n\nExample 2: Creating robot without configured URL...\n');

const autoSearchRobot = await extractor.extract({
prompt: 'Extract company names and descriptions from the YCombinator Companies page',
llmProvider: 'ollama',
robotName: 'YC Auto-Search Extractor'
});

console.log(`Auto-search robot created: ${autoSearchRobot.id}`);

// Execute the generated robot
console.log('Executing robot...\n');
const autoSearchResult = await autoSearchRobot.run();

console.log(`Extraction completed!`);
console.log(` Status: ${autoSearchResult.status}`);
console.log(` Companies extracted: ${autoSearchResult.data.listData?.length || 0}\n`);

console.log('First 3 companies:');
console.log(JSON.stringify(autoSearchResult.data.listData?.slice(0, 3), null, 2));

// Note: For Anthropic (recommended for best results):
// llmProvider: 'anthropic',
// llmModel: 'claude-3-5-sonnet-20241022',
Expand All @@ -73,3 +91,4 @@ if (!process.env.MAXUN_API_KEY) {
}

main();

6 changes: 4 additions & 2 deletions src/client/maxun-client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -238,8 +238,10 @@ export class Client {

/**
* LLM-based extraction - extract data using natural language prompt
* URL is optional - if omitted or empty, the server will search for the target website based on the prompt
*/
async extractWithLLM(url: string, options: {
async extractWithLLM(options: {
url?: string;
prompt: string;
llmProvider?: 'anthropic' | 'openai' | 'ollama';
llmModel?: string;
Expand All @@ -250,7 +252,7 @@ export class Client {
const response = await this.axios.post<ApiResponse<any>>(
'/extract/llm',
{
url,
url: options.url || undefined,
prompt: options.prompt,
llmProvider: options.llmProvider,
llmModel: options.llmModel,
Expand Down
7 changes: 4 additions & 3 deletions src/extract.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ export class Extract {
* LLM-based extraction - create a robot using natural language prompt
* The robot is saved and can be executed anytime by the user
*
* @param url - The URL to extract data from
* @param options - Extraction options
* @param options.url - (Optional) The URL to extract data from. If omitted or empty, the system will automatically search for the target website based on the prompt.
* @param options.prompt - Natural language prompt describing what to extract
* @param options.llmProvider - LLM provider to use: 'anthropic', 'openai', or 'ollama' (default: 'ollama')
* @param options.llmModel - Model name (default: 'llama3.2-vision' for ollama, 'claude-3-5-sonnet-20241022' for anthropic, 'gpt-4-vision-preview' for openai)
Expand All @@ -85,15 +85,16 @@ export class Extract {
* @param options.robotName - Optional custom name for the robot
* @returns Robot instance that can be executed
*/
async extract(url: string, options: {
async extract(options: {
url?: string;
prompt: string;
llmProvider?: 'anthropic' | 'openai' | 'ollama';
llmModel?: string;
llmApiKey?: string;
llmBaseUrl?: string;
robotName?: string;
}): Promise<Robot> {
const robotData = await this.client.extractWithLLM(url, options);
const robotData = await this.client.extractWithLLM(options);
const robot = await this.client.getRobot(robotData.robotId);
return new Robot(this.client, robot);
}
Expand Down