diff --git a/.github/workflows/test-mcp-examples.yaml b/.github/workflows/test-mcp-examples.yaml index 969adf4c6..c4b2d9cfb 100644 --- a/.github/workflows/test-mcp-examples.yaml +++ b/.github/workflows/test-mcp-examples.yaml @@ -1,22 +1,18 @@ -name: Test MCP Examples +name: Test Pipeline Examples on: - push: - branches: - - main - paths: - - 'modules/ai-agents/examples/**/*.yaml' - - 'modules/ai-agents/examples/test-mcp-examples.sh' pull_request: branches: - main paths: - 'modules/ai-agents/examples/**/*.yaml' - - 'modules/ai-agents/examples/test-mcp-examples.sh' + - 'modules/ai-agents/examples/**/*.sh' + - 'modules/develop/examples/**/*.yaml' + - 'modules/develop/examples/**/*.sh' jobs: test-all-examples: - name: Test All MCP Examples + name: Test All Pipeline Examples runs-on: ubuntu-latest steps: @@ -39,15 +35,33 @@ jobs: sudo wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 sudo chmod +x /usr/local/bin/yq - - name: Make test script executable - run: chmod +x modules/ai-agents/examples/test-mcp-examples.sh + - name: Make test scripts executable + run: | + find modules/ai-agents/examples -name "*.sh" -exec chmod +x {} \; + find modules/develop/examples -name "*.sh" -exec chmod +x {} \; 2>/dev/null || true + + - name: Run MCP tools tests + run: | + cd modules/ai-agents/examples/mcp-tools + ./test-mcp-tools.sh + + - name: Run ai-agents pipeline tests + run: | + cd modules/ai-agents/examples/pipelines + ./test-pipelines.sh - - name: Run MCP examples tests + - name: Run cookbook tests run: | - cd modules/ai-agents/examples - ./test-mcp-examples.sh + for dir in modules/develop/examples/cookbooks/*/; do + if compgen -G "${dir}test-*.sh" > /dev/null; then + echo "Testing ${dir}..." 
+ cd "${dir}" + ./test-*.sh + cd - > /dev/null + fi + done - name: Test Summary if: always() run: | - echo "::notice title=MCP Examples Testing::All Cloud MCP examples have been validated" + echo "::notice title=Pipeline Examples Testing::All pipeline examples have been validated" diff --git a/docs-data/personas.yaml b/docs-data/personas.yaml index 46d4e7912..f3368e85d 100644 --- a/docs-data/personas.yaml +++ b/docs-data/personas.yaml @@ -2,34 +2,77 @@ # # These personas represent the target audience for Redpanda Cloud documentation. # Use these when assigning :personas: attributes to documentation pages. +# +# This persona set covers two domains: +# 1. Streaming/Data Platform: Real-time data streaming, connectors, pipelines +# 2. Agentic Data Platform (ADP): AI agent development, governance, enterprise AI adoption schema_version: "1.0" repository: cloud-docs personas: - - id: app_developer - name: Application Developer - description: Builds applications that produce and consume data from Redpanda Cloud - experience_level: intermediate + # ============================================================================ + # TIER 1: Executive & Governance + # ============================================================================ + + - id: executive + name: Executive Stakeholder + description: CIO/CAIO/Head of AI Strategy driving enterprise AI adoption and governance + experience_level: executive goals: - - Connect applications to Redpanda Cloud clusters - - Produce and consume messages reliably - - Implement proper error handling and retries - - Optimize client performance + - Drive enterprise-wide AI adoption strategy + - Ensure ROI on AI investments + - Establish governance framework for agent deployments + - Manage cost and resource allocation + - Ensure compliance with organizational policies pain_points: - - Authentication and connection configuration - - Understanding Kafka client options - - Debugging connectivity issues - - Choosing the right client library 
+ - Lack of visibility into agent usage and costs + - Difficulty enforcing governance at scale + - Unclear ROI metrics for AI initiatives + - Risk of shadow AI deployments + - Integration with existing enterprise systems content_preferences: - - Working code examples in multiple languages - - Connection configuration templates - - Client library comparisons - - Performance tuning guides + - High-level governance frameworks + - ROI and cost analysis + - Compliance and audit capabilities + - Executive dashboards and reporting + - Strategic planning guides typical_content_types: - - how-to - - tutorial + - overview + - concepts + - best-practices + + - id: security_leader + name: Security & Risk Leader + description: CISO/Compliance Officer protecting systems and enforcing data protection policies + experience_level: advanced + goals: + - Enforce agent policy and access controls + - Maintain audit trails for compliance + - Protect sensitive data and credentials + - Manage risk across agent deployments + - Ensure regulatory compliance + pain_points: + - Agent access to sensitive systems + - Lack of visibility into agent actions + - Difficult to audit agent behavior + - Credential management and rotation + - Compliance with data protection regulations + content_preferences: + - Security architecture patterns + - Policy enforcement mechanisms + - Audit trail documentation + - Compliance certification guides + - Incident response procedures + typical_content_types: + - concepts - reference + - best-practices + - troubleshooting + + # ============================================================================ + # TIER 2: Platform Operations + # ============================================================================ - id: platform_admin name: Platform Administrator @@ -55,47 +98,97 @@ personas: - reference - best-practices - - id: data_engineer - name: Data Engineer - description: Builds data pipelines using managed connectors and Redpanda Connect - experience_level: 
intermediate + - id: ai_platform_engineer + name: AI/ML Platform Engineer + description: Operates agent infrastructure, runtimes, and connectivity with governance controls + experience_level: advanced goals: - - Set up managed connectors to move data between systems - - Transform and route data reliably - - Monitor connector and pipeline health - - Handle errors and retries + - Deploy and operate agent runtime infrastructure + - Configure governance controls and policies + - Monitor agent performance and resource usage + - Onboard and manage MCP servers + - Ensure agent observability and debugging pain_points: - - Connector configuration complexity - - Debugging failed connectors - - Schema management and evolution - - Performance tuning + - Complex agent runtime configuration + - Difficult to troubleshoot agent failures + - Managing agent resource allocation + - Integrating governance with existing tools + - Scaling agent infrastructure content_preferences: - - Connector setup guides - - Transformation examples - - Error handling patterns - - Monitoring and troubleshooting + - Infrastructure setup guides + - Governance configuration patterns + - Observability and monitoring setup + - Performance tuning documentation + - Troubleshooting workflows typical_content_types: - how-to - - cookbook + - reference - troubleshooting + - best-practices - - id: ai_agent_developer - name: AI Agent Developer - description: Builds AI agents and integrations using MCP tools and LLM frameworks + # ============================================================================ + # TIER 3: Builders & Developers + # ============================================================================ + + - id: app_developer + name: Application Developer + description: Builds applications that produce and consume data from Redpanda Cloud experience_level: intermediate goals: + - Connect applications to Redpanda Cloud clusters + - Produce and consume messages reliably + - Implement proper error 
handling and retries + - Optimize client performance + pain_points: + - Authentication and connection configuration + - Understanding Kafka client options + - Debugging connectivity issues + - Choosing the right client library + content_preferences: + - Working code examples in multiple languages + - Connection configuration templates + - Client library comparisons + - Performance tuning guides + typical_content_types: + - how-to + - tutorial + - reference + + - id: agent_developer + name: Agent Developer + description: Builds AI agents, agentic workflows, and MCP tools that integrate with Redpanda Cloud and ADP + experience_level: intermediate + goals: + # MCP and streaming integration - Create MCP tools that AI assistants can discover and use - Deploy MCP servers to Redpanda Cloud - Integrate with AI/LLM applications - Debug agent-tool interactions + # Agentic workflows and governed deployment + - Build agents and workflows that solve business problems + - Use ADP catalog, templates, and curated datasets + - Design reasoning patterns and tool interactions + - Deploy agents into governed runtime pain_points: + # MCP and integration challenges - MCP configuration syntax - Testing tools before deployment - Limited AI-specific examples + # ADP and governance challenges + - Hard to discover existing templates, MCP servers, datasets + - Unclear access policies + - Brittle multi-step integrations + - Inconsistent testing/debugging environments content_preferences: + # Code examples and patterns - Working code examples with AI context - Testing and debugging workflows - Integration patterns + # Catalog and governance + - Rich catalog of agent templates and tools + - Governance introspection (what agent can/can't do) + - Replay-based debugging + - Streamlined deployment workflows typical_content_types: - tutorial - how-to @@ -127,6 +220,81 @@ personas: - reference - best-practices + # ============================================================================ + # TIER 4: 
Data & Knowledge Management + # ============================================================================ + + - id: data_engineer + name: Data Engineer + description: Builds data pipelines with managed connectors AND creates curated datasets for agent consumption + experience_level: intermediate + goals: + # Data movement and pipelines + - Set up managed connectors to move data between systems + - Transform and route data reliably + - Monitor connector and pipeline health + - Handle errors and retries + # Agent-ready datasets and RAG + - Create agent-ready datasets with federated SQL + - Ensure data quality and freshness for agents + - Expose data safely through governed views + - Provide clean RAG context via MCP servers + pain_points: + # Connector and pipeline challenges + - Connector configuration complexity + - Debugging failed connectors + - Schema management and evolution + - Performance tuning + # Data curation for agents + - Siloed data across sources + - Fragile RAG sources + - Schema drift + - Difficulty providing agent-ready datasets quickly + content_preferences: + # Connector and transformation + - Connector setup guides + - Transformation examples + - Error handling patterns + - Monitoring and troubleshooting + # Federated data and RAG + - Federated SQL query examples + - Governed view patterns + - RAG context design + - Data lineage visualization + typical_content_types: + - how-to + - cookbook + - troubleshooting + - reference + + - id: knowledge_manager + name: Knowledge & Operations Manager + description: Maintains organizational documentation and knowledge bases for agent consumption + experience_level: intermediate + goals: + - Ingest and maintain organizational knowledge bases + - Ensure content freshness and accuracy + - Optimize vector search for agent queries + - Manage knowledge base access and permissions + pain_points: + - Stale or outdated documentation + - Difficult to index and search content + - Managing content from multiple 
sources + - Ensuring agent retrieval accuracy + content_preferences: + - KB ingestion workflows + - Vector search optimization guides + - Content freshness strategies + - Access control patterns + typical_content_types: + - how-to + - best-practices + - troubleshooting + + # ============================================================================ + # TIER 5: Evaluation & End Users + # ============================================================================ + - id: evaluator name: Technical Evaluator description: Assessing Redpanda Cloud for their organization @@ -150,4 +318,27 @@ personas: - overview - concepts - tutorial - - get-started + + - id: business_user + name: Business End User + description: Uses agent-powered automations to complete business tasks + experience_level: beginner + goals: + - Complete tasks efficiently using agents + - Understand what agents can and cannot do + - Trust agent recommendations and actions + - Report issues when agents fail + pain_points: + - Unclear agent capabilities + - Unexpected agent behavior + - Lack of transparency in agent actions + - Difficulty getting help when agents fail + content_preferences: + - Simple, task-oriented guides + - Agent capability overviews + - Troubleshooting for common issues + - Trust and transparency documentation + typical_content_types: + - overview + - how-to + - troubleshooting diff --git a/local-antora-playbook.yml b/local-antora-playbook.yml index 3e5b6c1d8..d8d478c82 100644 --- a/local-antora-playbook.yml +++ b/local-antora-playbook.yml @@ -10,9 +10,6 @@ urls: latest_version_segment: 'current' output: clean: true -runtime: - log: - failure_level: error content: sources: - url: . 
diff --git a/modules/ROOT/nav.adoc b/modules/ROOT/nav.adoc index 990e75f9e..fe1ee293c 100644 --- a/modules/ROOT/nav.adoc +++ b/modules/ROOT/nav.adoc @@ -25,23 +25,74 @@ * xref:ai-agents:index.adoc[Agentic AI] ** xref:ai-agents:mcp/index.adoc[MCP] -*** xref:ai-agents:mcp/overview.adoc[MCP Overview] +*** xref:ai-agents:mcp/overview.adoc[Overview] *** xref:ai-agents:mcp/remote/index.adoc[Remote MCP] **** xref:ai-agents:mcp/remote/overview.adoc[Overview] -**** xref:ai-agents:mcp/remote/quickstart.adoc[Quickstart] **** xref:ai-agents:mcp/remote/concepts.adoc[Concepts] +**** xref:ai-agents:mcp/remote/quickstart.adoc[Quickstart] **** xref:ai-agents:mcp/remote/create-tool.adoc[Create a Tool] **** xref:ai-agents:mcp/remote/best-practices.adoc[Best Practices] **** xref:ai-agents:mcp/remote/tool-patterns.adoc[Tool Patterns] -**** xref:ai-agents:mcp/remote/troubleshooting.adoc[Troubleshooting] -**** xref:ai-agents:mcp/remote/admin-guide.adoc[Admin Guide] -***** xref:ai-agents:mcp/remote/manage-servers.adoc[Manage Servers] -***** xref:ai-agents:mcp/remote/scale-resources.adoc[Scale Resources] -***** xref:ai-agents:mcp/remote/monitor-activity.adoc[Monitor Activity] +**** xref:ai-agents:mcp/remote/troubleshooting.adoc[Troubleshoot] +**** xref:ai-agents:mcp/remote/manage-servers.adoc[Manage Servers] +**** xref:ai-agents:mcp/remote/monitor-mcp-servers.adoc[Monitor MCP Servers] +**** xref:ai-agents:mcp/remote/scale-resources.adoc[Scale Resources] *** xref:ai-agents:mcp/local/index.adoc[Redpanda Cloud Management MCP Server] **** xref:ai-agents:mcp/local/overview.adoc[Overview] **** xref:ai-agents:mcp/local/quickstart.adoc[Quickstart] **** xref:ai-agents:mcp/local/configuration.adoc[Configure] +** xref:ai-agents:agents/index.adoc[Agents] +*** xref:ai-agents:agents/get-started-index.adoc[Get Started] +**** xref:ai-agents:agents/overview.adoc[Overview] +**** xref:ai-agents:agents/concepts.adoc[Concepts] +**** xref:ai-agents:agents/quickstart.adoc[Quickstart] +**** 
xref:ai-agents:agents/tutorials/customer-support-agent.adoc[Multi-Tool Orchestration] +**** xref:ai-agents:agents/tutorials/transaction-dispute-resolution.adoc[Multi-Agent Systems] +*** xref:ai-agents:agents/build-index.adoc[Build Agents] +**** xref:ai-agents:agents/create-agent.adoc[Create an Agent] +**** xref:ai-agents:agents/prompt-best-practices.adoc[System Prompt Best Practices] +**** xref:ai-agents:agents/architecture-patterns.adoc[Architecture Patterns] +**** xref:ai-agents:agents/troubleshooting.adoc[Troubleshoot] +*** xref:ai-agents:agents/monitor-agents.adoc[Monitor Agents] +*** xref:ai-agents:agents/integration-index.adoc[Agent Integrations] +**** xref:ai-agents:agents/integration-overview.adoc[Integration Patterns] +**** xref:ai-agents:agents/pipeline-integration-patterns.adoc[Pipeline to Agent] +**** xref:ai-agents:agents/a2a-concepts.adoc[A2A Protocol] +** xref:ai-agents:observability/index.adoc[Transcripts] +*** xref:ai-agents:observability/concepts.adoc[Concepts] +*** xref:ai-agents:observability/view-transcripts.adoc[View Transcripts] +*** xref:ai-agents:observability/ingest-custom-traces.adoc[Ingest Traces from Custom Agents] +** xref:ai-agents:ai-gateway/index.adoc[AI Gateway] +*** xref:ai-agents:ai-gateway/what-is-ai-gateway.adoc[Overview] +*** xref:ai-agents:ai-gateway/gateway-quickstart.adoc[Quickstart] +*** xref:ai-agents:ai-gateway/gateway-architecture.adoc[Architecture] +*** For Administrators +**** xref:ai-agents:ai-gateway/admin/setup-guide.adoc[Setup Guide] +*** For Builders +**** xref:ai-agents:ai-gateway/builders/discover-gateways.adoc[Discover Gateways] +**** xref:ai-agents:ai-gateway/builders/connect-your-agent.adoc[Connect Your Agent] +**** xref:ai-agents:ai-gateway/cel-routing-cookbook.adoc[CEL Routing Patterns] +**** xref:ai-agents:ai-gateway/mcp-aggregation-guide.adoc[MCP Aggregation] +//*** Observability +//**** xref:ai-agents:ai-gateway/observability-logs.adoc[Request Logs] +//**** 
xref:ai-agents:ai-gateway/observability-metrics.adoc[Metrics and Analytics] +//*** xref:ai-agents:ai-gateway/migration-guide.adoc[Migrate] +*** xref:ai-agents:ai-gateway/integrations/index.adoc[Integrations] +**** Claude Code +***** xref:ai-agents:ai-gateway/integrations/claude-code-admin.adoc[Admin Guide] +***** xref:ai-agents:ai-gateway/integrations/claude-code-user.adoc[User Guide] +**** Cline +***** xref:ai-agents:ai-gateway/integrations/cline-admin.adoc[Admin Guide] +***** xref:ai-agents:ai-gateway/integrations/cline-user.adoc[User Guide] +**** Continue.dev +***** xref:ai-agents:ai-gateway/integrations/continue-admin.adoc[Admin Guide] +***** xref:ai-agents:ai-gateway/integrations/continue-user.adoc[User Guide] +**** Cursor IDE +***** xref:ai-agents:ai-gateway/integrations/cursor-admin.adoc[Admin Guide] +***** xref:ai-agents:ai-gateway/integrations/cursor-user.adoc[User Guide] +**** GitHub Copilot +***** xref:ai-agents:ai-gateway/integrations/github-copilot-admin.adoc[Admin Guide] +***** xref:ai-agents:ai-gateway/integrations/github-copilot-user.adoc[User Guide] * xref:develop:connect/about.adoc[Redpanda Connect] ** xref:develop:connect/connect-quickstart.adoc[Quickstart] @@ -79,6 +130,7 @@ **** xref:develop:connect/components/inputs/gcp_spanner_cdc.adoc[] **** xref:develop:connect/components/inputs/generate.adoc[] **** xref:develop:connect/components/inputs/http_client.adoc[] +**** xref:develop:connect/components/inputs/http_server.adoc[] **** xref:develop:connect/components/inputs/inproc.adoc[] **** xref:develop:connect/components/inputs/kafka.adoc[] **** xref:develop:connect/components/inputs/kafka_franz.adoc[] @@ -182,6 +234,7 @@ **** xref:develop:connect/components/outputs/timeplus.adoc[] *** xref:develop:connect/components/processors/about.adoc[] +**** xref:develop:connect/components/processors/a2a_message.adoc[] **** xref:develop:connect/components/processors/archive.adoc[] **** xref:develop:connect/components/processors/avro.adoc[] **** 
xref:develop:connect/components/processors/aws_bedrock_chat.adoc[] @@ -337,6 +390,7 @@ *** xref:develop:connect/cookbooks/joining_streams.adoc[] *** xref:develop:connect/cookbooks/redpanda_migrator.adoc[] *** xref:develop:connect/cookbooks/rag.adoc[] +*** xref:develop:connect/cookbooks/jira.adoc[] * xref:develop:index.adoc[Develop] ** xref:develop:kafka-clients.adoc[] diff --git a/modules/ai-agents/examples/agents/account-agent-prompt.txt b/modules/ai-agents/examples/agents/account-agent-prompt.txt new file mode 100644 index 000000000..292fa59b6 --- /dev/null +++ b/modules/ai-agents/examples/agents/account-agent-prompt.txt @@ -0,0 +1,62 @@ +You are the account agent for ACME Bank's dispute resolution system. You specialize in retrieving customer account information and transaction data. + +## Your Responsibilities + +- Look up customer account details with PII masking +- Retrieve specific transaction information +- Provide transaction pattern analysis +- Return only data available from your tools + +## Available Tools + +1. **get_customer_account**: Returns account data with masked PII + - Input: customer_id + - Returns: Name, masked email, card last 4, account type, location + +2. **get_transaction_details**: Returns detailed transaction information + - Input: transaction_id + - Returns: Amount, merchant, date, location, card used + +3. 
**get_transaction_history**: Returns spending pattern analysis + - Input: customer_id + - Returns: Aggregated spending patterns, categories, locations + +## PII Protection Rules + +Always return masked data: +- Email: First letter + **** + @domain (e.g., "s****@example.com") +- Phone: ***-***-XXXX (last 4 digits only) +- Card: Last 4 digits only +- Never return: Full card numbers, SSNs, full account numbers + +## Response Format + +Structure responses clearly: + +"I found the following account information: +- Customer: [Name] +- Account Type: [Type] +- Card ending in: [Last 4] +- Primary Location: [City, State, Country] + +Transaction details: +- Amount: $[Amount] +- Merchant: [Merchant Name] +- Date: [Date] +- Location: [Transaction Location]" + +## Error Handling + +If data not found: +- "I couldn't find an account for customer ID [ID]" +- "No transaction found with ID [ID]" +- Never guess or make up information + +## What You Don't Do + +- Don't calculate fraud scores (that's fraud-agent's job) +- Don't verify merchants (that's merchant-agent's job) +- Don't make recommendations about disputes +- Don't log audit events (that's compliance-agent's job) + +Your job is data retrieval only. Provide accurate, masked data and let the root agent make decisions. diff --git a/modules/ai-agents/examples/agents/compliance-agent-prompt.txt b/modules/ai-agents/examples/agents/compliance-agent-prompt.txt new file mode 100644 index 000000000..8704ecd35 --- /dev/null +++ b/modules/ai-agents/examples/agents/compliance-agent-prompt.txt @@ -0,0 +1,120 @@ +You are the compliance agent for ACME Bank's dispute resolution system. You specialize in regulatory requirements and audit logging. + +## Your Responsibilities + +- Log all dispute investigation actions for audit trail +- Check regulatory requirements for dispute types +- Verify compliance with banking regulations +- Provide timeline and documentation requirements + +## Available Tools + +1. 
**log_audit_event**: Log investigation actions + - Input: Transaction ID, customer ID, decision, evidence, outcome + - Returns: Audit record confirmation + +2. **check_regulatory_requirements**: Look up compliance rules + - Input: dispute_type (fraud, billing_error, service_not_received) + - Returns: Regulations, timelines, documentation requirements + +## Regulatory Frameworks + +You work with these regulations: + +1. **Regulation E (Electronic Fund Transfer Act)** + - Applies to: Fraud disputes, unauthorized transactions + - Customer liability: $50 if reported within 2 days, $500 if reported within 60 days + - Bank must provide provisional credit within 10 business days + - Investigation deadline: 90 days + +2. **Fair Credit Billing Act** + - Applies to: Billing errors, disputes + - Customer must dispute within 60 days of statement + - Bank must acknowledge within 30 days + - Resolution deadline: 90 days + +3. **Card Network Rules (Visa/Mastercard)** + - Chargeback rights and timelines + - Merchant response requirements + - Evidence requirements + +## Documentation Requirements + +For each dispute type, log: + +**Fraud Disputes:** +- Customer dispute affidavit +- Transaction details +- Fraud indicators identified +- Decision and reasoning +- Customer notification + +**Billing Errors:** +- Billing statement +- Customer dispute letter +- Merchant communication attempts +- Resolution details + +**Service Not Received:** +- Proof of non-delivery +- Merchant communication attempts +- Order/booking confirmation +- Resolution outcome + +## Timeline Tracking + +Monitor key deadlines: + +- Acknowledge dispute: 24-30 days (varies by type) +- Provisional credit: 10 business days (fraud) +- Final decision: 90 days (most disputes) +- Chargeback filing: 120 days (service issues) + +## Response Format + +For regulatory checks: + +"Compliance Requirements: + +Dispute Type: [Type] +Applicable Regulations: +- [Regulation 1] +- [Regulation 2] + +Customer Rights: +- Liability Limit: 
$[Amount] +- Notification Deadline: [Days] days + +Bank Obligations: +- Provisional Credit: [Required/Not Required] +- Investigation Deadline: [Days] days +- Customer Notification: [Required/Not Required] + +Documentation Required: +- [Document 1] +- [Document 2] +- [Document 3] + +Timeline: +- Acknowledge: [Timeframe] +- Decision: [Timeframe]" + +For audit logging: + +"Audit Event Logged: + +Audit ID: [UUID] +Timestamp: [ISO 8601] +Investigation Details: [Summary] +Decision: [Decision] +Evidence: [Evidence Sources] +Status: Recorded" + +## What You Don't Do + +- Don't retrieve transaction or account data +- Don't calculate fraud scores +- Don't verify merchants +- Don't make dispute recommendations + +Your job is compliance and audit only. Ensure all investigations are properly documented and regulatory requirements are met. diff --git a/modules/ai-agents/examples/agents/dispute-root-agent-prompt.txt b/modules/ai-agents/examples/agents/dispute-root-agent-prompt.txt new file mode 100644 index 000000000..a22888ddd --- /dev/null +++ b/modules/ai-agents/examples/agents/dispute-root-agent-prompt.txt @@ -0,0 +1,130 @@ +You are the root agent for a transaction dispute resolution system at ACME Bank. Your role is to orchestrate sub-agents and make final recommendations to customers about disputed transactions. + +## Your Responsibilities + +- Route customer queries to appropriate sub-agents +- Aggregate results from multiple sub-agents +- Make evidence-based recommendations +- Communicate clearly with customers +- Escalate complex cases to human agents + +## Available Sub-Agents + +You have access to four specialized sub-agents via A2A protocol: + +1. **account-agent**: Retrieves customer account data and transaction history +2. **fraud-agent**: Analyzes fraud risk and calculates risk scores +3. **merchant-agent**: Verifies merchant legitimacy and reputation +4. 
**compliance-agent**: Logs audit events and checks regulatory requirements + +## Decision Framework + +When investigating a dispute, follow this process: + +1. Start with account-agent to get customer and transaction details +2. Route to fraud-agent if fraud is suspected +3. Route to merchant-agent to verify merchant legitimacy +4. Route to compliance-agent to log the investigation and check requirements +5. Aggregate all evidence and make recommendation + +## Risk-Based Recommendations + +Based on aggregated evidence, take these actions: + +- **Fraud score 80-100 + high merchant risk**: Block the transaction immediately, block the card, issue new card +- **Fraud score 60-79**: Hold for specialist review, temporary card block +- **Fraud score 40-59**: Ask customer to verify with merchant first before taking action +- **Fraud score 0-39**: Likely legitimate transaction, help customer recall the purchase + +## Escalation Criteria + +Escalate to human agent when: + +- Fraud score is medium (40-70) and evidence is conflicting +- Customer disputes the recommendation strongly +- Regulatory requirements exceed available tools +- Subscription or recurring billing issues require merchant intervention + +## Compliance Constraints + +Never: + +- Expose full credit card numbers or SSNs (use masked versions only) +- Make guarantees about dispute outcomes (use "likely" or "recommend") +- Process disputes without logging to compliance-agent +- Reveal internal fraud detection logic or merchant scoring details to customers +- Make decisions without sub-agent evidence +- Ask customers for screenshots or additional proof (you have the transaction records) + +## Customer Communication Style + +**Clear, bank-appropriate language:** +- Use "I've reviewed your account" not "I called the account-agent" +- Use "this charge doesn't match your typical spending" not "fraud score is 95/100" +- Use "I'm blocking this card" not "I recommend you freeze it" +- Don't reveal merchant reputation 
scores or fraud report counts + +**Proactive protection:** +For likely fraud (score 80+): +- Block the card immediately: "I'm blocking your card ending in [XXXX] right now to prevent additional fraudulent charges" +- Issue replacement: "We'll send you a replacement card with a new number" +- Process the claim: "You'll see the credit for this charge within 10 business days" + +For uncertain cases (score 40-79): +- Temporary block: "I'm placing a temporary hold on this card while we investigate" +- Escalate: "A specialist will contact you within 24 hours" + +**Concise responses:** +Keep responses to 3-4 short paragraphs maximum. Customers want action, not detailed analysis. + +## Example Investigation Flow + +Customer: "I see a $1,847.99 charge from 'LUXURY WATCHES INT' in Singapore on transaction TXN-89012. This is fraud. My customer ID is CUST-1001." + +**Your response to customer:** + +"I've reviewed your account and this transaction. This charge doesn't match your typical spending pattern, and you haven't made international purchases in the past 90 days. + +Here's what I'm doing: +- Blocking your card ending in 4532 right now to prevent any additional unauthorized charges +- Approving your dispute for the full $1,847.99 - you'll see the credit within 10 business days +- Sending you a replacement card with a new number within 5-7 business days + +Your dispute has been logged and meets the requirements under Regulation E for unauthorized electronic fund transfers. + +Is there anything else I can help you with today?" + +**What you actually did behind the scenes:** +1. Called account-agent → confirmed US-based customer, no international history +2. Called fraud-agent → received score 95/100 (critical risk) +3. Called merchant-agent → confirmed high fraud indicators +4. Called compliance-agent → logged under Regulation E +5. 
Made decision: transaction is fraudulent, block card immediately + +(Don't share the scores or technical details with the customer) + +**Note:** When talking to customers, use natural banking language like "approving your dispute." But for programmatic JSON responses, "recommendation" describes the TRANSACTION status, not the dispute claim status. + +## Programmatic Invocations + +When invoked from a pipeline or automated system (you'll receive transaction data without conversational context), respond with ONLY valid JSON. No explanatory text, no markdown formatting, no commentary before or after - just the JSON object. + +Required JSON format: +{ + "recommendation": "block_and_investigate" | "hold_for_review" | "approve", + "fraud_score": <number 0-100>, + "confidence": "high" | "medium" | "low", + "reasoning": "<one-sentence explanation>" +} + +**Recommendation field definitions:** +- **"block_and_investigate"**: Transaction is fraudulent. Block the card immediately and investigate. +- **"hold_for_review"**: Unclear if fraudulent. Place temporary hold and escalate to human specialist. +- **"approve"**: Transaction is legitimate. Customer likely forgot about it or needs clarification. + +**Mapping from conversational actions:** +- If you would block the card → use "block_and_investigate" +- If you would escalate to specialist → use "hold_for_review" +- If transaction seems legitimate → use "approve" + +The pipeline will parse this JSON to make automated decisions. Any non-JSON response will cause processing failures. diff --git a/modules/ai-agents/examples/agents/fraud-agent-prompt.txt b/modules/ai-agents/examples/agents/fraud-agent-prompt.txt new file mode 100644 index 000000000..b2c8a26de --- /dev/null +++ b/modules/ai-agents/examples/agents/fraud-agent-prompt.txt @@ -0,0 +1,85 @@ +You are the fraud detection agent for ACME Bank's dispute resolution system. You specialize in analyzing transactions for fraud indicators and calculating risk scores. 
+ +## Your Responsibilities + +- Calculate fraud risk scores (0-100 scale) +- Identify specific fraud indicators +- Provide risk assessment reasoning +- Return confidence levels with assessments + +## Available Tools + +1. **calculate_fraud_score**: Multi-factor fraud scoring + - Input: transaction_id, customer_id + - Returns: Fraud score (0-100), risk level, breakdown by factor, recommendation + +2. **get_risk_indicators**: Detailed fraud signal detection + - Input: transaction_id + - Returns: Array of risk indicators with severity levels + +## Risk Scoring Factors + +Consider these factors: + +1. **Location Risk** (0-30 points) + - International vs. customer's country + - City mismatch from customer's primary location + - High-risk countries + +2. **Merchant Risk** (0-25 points) + - Merchant reputation score + - Fraud report history + - Business verification status + +3. **Amount Risk** (0-25 points) + - Deviation from customer's average + - Unusually large for merchant category + - Round numbers (potential testing) + +4. **Velocity Risk** (0-10 points) + - Multiple transactions in short timeframe + - Rapid succession of purchases + - Geographic impossibility + +5. 
**Category Risk** (0-10 points) + - Outside customer's typical categories + - High-risk MCC codes + - Mismatch with spending patterns +## Risk Levels + +- **Critical (80-100)**: Almost certainly fraud, immediate action needed +- **High (60-79)**: Strong fraud indicators, hold for review +- **Medium (40-59)**: Some concerning factors, customer verification recommended +- **Low (20-39)**: Minor flags, likely legitimate +- **Minimal (0-19)**: No significant fraud indicators + +## Response Format + +Structure your analysis: + +"Fraud Risk Analysis: + +Fraud Score: [Score]/100 - [Risk Level] + +Risk Breakdown: +- Location Risk: [Score] - [Explanation] +- Merchant Risk: [Score] - [Explanation] +- Amount Risk: [Score] - [Explanation] +- Velocity Risk: [Score] - [Explanation] +- Category Risk: [Score] - [Explanation] + +Key Indicators: +- [Indicator 1] +- [Indicator 2] +- [Indicator 3] + +Recommendation: [block_and_investigate | hold_for_review | monitor_closely | approve]" + +## What You Don't Do + +- Don't retrieve account or transaction data (use what's provided) +- Don't verify merchants (that's merchant-agent's job) +- Don't make final dispute decisions (provide recommendation only) +- Don't log audit events + +Your job is fraud analysis only. Provide objective risk assessment based on available data. diff --git a/modules/ai-agents/examples/agents/merchant-agent-prompt.txt b/modules/ai-agents/examples/agents/merchant-agent-prompt.txt new file mode 100644 index 000000000..bb6ee31da --- /dev/null +++ b/modules/ai-agents/examples/agents/merchant-agent-prompt.txt @@ -0,0 +1,87 @@ +You are the merchant verification agent for ACME Bank's dispute resolution system. You specialize in verifying merchant legitimacy and reputation. + +## Your Responsibilities + +- Verify merchant reputation and legitimacy +- Look up merchant category codes (MCC) +- Identify known fraud patterns for merchant categories +- Provide merchant-specific insights + +## Available Tools + +1. 
**verify_merchant**: Merchant reputation lookup + - Input: merchant_name + - Returns: Reputation score, fraud reports, business verification, red flags + +2. **get_merchant_category**: MCC code analysis + - Input: mcc (4-digit code) + - Returns: Category details, typical transaction ranges, fraud risk profile + +## Reputation Scoring + +Interpret reputation scores: + +- **90-100**: Excellent, trusted merchant +- **70-89**: Good, established business +- **50-69**: Moderate, some concerns +- **30-49**: Poor, significant red flags +- **0-29**: High risk, strong fraud indicators + +## Red Flags to Report + +Watch for: +- High volume of fraud reports +- Recently established businesses in high-risk categories +- Unverified business registration +- Pattern of chargebacks +- Operates in high-risk jurisdictions +- Billing descriptor mismatches + +## Common Merchant Issues + +Be aware of legitimate merchant problems: + +- **Subscription services**: Known for duplicate billing, difficult cancellation +- **International hotels**: Currency conversion confusion, incidental charges +- **Online marketplaces**: Third-party sellers, billing descriptor confusion +- **Travel booking**: Pre-authorization holds, cancellation fee disputes + +## Response Format + +Structure your verification: + +"Merchant Verification Results: + +Merchant: [Name] +Reputation Score: [Score]/100 - [Level] +Verification Status: [Verified | Unverified | Unknown] + +Business Details: +- Country: [Country] +- Years in Operation: [Years] +- Registration: [Verified/Unverified] + +Fraud Reports: +- Total Reports: [Count] +- Recent (30 days): [Count] +- Confirmed Fraud Cases: [Count] + +Category Analysis (MCC [Code]): +- Category: [Category Name] +- Risk Profile: [High/Medium/Low] +- Typical Transaction Range: $[Min]-$[Max] + +Red Flags: +- [Flag 1] +- [Flag 2] + +Recommendation: [trusted_merchant | verify_subscription_details | manual_review_required | block_merchant]" + +## What You Don't Do + +- Don't calculate 
fraud scores (that's fraud-agent's job) +- Don't retrieve transaction data (that's account-agent's job) +- Don't make final dispute decisions +- Don't log audit events + +Your job is merchant verification only. Provide objective assessment of merchant legitimacy. diff --git a/modules/ai-agents/examples/memory_cache.yaml b/modules/ai-agents/examples/mcp-tools/caches/memory_cache.yaml similarity index 100% rename from modules/ai-agents/examples/memory_cache.yaml rename to modules/ai-agents/examples/mcp-tools/caches/memory_cache.yaml diff --git a/modules/ai-agents/examples/redpanda_cache.yaml b/modules/ai-agents/examples/mcp-tools/caches/redpanda_cache.yaml similarity index 100% rename from modules/ai-agents/examples/redpanda_cache.yaml rename to modules/ai-agents/examples/mcp-tools/caches/redpanda_cache.yaml diff --git a/modules/ai-agents/examples/mcp-tools/caches/session_cache.yaml b/modules/ai-agents/examples/mcp-tools/caches/session_cache.yaml new file mode 100644 index 000000000..851773db9 --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/caches/session_cache.yaml @@ -0,0 +1,13 @@ +# In-memory cache for session data +# Example of cache tool +# tag::complete[] +label: session-cache + +memory: + default_ttl: 300s + +meta: + mcp: + enabled: true + description: "In-memory cache for session data" +# end::complete[] diff --git a/modules/ai-agents/examples/mcp-tools/inputs/consume_redpanda.yaml b/modules/ai-agents/examples/mcp-tools/inputs/consume_redpanda.yaml new file mode 100644 index 000000000..841dd5aaa --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/inputs/consume_redpanda.yaml @@ -0,0 +1,22 @@ +# Consume events from Redpanda topics +# Use for event-driven AI agents, audit logs, or data change streams +label: consume-events + +# tag::component[] +redpanda: + seed_brokers: [ "${REDPANDA_BROKERS}" ] + topics: [ "user-events" ] + consumer_group: "mcp-event-processor" + start_from_oldest: true + tls: + enabled: true + sasl: + - mechanism: 
"${REDPANDA_SASL_MECHANISM}" + username: "${REDPANDA_SASL_USERNAME}" + password: "${REDPANDA_SASL_PASSWORD}" +# end::component[] + +meta: + mcp: + enabled: true + description: "Consume events from user-events topic" diff --git a/modules/ai-agents/examples/mcp-tools/inputs/event_driven_workflow.yaml b/modules/ai-agents/examples/mcp-tools/inputs/event_driven_workflow.yaml new file mode 100644 index 000000000..f549f88a0 --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/inputs/event_driven_workflow.yaml @@ -0,0 +1,39 @@ +# Event-driven workflow orchestration +# Use for multi-step processes, saga patterns, microservice coordination +label: order-workflow + +# tag::component[] +redpanda: + seed_brokers: [ "${REDPANDA_BROKERS}" ] + topics: [ "order-events" ] + consumer_group: "workflow-orchestrator" + tls: + enabled: true + sasl: + - mechanism: "${REDPANDA_SASL_MECHANISM}" + username: "${REDPANDA_SASL_USERNAME}" + password: "${REDPANDA_SASL_PASSWORD}" + processors: + - switch: + - check: this.event_type == "order_created" + processors: + - http: + url: "${secrets.INVENTORY_API}/reserve" + verb: POST + headers: + Content-Type: application/json + body: '{"order_id": "${! this.order_id }", "items": ${! json("items") }}' + - check: this.event_type == "payment_confirmed" + processors: + - http: + url: "${secrets.FULFILLMENT_API}/ship" + verb: POST + headers: + Content-Type: application/json + body: '{"order_id": "${! 
this.order_id }"}' +# end::component[] + +meta: + mcp: + enabled: true + description: "Process order events and orchestrate fulfillment workflow" diff --git a/modules/ai-agents/examples/generate_input.yaml b/modules/ai-agents/examples/mcp-tools/inputs/generate_input.yaml similarity index 100% rename from modules/ai-agents/examples/generate_input.yaml rename to modules/ai-agents/examples/mcp-tools/inputs/generate_input.yaml diff --git a/modules/ai-agents/examples/mcp-tools/inputs/read_events.yaml b/modules/ai-agents/examples/mcp-tools/inputs/read_events.yaml new file mode 100644 index 000000000..8627214f5 --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/inputs/read_events.yaml @@ -0,0 +1,21 @@ +# Read events from Redpanda +# Example of input tool +# tag::complete[] +label: read-events + +redpanda: # <1> + seed_brokers: ["${REDPANDA_BROKERS}"] + topics: ["events"] + consumer_group: "mcp-reader" + tls: + enabled: true + sasl: + - mechanism: SCRAM-SHA-256 + username: "${secrets.MCP_USERNAME}" + password: "${secrets.MCP_PASSWORD}" + +meta: + mcp: + enabled: true + description: "Read events from Redpanda" +# end::complete[] diff --git a/modules/ai-agents/examples/mcp-tools/inputs/stream_processing.yaml b/modules/ai-agents/examples/mcp-tools/inputs/stream_processing.yaml new file mode 100644 index 000000000..0afbd9c14 --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/inputs/stream_processing.yaml @@ -0,0 +1,28 @@ +# Process streaming data with aggregations +# Use for real-time analytics, windowed aggregations, computing metrics +label: process-sensor-data + +# tag::component[] +redpanda: + seed_brokers: [ "${REDPANDA_BROKERS}" ] + topics: [ "sensor-readings" ] + consumer_group: "analytics-processor" + tls: + enabled: true + sasl: + - mechanism: "${REDPANDA_SASL_MECHANISM}" + username: "${REDPANDA_SASL_USERNAME}" + password: "${REDPANDA_SASL_PASSWORD}" + processors: + - mapping: | + root.sensor_id = this.sensor_id + root.avg_temperature = 
this.readings.map_each(r -> r.temperature).mean() + root.max_temperature = this.readings.map_each(r -> r.temperature).max() + root.reading_count = this.readings.length() + root.window_end = now() +# end::component[] + +meta: + mcp: + enabled: true + description: "Process sensor readings and compute aggregations" diff --git a/modules/ai-agents/examples/mcp-tools/outputs/publish_event.yaml b/modules/ai-agents/examples/mcp-tools/outputs/publish_event.yaml new file mode 100644 index 000000000..748aea014 --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/outputs/publish_event.yaml @@ -0,0 +1,20 @@ +# Publish event to Redpanda +# Example of output tool +# tag::complete[] +label: publish-event + +redpanda: + seed_brokers: ["${REDPANDA_BROKERS}"] + topic: "processed-events" + tls: + enabled: true + sasl: + - mechanism: SCRAM-SHA-256 + username: "${secrets.MCP_USERNAME}" + password: "${secrets.MCP_PASSWORD}" + +meta: + mcp: + enabled: true + description: "Publish event to Redpanda" +# end::complete[] diff --git a/modules/ai-agents/examples/mcp-tools/outputs/publish_with_timestamp.yaml b/modules/ai-agents/examples/mcp-tools/outputs/publish_with_timestamp.yaml new file mode 100644 index 000000000..93fcd82f5 --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/outputs/publish_with_timestamp.yaml @@ -0,0 +1,25 @@ +# Publish event with timestamp +# Example of output tool with processors +# tag::complete[] +label: publish-with-timestamp + +processors: + - mutation: | + root = this + root.published_at = now() + +redpanda: + seed_brokers: ["${REDPANDA_BROKERS}"] + topic: "processed-events" + tls: + enabled: true + sasl: + - mechanism: SCRAM-SHA-256 + username: "${secrets.MCP_USERNAME}" + password: "${secrets.MCP_PASSWORD}" + +meta: + mcp: + enabled: true + description: "Add timestamp and publish to Redpanda" +# end::complete[] diff --git a/modules/ai-agents/examples/redpanda_output.yaml b/modules/ai-agents/examples/mcp-tools/outputs/redpanda_output.yaml similarity index 
100% rename from modules/ai-agents/examples/redpanda_output.yaml rename to modules/ai-agents/examples/mcp-tools/outputs/redpanda_output.yaml diff --git a/modules/ai-agents/examples/redpanda_output_with_processors.yaml b/modules/ai-agents/examples/mcp-tools/outputs/redpanda_output_with_processors.yaml similarity index 97% rename from modules/ai-agents/examples/redpanda_output_with_processors.yaml rename to modules/ai-agents/examples/mcp-tools/outputs/redpanda_output_with_processors.yaml index eea4b323f..30e4a387b 100644 --- a/modules/ai-agents/examples/redpanda_output_with_processors.yaml +++ b/modules/ai-agents/examples/mcp-tools/outputs/redpanda_output_with_processors.yaml @@ -3,7 +3,7 @@ label: summarize_and_publish processors: - openai_chat_completion: api_key: "${secrets.OPENAI_API_KEY}" - model: "gpt-4" + model: "gpt-5.2" prompt: ${! json("question") } - mapping: | root.question = this.question diff --git a/modules/ai-agents/examples/mcp-tools/processors/calculate_fraud_score.yaml b/modules/ai-agents/examples/mcp-tools/processors/calculate_fraud_score.yaml new file mode 100644 index 000000000..280ddef6f --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/processors/calculate_fraud_score.yaml @@ -0,0 +1,108 @@ +label: calculate_fraud_score +mapping: | + root = match { + this.transaction_id == "TXN-89012" && this.customer_id == "CUST-1001" => { + "transaction_id": "TXN-89012", + "customer_id": "CUST-1001", + "fraud_score": 95, + "risk_level": "critical", + "score_breakdown": { + "location_risk": 35, + "merchant_risk": 30, + "amount_risk": 25, + "velocity_risk": 0, + "category_risk": 20 + }, + "factors_detected": [ + "unusual_location", + "questionable_merchant", + "unusual_amount", + "unusual_category" + ], + "reasoning": "International transaction from Singapore with no customer history of international purchases. High-value jewelry purchase (14.5x customer average). 
Merchant has significant fraud indicators.", + "recommendation": "block_and_investigate" + }, + this.transaction_id == "TXN-89013" && this.customer_id == "CUST-1001" => { + "transaction_id": "TXN-89013", + "customer_id": "CUST-1001", + "fraud_score": 8, + "risk_level": "minimal", + "score_breakdown": { + "location_risk": 0, + "merchant_risk": 0, + "amount_risk": 0, + "velocity_risk": 0, + "category_risk": 0 + }, + "factors_detected": [], + "reasoning": "Local transaction from trusted merchant in customer's typical spending category and amount range.", + "recommendation": "approve" + }, + this.transaction_id == "TXN-89014" && this.customer_id == "CUST-1002" => { + "transaction_id": "TXN-89014", + "customer_id": "CUST-1002", + "fraud_score": 52, + "risk_level": "medium", + "score_breakdown": { + "location_risk": 0, + "merchant_risk": 15, + "amount_risk": 0, + "velocity_risk": 8, + "category_risk": 0 + }, + "factors_detected": [ + "questionable_merchant", + "high_velocity" + ], + "reasoning": "Recurring subscription service with known billing issues. Multiple charges detected from same merchant. Moderate merchant reputation score.", + "recommendation": "monitor_closely" + }, + this.transaction_id == "TXN-89015" && this.customer_id == "CUST-1003" => { + "transaction_id": "TXN-89015", + "customer_id": "CUST-1003", + "fraud_score": 12, + "risk_level": "minimal", + "score_breakdown": { + "location_risk": 0, + "merchant_risk": 0, + "amount_risk": 5, + "velocity_risk": 0, + "category_risk": 0 + }, + "factors_detected": [ + "slightly_elevated_amount" + ], + "reasoning": "International hotel charge consistent with customer's frequent travel patterns. 
Amount within expected range for lodging category.", + "recommendation": "approve" + }, + _ => { + "transaction_id": this.transaction_id, + "customer_id": this.customer_id, + "fraud_score": 50, + "risk_level": "medium", + "score_breakdown": { + "location_risk": 0, + "merchant_risk": 0, + "amount_risk": 0, + "velocity_risk": 0, + "category_risk": 0 + }, + "factors_detected": [], + "reasoning": "Insufficient data to calculate accurate fraud score for this transaction/customer combination.", + "recommendation": "monitor_closely" + } + } + +meta: + mcp: + enabled: true + description: "Calculate fraud risk score based on transaction patterns and risk indicators. Use TXN-89012 through TXN-89015 with corresponding customer IDs for testing." + properties: + - name: transaction_id + type: string + description: "Transaction identifier to analyze (format TXN-XXXXX)" + required: true + - name: customer_id + type: string + description: "Customer identifier for historical analysis (format CUST-XXXX)" + required: true diff --git a/modules/ai-agents/examples/mcp-tools/processors/check_regulatory_requirements.yaml b/modules/ai-agents/examples/mcp-tools/processors/check_regulatory_requirements.yaml new file mode 100644 index 000000000..f8df06efd --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/processors/check_regulatory_requirements.yaml @@ -0,0 +1,116 @@ +label: check_regulatory_requirements +mapping: | + root = match { + this.dispute_type == "fraud" => { + "dispute_type": "fraud", + "regulations_applicable": [ + "Regulation E (Electronic Fund Transfer Act)", + "Fair Credit Billing Act", + "Card Network Rules (Visa/Mastercard)" + ], + "customer_rights": { + "liability_limit": 50.00, + "zero_liability_if_reported_promptly": true, + "notification_deadline_days": 60 + }, + "bank_obligations": { + "provisional_credit_required": true, + "provisional_credit_deadline_days": 10, + "investigation_deadline_days": 90, + "customer_notification_required": true + }, + 
"documentation_required": [ + "Customer dispute affidavit", + "Transaction details", + "Customer communication log", + "Investigation findings" + ], + "timeline": { + "acknowledge_dispute_hours": 24, + "provisional_credit_days": 10, + "final_decision_days": 90 + } + }, + this.dispute_type == "billing_error" => { + "dispute_type": "billing_error", + "regulations_applicable": [ + "Fair Credit Billing Act", + "Regulation Z (Truth in Lending)" + ], + "customer_rights": { + "dispute_window_days": 60, + "interest_suspension": true + }, + "bank_obligations": { + "acknowledge_dispute_days": 30, + "investigation_deadline_days": 90, + "correction_required_if_error_found": true + }, + "documentation_required": [ + "Billing statement", + "Customer dispute letter", + "Merchant communication (if any)", + "Investigation results" + ], + "timeline": { + "acknowledge_dispute_days": 30, + "resolution_days": 90 + } + }, + this.dispute_type == "service_not_received" => { + "dispute_type": "service_not_received", + "regulations_applicable": [ + "Fair Credit Billing Act", + "Card Network Chargeback Rules" + ], + "customer_rights": { + "chargeback_eligibility": true, + "dispute_window_days": 120 + }, + "bank_obligations": { + "verify_merchant_response": true, + "chargeback_processing_days": 45 + }, + "documentation_required": [ + "Proof of non-delivery or service failure", + "Merchant communication attempts", + "Order/booking confirmation", + "Merchant response (if obtained)" + ], + "timeline": { + "merchant_response_wait_days": 15, + "chargeback_filing_days": 120 + } + }, + _ => { + "dispute_type": "general", + "regulations_applicable": [ + "Fair Credit Billing Act" + ], + "customer_rights": { + "dispute_right": true, + "dispute_window_days": 60 + }, + "bank_obligations": { + "investigation_required": true, + "customer_notification_required": true + }, + "documentation_required": [ + "Customer dispute statement", + "Transaction evidence" + ], + "timeline": { + "standard_review_days": 30 
+ } + } + } + +meta: + mcp: + enabled: true + description: "Check regulatory requirements for dispute resolution based on dispute type." + properties: + - name: dispute_type + type: string + description: "Type of dispute (fraud, billing_error, service_not_received)" + required: true diff --git a/modules/ai-agents/examples/customer_enrichment.yaml b/modules/ai-agents/examples/mcp-tools/processors/customer_enrichment.yaml similarity index 100% rename from modules/ai-agents/examples/customer_enrichment.yaml rename to modules/ai-agents/examples/mcp-tools/processors/customer_enrichment.yaml diff --git a/modules/ai-agents/examples/mcp-tools/processors/enrich_order.yaml b/modules/ai-agents/examples/mcp-tools/processors/enrich_order.yaml new file mode 100644 index 000000000..604f1da65 --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/processors/enrich_order.yaml @@ -0,0 +1,15 @@ +# Enrich order with customer data +# Example of processor tool with HTTP call +# tag::complete[] +label: enrich-order + +processors: + - http: + url: "https://api.example.com/lookup" + verb: GET + +meta: + mcp: + enabled: true + description: "Enrich order with customer data" +# end::complete[] diff --git a/modules/ai-agents/examples/gcp_bigquery_select_processor.yaml b/modules/ai-agents/examples/mcp-tools/processors/gcp_bigquery_select_processor.yaml similarity index 100% rename from modules/ai-agents/examples/gcp_bigquery_select_processor.yaml rename to modules/ai-agents/examples/mcp-tools/processors/gcp_bigquery_select_processor.yaml diff --git a/modules/ai-agents/examples/mcp-tools/processors/get_customer_account.yaml b/modules/ai-agents/examples/mcp-tools/processors/get_customer_account.yaml new file mode 100644 index 000000000..9701bb209 --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/processors/get_customer_account.yaml @@ -0,0 +1,51 @@ +label: get_customer_account +mapping: | + root = match { + this.customer_id == "CUST-1001" => { + "customer_id": "CUST-1001", + "name": 
"Dana A.", + "email": "s****@example.com", + "account_type": "premium_checking", + "card_last_four": "4532", + "card_status": "active", + "member_since": "2019-03-15", + "location": "Seattle, WA, USA", + "phone_masked": "***-***-7890" + }, + this.customer_id == "CUST-1002" => { + "customer_id": "CUST-1002", + "name": "Alex T.", + "email": "m****@example.com", + "account_type": "standard_checking", + "card_last_four": "8821", + "card_status": "active", + "member_since": "2021-07-22", + "location": "San Francisco, CA, USA", + "phone_masked": "***-***-4521" + }, + this.customer_id == "CUST-1003" => { + "customer_id": "CUST-1003", + "name": "Quinn N.", + "email": "e****@example.com", + "account_type": "premium_credit", + "card_last_four": "2193", + "card_status": "active", + "member_since": "2020-11-08", + "location": "Austin, TX, USA", + "phone_masked": "***-***-3344" + }, + _ => { + "error": "customer_not_found", + "message": "No account found for customer ID: " + this.customer_id + } + } + +meta: + mcp: + enabled: true + description: "Retrieve customer account information with masked PII. Use CUST-1001, CUST-1002, or CUST-1003 for testing." 
+ properties: + - name: customer_id + type: string + description: "Customer identifier (format CUST-XXXX)" + required: true diff --git a/modules/ai-agents/examples/mcp-tools/processors/get_customer_history.yaml b/modules/ai-agents/examples/mcp-tools/processors/get_customer_history.yaml new file mode 100644 index 000000000..183e4f84b --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/processors/get_customer_history.yaml @@ -0,0 +1,38 @@ +label: get_customer_history + +processors: + - mapping: | + let customer_id = this.customer_id + root = if $customer_id == "CUST-100" { + { + "customer_id": $customer_id, + "orders": [ + {"order_id": "ORD-12345", "status": "shipped", "total": 1299.99, "order_date": "2025-01-10"}, + {"order_id": "ORD-67890", "status": "processing", "total": 299.98, "order_date": "2025-01-14"}, + {"order_id": "ORD-11111", "status": "delivered", "total": 89.99, "order_date": "2024-12-20"} + ], + "total_orders": 3 + } + } else if $customer_id == "CUST-999" { + { + "customer_id": $customer_id, + "orders": [], + "total_orders": 0, + "message": "No orders found for this customer" + } + } else { + { + "error": true, + "message": "Customer not found" + } + } + +meta: + mcp: + enabled: true + description: "Retrieve order history. Use CUST-100 (has orders) or CUST-999 (no orders) for testing." 
+ properties: + - name: customer_id + type: string + description: "The customer ID (format CUST-XXX)" + required: true diff --git a/modules/ai-agents/examples/mcp-tools/processors/get_merchant_category.yaml b/modules/ai-agents/examples/mcp-tools/processors/get_merchant_category.yaml new file mode 100644 index 000000000..f8ac390f1 --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/processors/get_merchant_category.yaml @@ -0,0 +1,90 @@ +label: get_merchant_category +mapping: | + root = match { + this.mcc == "5944" => { + "mcc": "5944", + "category": "Jewelry, Watch, Clock, and Silverware Stores", + "high_level_category": "retail_luxury", + "risk_profile": "high", + "typical_transaction_range": { + "min": 100, + "max": 5000, + "average": 850 + }, + "fraud_risk_notes": "High-value items, common fraud target, verify customer intent", + "common_fraud_patterns": [ + "Stolen card purchases", + "Account takeover", + "Reshipping schemes" + ] + }, + this.mcc == "5942" => { + "mcc": "5942", + "category": "Book Stores", + "high_level_category": "retail_general", + "risk_profile": "low", + "typical_transaction_range": { + "min": 10, + "max": 200, + "average": 45 + }, + "fraud_risk_notes": "Low fraud risk, common online purchase category", + "common_fraud_patterns": [] + }, + this.mcc == "4899" => { + "mcc": "4899", + "category": "Cable, Satellite, and Other Pay Television and Radio Services", + "high_level_category": "subscription_services", + "risk_profile": "medium", + "typical_transaction_range": { + "min": 9.99, + "max": 99.99, + "average": 29.99 + }, + "fraud_risk_notes": "Recurring billing, watch for duplicate charges and unauthorized subscriptions", + "common_fraud_patterns": [ + "Duplicate subscriptions", + "Unauthorized recurring charges", + "Failed cancellation processing" + ] + }, + this.mcc == "7011" => { + "mcc": "7011", + "category": "Lodging - Hotels, Motels, Resorts", + "high_level_category": "travel_hospitality", + "risk_profile": "medium", + 
"typical_transaction_range": { + "min": 80, + "max": 500, + "average": 180 + }, + "fraud_risk_notes": "Verify travel patterns, check for location consistency", + "common_fraud_patterns": [ + "Stolen card at booking sites", + "Account takeover for rewards redemption" + ] + }, + _ => { + "mcc": this.mcc, + "category": "Unknown Category", + "high_level_category": "unclassified", + "risk_profile": "unknown", + "typical_transaction_range": { + "min": 0, + "max": 0, + "average": 0 + }, + "fraud_risk_notes": "MCC not recognized, manual review recommended", + "common_fraud_patterns": [] + } + } + +meta: + mcp: + enabled: true + description: "Retrieve merchant category information including fraud risk level and common patterns based on MCC code." + properties: + - name: mcc + type: string + description: "Merchant Category Code (5944 for jewelry, 5942 for books, 4899 for streaming, 7011 for hotels)" + required: true diff --git a/modules/ai-agents/examples/mcp-tools/processors/get_order_status.yaml b/modules/ai-agents/examples/mcp-tools/processors/get_order_status.yaml new file mode 100644 index 000000000..55c962761 --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/processors/get_order_status.yaml @@ -0,0 +1,46 @@ +label: get_order_status +mapping: | + let order_id = this.order_id + root = if $order_id == "ORD-12345" { + { + "order_id": $order_id, + "status": "shipped", + "items": [{"name": "Laptop", "quantity": 1, "price": 1299.99}], + "total": 1299.99, + "order_date": "2025-01-10", + "customer_id": "CUST-100" + } + } else if $order_id == "ORD-67890" { + { + "order_id": $order_id, + "status": "processing", + "items": [{"name": "Headphones", "quantity": 2, "price": 149.99}], + "total": 299.98, + "order_date": "2025-01-14", + "customer_id": "CUST-100" + } + } else if $order_id == "ORD-99999" { + { + "error": "order_not_found", + "message": "Order not found" + } + } else { + { + "order_id": $order_id, + "status": "pending", + "items": [{"name": "Generic Item", 
"quantity": 1, "price": 49.99}], + "total": 49.99, + "order_date": "2025-01-15", + "customer_id": "CUST-999" + } + } + +meta: + mcp: + enabled: true + description: "Retrieve order status and details. Use ORD-12345 (shipped), ORD-67890 (processing), or ORD-99999 (not found) for testing." + properties: + - name: order_id + type: string + description: "The order ID (format ORD-XXXXX)" + required: true diff --git a/modules/ai-agents/examples/mcp-tools/processors/get_risk_indicators.yaml b/modules/ai-agents/examples/mcp-tools/processors/get_risk_indicators.yaml new file mode 100644 index 000000000..c4ccdf19d --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/processors/get_risk_indicators.yaml @@ -0,0 +1,129 @@ +label: get_risk_indicators +mapping: | + root = match { + this.transaction_id == "TXN-89012" => { + "transaction_id": "TXN-89012", + "risk_indicators": [ + { + "indicator": "international_transaction", + "severity": "high", + "description": "Transaction originated from Singapore, customer has no international transaction history" + }, + { + "indicator": "first_time_merchant", + "severity": "medium", + "description": "Customer has never transacted with this merchant before" + }, + { + "indicator": "unusual_category", + "severity": "high", + "description": "Jewelry purchase is outside customer's typical spending categories" + }, + { + "indicator": "high_amount", + "severity": "high", + "description": "Transaction amount is 14.5x customer's average transaction" + }, + { + "indicator": "merchant_flagged", + "severity": "critical", + "description": "Merchant has been flagged in fraud databases" + } + ], + "total_indicators": 5, + "critical_count": 1, + "high_count": 3, + "medium_count": 1, + "overall_assessment": "high_fraud_probability" + }, + this.transaction_id == "TXN-89013" => { + "transaction_id": "TXN-89013", + "risk_indicators": [ + { + "indicator": "known_merchant", + "severity": "none", + "description": "Example Marketplace is a recognized and trusted 
merchant" + } + ], + "total_indicators": 1, + "critical_count": 0, + "high_count": 0, + "medium_count": 0, + "overall_assessment": "low_fraud_probability" + }, + this.transaction_id == "TXN-89014" => { + "transaction_id": "TXN-89014", + "risk_indicators": [ + { + "indicator": "recurring_billing", + "severity": "low", + "description": "Subscription service with recurring charges" + }, + { + "indicator": "merchant_billing_issues", + "severity": "medium", + "description": "Merchant has known history of duplicate billing complaints" + }, + { + "indicator": "duplicate_charge_pattern", + "severity": "medium", + "description": "Multiple charges detected from same merchant in short timeframe" + } + ], + "total_indicators": 3, + "critical_count": 0, + "high_count": 0, + "medium_count": 2, + "low_count": 1, + "none_count": 0, + "overall_assessment": "medium_fraud_probability" + }, + this.transaction_id == "TXN-89015" => { + "transaction_id": "TXN-89015", + "risk_indicators": [ + { + "indicator": "international_transaction", + "severity": "low", + "description": "Transaction in France matches customer's travel history" + }, + { + "indicator": "travel_category", + "severity": "none", + "description": "Hotel charge is consistent with customer's frequent travel patterns" + }, + { + "indicator": "timing_matches_travel", + "severity": "none", + "description": "Transaction date aligns with customer's Paris trip" + } + ], + "total_indicators": 3, + "critical_count": 0, + "high_count": 0, + "medium_count": 0, + "low_count": 1, + "none_count": 2, + "overall_assessment": "low_fraud_probability" + }, + _ => { + "transaction_id": this.transaction_id, + "risk_indicators": [], + "total_indicators": 0, + "critical_count": 0, + "high_count": 0, + "medium_count": 0, + "low_count": 0, + "none_count": 0, + "overall_assessment": "insufficient_data" + } + } + +meta: + mcp: + enabled: true + description: "Retrieve fraud risk indicators for a transaction including severity levels and overall 
assessment. Use TXN-89012 through TXN-89015 for testing." + properties: + - name: transaction_id + type: string + description: "Transaction identifier to analyze (format TXN-XXXXX)" + required: true diff --git a/modules/ai-agents/examples/mcp-tools/processors/get_shipping_info.yaml b/modules/ai-agents/examples/mcp-tools/processors/get_shipping_info.yaml new file mode 100644 index 000000000..b0a15b497 --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/processors/get_shipping_info.yaml @@ -0,0 +1,38 @@ +label: get_shipping_info + +processors: + - mapping: | + let order_id = this.order_id + root = if $order_id == "ORD-12345" { + { + "order_id": $order_id, + "tracking_number": "FX1234567890", + "carrier": "Example Shipping", + "status": "in_transit", + "estimated_delivery": "2025-01-17", + "last_location": "San Francisco Distribution Center", + "last_update": "2025-01-15T14:30:00Z" + } + } else if $order_id == "ORD-67890" { + { + "order_id": $order_id, + "error": true, + "message": "Order has not shipped yet" + } + } else { + { + "order_id": $order_id, + "error": true, + "message": "No shipping information available" + } + } + +meta: + mcp: + enabled: true + description: "Get tracking and shipping information. ORD-12345 has shipping info, ORD-67890 has not shipped yet." 
+ properties: + - name: order_id + type: string + description: "The order ID to track" + required: true diff --git a/modules/ai-agents/examples/mcp-tools/processors/get_transaction_details.yaml b/modules/ai-agents/examples/mcp-tools/processors/get_transaction_details.yaml new file mode 100644 index 000000000..82b44ba6f --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/processors/get_transaction_details.yaml @@ -0,0 +1,99 @@ +label: get_transaction_details +mapping: | + root = match { + this.transaction_id == "TXN-89012" => { + "transaction_id": "TXN-89012", + "customer_id": "CUST-1001", + "amount": 1847.99, + "currency": "USD", + "merchant": { + "name": "LUXURY WATCHES INT", + "category": "jewelry", + "country": "SG", + "mcc": "5944" + }, + "card_last_four": "4532", + "date": "2026-01-18T14:22:00Z", + "location": { + "city": "Singapore", + "country": "SG", + "coordinates": "1.3521,103.8198" + }, + "status": "posted" + }, + this.transaction_id == "TXN-89013" => { + "transaction_id": "TXN-89013", + "customer_id": "CUST-1001", + "amount": 47.83, + "currency": "USD", + "merchant": { + "name": "EXAMPLE MKTPLACE", + "category": "online_retail", + "country": "US", + "mcc": "5942" + }, + "card_last_four": "4532", + "date": "2026-01-15T10:15:00Z", + "location": { + "city": "Seattle", + "country": "US", + "coordinates": "47.6062,-122.3321" + }, + "status": "posted" + }, + this.transaction_id == "TXN-89014" => { + "transaction_id": "TXN-89014", + "customer_id": "CUST-1002", + "amount": 29.99, + "currency": "USD", + "merchant": { + "name": "EXAMPLE STREAMING", + "category": "subscription_service", + "country": "US", + "mcc": "4899" + }, + "card_last_four": "8821", + "date": "2025-12-15T00:00:01Z", + "location": { + "city": "San Francisco", + "country": "US", + "coordinates": "37.7749,-122.4194" + }, + "status": "posted", + "recurring": true + }, + this.transaction_id == "TXN-89015" => { + "transaction_id": "TXN-89015", + "customer_id": "CUST-1003", + "amount": 312.50, + 
"currency": "EUR", + "merchant": { + "name": "HOTEL PARIS", + "category": "lodging", + "country": "FR", + "mcc": "7011" + }, + "card_last_four": "2193", + "date": "2026-01-10T20:30:00Z", + "location": { + "city": "Paris", + "country": "FR", + "coordinates": "48.8566,2.3522" + }, + "status": "posted" + }, + _ => { + "error": "transaction_not_found", + "message": "No transaction found with ID: " + this.transaction_id + } + } + +meta: + mcp: + enabled: true + description: "Retrieve detailed transaction information including merchant, location, and amount. Use TXN-89012 through TXN-89015 for testing." + properties: + - name: transaction_id + type: string + description: "Transaction identifier (format TXN-XXXXX)" + required: true diff --git a/modules/ai-agents/examples/mcp-tools/processors/get_transaction_history.yaml b/modules/ai-agents/examples/mcp-tools/processors/get_transaction_history.yaml new file mode 100644 index 000000000..3c8107fd3 --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/processors/get_transaction_history.yaml @@ -0,0 +1,108 @@ +label: get_transaction_history +mapping: | + root = match { + this.customer_id == "CUST-1001" => { + "customer_id": "CUST-1001", + "analysis_period": "last_90_days", + "spending_patterns": { + "average_transaction": 127.45, + "median_transaction": 65.20, + "total_transactions": 87, + "total_amount": 11088.15 + }, + "category_breakdown": [ + {"category": "online_retail", "count": 42, "avg_amount": 78.50}, + {"category": "groceries", "count": 28, "avg_amount": 95.30}, + {"category": "restaurants", "count": 12, "avg_amount": 45.80}, + {"category": "gas_stations", "count": 5, "avg_amount": 62.00} + ], + "location_patterns": { + "primary_region": "US_West_Coast", + "international_transactions": 0, + "cities": ["Seattle", "Bellevue", "Tacoma"] + }, + "merchant_patterns": { + "recurring_merchants": ["EXAMPLE MKTPLACE", "EXAMPLE WHOLESALE", "EXAMPLE COFFEE"], + "first_time_merchants_this_period": 3 + }, + "risk_indicators": { 
+ "unusual_activity": false, + "velocity_flags": 0, + "declined_transactions": 1 + } + }, + this.customer_id == "CUST-1002" => { + "customer_id": "CUST-1002", + "analysis_period": "last_90_days", + "spending_patterns": { + "average_transaction": 95.33, + "median_transaction": 52.10, + "total_transactions": 64, + "total_amount": 6101.12 + }, + "category_breakdown": [ + {"category": "subscription_service", "count": 15, "avg_amount": 29.99}, + {"category": "restaurants", "count": 25, "avg_amount": 68.40}, + {"category": "online_retail", "count": 18, "avg_amount": 110.20}, + {"category": "entertainment", "count": 6, "avg_amount": 45.00} + ], + "location_patterns": { + "primary_region": "US_West_Coast", + "international_transactions": 0, + "cities": ["San Francisco", "Oakland", "San Jose"] + }, + "merchant_patterns": { + "recurring_merchants": ["EXAMPLE STREAMING", "EXAMPLE MEDIA", "EXAMPLE AUDIO"], + "first_time_merchants_this_period": 7 + }, + "risk_indicators": { + "unusual_activity": false, + "velocity_flags": 0, + "declined_transactions": 0 + } + }, + this.customer_id == "CUST-1003" => { + "customer_id": "CUST-1003", + "analysis_period": "last_90_days", + "spending_patterns": { + "average_transaction": 215.67, + "median_transaction": 145.00, + "total_transactions": 52, + "total_amount": 11214.84 + }, + "category_breakdown": [ + {"category": "travel", "count": 8, "avg_amount": 650.00}, + {"category": "lodging", "count": 6, "avg_amount": 380.50}, + {"category": "restaurants", "count": 22, "avg_amount": 85.20}, + {"category": "online_retail", "count": 16, "avg_amount": 95.75} + ], + "location_patterns": { + "primary_region": "US_South", + "international_transactions": 3, + "cities": ["Austin", "Houston", "Dallas", "Paris", "London"] + }, + "merchant_patterns": { + "recurring_merchants": ["EXAMPLE AIRLINES", "EXAMPLE HOTEL", "EXAMPLE TRAVEL"], + "first_time_merchants_this_period": 12 + }, + "risk_indicators": { + "unusual_activity": false, + "velocity_flags": 0, + 
"declined_transactions": 0 + } + }, + _ => { + "error": "customer_not_found", + "message": "No transaction history found for customer ID: " + this.customer_id + } + } + +meta: + mcp: + enabled: true + description: "Retrieve customer transaction history with spending patterns, category breakdown, and risk indicators. Use CUST-1001, CUST-1002, or CUST-1003 for testing." + properties: + - name: customer_id + type: string + description: "Customer identifier (format CUST-XXXX)" + required: true diff --git a/modules/ai-agents/examples/mcp-tools/processors/get_weather_complete.yaml b/modules/ai-agents/examples/mcp-tools/processors/get_weather_complete.yaml new file mode 100644 index 000000000..5e57cc929 --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/processors/get_weather_complete.yaml @@ -0,0 +1,55 @@ +# Complete weather tool with validation, error handling, and response formatting +# tag::complete[] +label: get-weather + +processors: + # Validate and sanitize input + - label: validate_city + mutation: | + root.city = if this.city.or("").trim() == "" { + throw("city is required") + } else { + this.city.trim().lowercase().re_replace_all("[^a-z\\s\\-]", "") + } + root.units = this.units.or("metric") + + # Fetch weather data + - label: fetch_weather + try: + - http: + url: 'https://wttr.in/${! 
json("city") }?format=j1' + verb: GET + timeout: 10s + + - mutation: | + root.weather = { + "location": this.nearest_area.0.areaName.0.value, + "country": this.nearest_area.0.country.0.value, + "temperature_c": this.current_condition.0.temp_C, + "temperature_f": this.current_condition.0.temp_F, + "condition": this.current_condition.0.weatherDesc.0.value, + "humidity": this.current_condition.0.humidity, + "wind_kph": this.current_condition.0.windspeedKmph + } + + # Handle errors gracefully + - label: handle_errors + catch: + - mutation: | + root.error = true + root.message = "Failed to fetch weather: " + error() + +meta: + mcp: + enabled: true + description: "Get current weather for a city. Returns temperature, conditions, humidity, and wind speed." + properties: + - name: city + type: string + description: "City name (e.g., 'London', 'New York', 'Tokyo')" + required: true + - name: units + type: string + description: "Temperature units: 'metric' or 'imperial' (default: metric)" + required: false +# end::complete[] diff --git a/modules/ai-agents/examples/mcp-tools/processors/get_weather_simple.yaml b/modules/ai-agents/examples/mcp-tools/processors/get_weather_simple.yaml new file mode 100644 index 000000000..445bf7679 --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/processors/get_weather_simple.yaml @@ -0,0 +1,17 @@ +# Simple weather tool - minimal example +# tag::complete[] +http: + url: "https://wttr.in/${! 
this.city }?format=j1" + verb: GET + +meta: + mcp: + enabled: true + name: get_weather + description: "Get current weather for a city" + properties: + - name: city + type: string + description: "City name" + required: true +# end::complete[] diff --git a/modules/ai-agents/examples/http_processor.yaml b/modules/ai-agents/examples/mcp-tools/processors/http_processor.yaml similarity index 100% rename from modules/ai-agents/examples/http_processor.yaml rename to modules/ai-agents/examples/mcp-tools/processors/http_processor.yaml diff --git a/modules/ai-agents/examples/mcp-tools/processors/log_audit_event.yaml b/modules/ai-agents/examples/mcp-tools/processors/log_audit_event.yaml new file mode 100644 index 000000000..57b0a81a5 --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/processors/log_audit_event.yaml @@ -0,0 +1,60 @@ +label: log_audit_event +processors: + - mapping: | + root = { + "audit_id": uuid_v4(), + "timestamp": now(), + "event_type": "dispute_investigation", + "transaction_id": this.transaction_id, + "customer_id": this.customer_id, + "agent_decision": this.decision, + "risk_score": this.risk_score, + "evidence_reviewed": this.evidence, + "outcome": this.outcome, + "escalated": this.escalated, + "compliance_notes": this.notes, + "logged_by": "dispute-resolution-agent", + "status": "recorded" + } + + - log: + level: INFO + message: "Compliance audit event: ${!json()}" + +meta: + mcp: + enabled: true + description: "Log compliance audit events for dispute resolution. Records customer ID, transaction details, decision, and notes." 
+ properties: + - name: customer_id + type: string + description: "Customer identifier (format CUST-XXXX)" + required: true + - name: transaction_id + type: string + description: "Transaction identifier (format TXN-XXXXX)" + required: true + - name: decision + type: string + description: "Dispute resolution decision (approve_refund, deny_claim, etc.)" + required: true + - name: risk_score + type: number + description: "Calculated fraud risk score (0-100)" + required: true + - name: evidence + type: object + description: "Evidence reviewed during investigation" + required: true + - name: outcome + type: string + description: "Final outcome of the dispute (approved, denied, escalated, pending)" + required: true + - name: escalated + type: boolean + description: "Whether case was escalated for manual review" + required: false + - name: notes + type: string + description: "Additional compliance notes" + required: false diff --git a/modules/ai-agents/examples/mcp-tools/processors/lookup_customer.yaml b/modules/ai-agents/examples/mcp-tools/processors/lookup_customer.yaml new file mode 100644 index 000000000..6fba9edfa --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/processors/lookup_customer.yaml @@ -0,0 +1,23 @@ +# Look up customer by ID from PostgreSQL +# Example of sql_select processor tool +# tag::complete[] +label: lookup-customer # <1> + +sql_select: # <2> + driver: postgres + dsn: "${secrets.DATABASE_URL}" + table: customers + columns: ["id", "name", "email", "plan"] + where: id = ? + args_mapping: '[this.customer_id]' + +meta: # <3> + mcp: + enabled: true + description: "Look up a customer by ID and return their profile." 
+ properties: + - name: customer_id + type: string + description: "The customer's unique identifier" + required: true +# end::complete[] diff --git a/modules/ai-agents/examples/observable_tool.yaml b/modules/ai-agents/examples/mcp-tools/processors/observable_tool.yaml similarity index 100% rename from modules/ai-agents/examples/observable_tool.yaml rename to modules/ai-agents/examples/mcp-tools/processors/observable_tool.yaml diff --git a/modules/ai-agents/examples/mcp-tools/processors/openai_chat.yaml b/modules/ai-agents/examples/mcp-tools/processors/openai_chat.yaml new file mode 100644 index 000000000..b8c202a66 --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/processors/openai_chat.yaml @@ -0,0 +1,27 @@ +# OpenAI chat completion for sentiment analysis +# Use for text generation, classification, summarization +label: analyze-feedback + +# tag::component[] +openai_chat_completion: + api_key: "${secrets.OPENAI_API_KEY}" + model: "gpt-5.2" + prompt: | + Analyze this customer feedback and provide: + 1. Sentiment (positive/negative/neutral) + 2. Key themes + 3. Actionable insights + + Feedback: ${! json(feedback_text) } + max_tokens: 500 +# end::component[] + +meta: + mcp: + enabled: true + description: "Analyze customer feedback for sentiment and themes" + properties: + - name: feedback_text + type: string + description: "The customer feedback text to analyze" + required: true diff --git a/modules/ai-agents/examples/mcp-tools/processors/openai_embeddings.yaml b/modules/ai-agents/examples/mcp-tools/processors/openai_embeddings.yaml new file mode 100644 index 000000000..0c7f15f8e --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/processors/openai_embeddings.yaml @@ -0,0 +1,20 @@ +# Generate embeddings with OpenAI +# Use for semantic search, RAG pipelines, similarity matching +label: generate-embeddings + +# tag::component[] +openai_embeddings: + api_key: "${secrets.OPENAI_API_KEY}" + model: "text-embedding-3-small" + text: ${! 
json("content") } +# end::component[] + +meta: + mcp: + enabled: true + description: "Generate vector embeddings for text content" + properties: + - name: content + type: string + description: "Text content to generate embeddings for" + required: true diff --git a/modules/ai-agents/examples/order_workflow.yaml b/modules/ai-agents/examples/mcp-tools/processors/order_workflow.yaml similarity index 95% rename from modules/ai-agents/examples/order_workflow.yaml rename to modules/ai-agents/examples/mcp-tools/processors/order_workflow.yaml index ad0dc29ee..aebaac897 100644 --- a/modules/ai-agents/examples/order_workflow.yaml +++ b/modules/ai-agents/examples/mcp-tools/processors/order_workflow.yaml @@ -39,7 +39,7 @@ processors: root = this.merge({ "processing_tier": "premium", "processing_time_estimate": "2-4 hours", - "assigned_rep": "premium-team@company.com", + "assigned_rep": "premium-team@example.com", "priority_score": 95 }) @@ -51,7 +51,7 @@ processors: root = this.merge({ "processing_tier": "vip", "processing_time_estimate": "1-2 hours", - "assigned_rep": "vip-team@company.com", + "assigned_rep": "vip-team@example.com", "priority_score": 90, "perks": ["expedited_shipping", "white_glove_service"] }) @@ -63,7 +63,7 @@ processors: root = this.merge({ "processing_tier": "standard", "processing_time_estimate": "24-48 hours", - "assigned_rep": "support@company.com", + "assigned_rep": "support@example.com", "priority_score": 50 }) diff --git a/modules/ai-agents/examples/mcp-tools/processors/search_jira.yaml b/modules/ai-agents/examples/mcp-tools/processors/search_jira.yaml new file mode 100644 index 000000000..7ef2d6673 --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/processors/search_jira.yaml @@ -0,0 +1,31 @@ +# Search Jira issues using JQL +# Requires Enterprise license +# tag::complete[] +label: search-jira + +processors: + - generate: + count: 1 + mapping: | + root.jql = this.jql + root.maxResults = this.max_results.or(50) + root.fields = ["key", "summary", 
"status", "assignee", "priority"] + - jira: + base_url: "${secrets.JIRA_BASE_URL}" + username: "${secrets.JIRA_USERNAME}" + api_token: "${secrets.JIRA_API_TOKEN}" + +meta: + mcp: + enabled: true + description: "Search Jira issues using JQL. Returns matching issues with key, summary, status, assignee, and priority." + properties: + - name: jql + type: string + description: "JQL query (e.g., 'project = DOC AND status = Open')" + required: true + - name: max_results + type: number + description: "Maximum issues to return (default: 50)" + required: false +# end::complete[] diff --git a/modules/ai-agents/examples/mcp-tools/processors/transform_validate.yaml b/modules/ai-agents/examples/mcp-tools/processors/transform_validate.yaml new file mode 100644 index 000000000..b05b619ad --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/processors/transform_validate.yaml @@ -0,0 +1,40 @@ +# Transform and validate data with Bloblang +# Use for parsing, validation, filtering, enrichment +label: transform-user-data + +processors: + # tag::mapping[] + - mapping: | + # Parse and validate incoming data + root.user_id = this.user_id.or(throw("user_id is required")) + root.timestamp = now().ts_format("2006-01-02T15:04:05Z07:00") + + # Transform and enrich + root.email_domain = this.email.split("@").index(1) + root.is_premium = this.subscription_tier == "premium" + + # Filter sensitive data + root.profile = this.profile.or({}).without("ssn", "credit_card") + # end::mapping[] + +meta: + mcp: + enabled: true + description: "Transform and validate user data" + properties: + - name: user_id + type: string + description: "User identifier" + required: true + - name: email + type: string + description: "User email address" + required: true + - name: subscription_tier + type: string + description: "Subscription level" + required: false + - name: profile + type: object + description: "User profile data" + required: false diff --git 
a/modules/ai-agents/examples/mcp-tools/processors/verify_merchant.yaml b/modules/ai-agents/examples/mcp-tools/processors/verify_merchant.yaml new file mode 100644 index 000000000..e0ad87731 --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/processors/verify_merchant.yaml @@ -0,0 +1,126 @@ +label: verify_merchant +mapping: | + root = match { + this.merchant_name == "LUXURY WATCHES INT" => { + "merchant_name": "LUXURY WATCHES INT", + "merchant_id": "MER-99912", + "reputation_score": 12, + "reputation_level": "high_risk", + "verification_status": "unverified", + "fraud_reports": { + "total_reports": 247, + "recent_reports_30d": 42, + "confirmed_fraud_cases": 89 + }, + "business_details": { + "country": "Singapore", + "years_in_operation": 1, + "registration_verified": false + }, + "red_flags": [ + "High volume of fraud reports", + "Recently established business", + "Unverified business registration", + "Operates in high-risk category", + "Pattern of chargebacks" + ], + "recommendation": "block_merchant" + }, + this.merchant_name == "EXAMPLE MKTPLACE" => { + "merchant_name": "EXAMPLE MKTPLACE", + "merchant_id": "MER-00001", + "reputation_score": 98, + "reputation_level": "excellent", + "verification_status": "verified", + "fraud_reports": { + "total_reports": 1203, + "recent_reports_30d": 15, + "confirmed_fraud_cases": 0 + }, + "business_details": { + "country": "USA", + "years_in_operation": 20, + "registration_verified": true, + "parent_company": "Example Organization" + }, + "red_flags": [], + "recommendation": "trusted_merchant" + }, + this.merchant_name == "EXAMPLE STREAMING" => { + "merchant_name": "EXAMPLE STREAMING", + "merchant_id": "MER-45678", + "reputation_score": 65, + "reputation_level": "moderate", + "verification_status": "verified", + "fraud_reports": { + "total_reports": 892, + "recent_reports_30d": 67, + "confirmed_fraud_cases": 12 + }, + "business_details": { + "country": "USA", + "years_in_operation": 5, + "registration_verified": true + }, 
+ "red_flags": [ + "Known billing system issues", + "Frequent duplicate charge complaints", + "Difficult cancellation process" + ], + "common_issues": [ + "Duplicate subscriptions", + "Failed cancellation processing", + "Unclear billing descriptors" + ], + "recommendation": "verify_subscription_details" + }, + this.merchant_name == "HOTEL PARIS" => { + "merchant_name": "HOTEL PARIS", + "merchant_id": "MER-78234", + "reputation_score": 88, + "reputation_level": "trusted", + "verification_status": "verified", + "fraud_reports": { + "total_reports": 45, + "recent_reports_30d": 2, + "confirmed_fraud_cases": 0 + }, + "business_details": { + "country": "France", + "years_in_operation": 15, + "registration_verified": true, + "chain": "Independent Boutique Hotels" + }, + "red_flags": [], + "pricing": { + "average_room_rate_eur": 280, + "typical_range_eur": "220-350" + }, + "recommendation": "legitimate_merchant" + }, + _ => { + "merchant_name": this.merchant_name, + "reputation_score": 50, + "reputation_level": "unknown", + "verification_status": "not_found", + "fraud_reports": { + "total_reports": 0, + "recent_reports_30d": 0, + "confirmed_fraud_cases": 0 + }, + "business_details": {}, + "red_flags": [], + "message": "Merchant not found in verification database", + "recommendation": "manual_review_required" + } + } + +meta: + mcp: + enabled: true + description: "Verify merchant reputation and fraud history. Use LUXURY WATCHES INT (high risk), EXAMPLE MKTPLACE (trusted), EXAMPLE STREAMING (moderate), or HOTEL PARIS (trusted) for testing." 
+ properties: + - name: merchant_name + type: string + description: "Merchant name as it appears on transaction" + required: true diff --git a/modules/ai-agents/examples/weather_service.yaml b/modules/ai-agents/examples/mcp-tools/processors/weather_service.yaml similarity index 100% rename from modules/ai-agents/examples/weather_service.yaml rename to modules/ai-agents/examples/mcp-tools/processors/weather_service.yaml diff --git a/modules/ai-agents/examples/mcp-tools/snippets/bloblang_this_context.yaml b/modules/ai-agents/examples/mcp-tools/snippets/bloblang_this_context.yaml new file mode 100644 index 000000000..18409d0a1 --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/snippets/bloblang_this_context.yaml @@ -0,0 +1,15 @@ +# Bloblang 'this' context examples +# Use 'this' to access message fields in mutation, mapping, or args_mapping + +# tag::mutation[] +mutation: | + root.search_query = this.query.lowercase() + root.max_results = this.limit.or(10) +# end::mutation[] + +# tag::args_mapping[] +sql_select: + table: orders + where: customer_id = ? AND status = ? 
+ args_mapping: '[this.customer_id, this.status.or("active")]' +# end::args_mapping[] diff --git a/modules/ai-agents/examples/mcp-tools/snippets/defaults.yaml b/modules/ai-agents/examples/mcp-tools/snippets/defaults.yaml new file mode 100644 index 000000000..844ea3326 --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/snippets/defaults.yaml @@ -0,0 +1,24 @@ +# Handling optional parameters with defaults + +# tag::mutation[] +mutation: | + root.city = this.city # Required - will error if missing + root.units = this.units.or("metric") # Optional with default + root.limit = this.limit.or(10).number() # Optional, converted to number +# end::mutation[] + +# tag::properties[] +properties: + - name: city + type: string + description: "City name to look up" + required: true + - name: units + type: string + description: "Temperature units: 'metric' or 'imperial' (default: metric)" + required: false + - name: limit + type: number + description: "Max results (default: 10)" + required: false +# end::properties[] diff --git a/modules/ai-agents/examples/mcp-tools/snippets/interpolation.yaml b/modules/ai-agents/examples/mcp-tools/snippets/interpolation.yaml new file mode 100644 index 000000000..90bd0d8c5 --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/snippets/interpolation.yaml @@ -0,0 +1,13 @@ +# Bloblang interpolation in string fields +# Use ${! ... } to embed expressions in URLs, topics, headers + +# tag::http_url[] +http: + url: 'https://api.weather.com/v1/current?city=${! json("city") }&units=${! json("units").or("metric") }' +# end::http_url[] + +# tag::redpanda_topic[] +redpanda: + seed_brokers: ["${REDPANDA_BROKERS}"] # <1> + topic: '${! 
json("topic_name") }' # <2> +# end::redpanda_topic[] diff --git a/modules/ai-agents/examples/mcp-tools/snippets/secrets.yaml b/modules/ai-agents/examples/mcp-tools/snippets/secrets.yaml new file mode 100644 index 000000000..3ac6f98db --- /dev/null +++ b/modules/ai-agents/examples/mcp-tools/snippets/secrets.yaml @@ -0,0 +1,14 @@ +# Using secrets in tool configurations +# Reference secrets with ${secrets.SECRET_NAME} syntax + +# tag::example[] +http: + url: "https://api.example.com/data" + headers: + Authorization: "Bearer ${secrets.API_TOKEN}" + +sql_select: + driver: postgres + dsn: "${secrets.DATABASE_URL}" + table: customers +# end::example[] diff --git a/modules/ai-agents/examples/test-mcp-examples.sh b/modules/ai-agents/examples/mcp-tools/test-mcp-tools.sh similarity index 62% rename from modules/ai-agents/examples/test-mcp-examples.sh rename to modules/ai-agents/examples/mcp-tools/test-mcp-tools.sh index 068f7306a..f815a561f 100755 --- a/modules/ai-agents/examples/test-mcp-examples.sh +++ b/modules/ai-agents/examples/mcp-tools/test-mcp-tools.sh @@ -7,8 +7,8 @@ # 2. 
MCP metadata validation (enabled, description, properties) # # Usage: -# ./test-mcp-examples.sh # Run all tests -# ./test-mcp-examples.sh --lint-only # Only lint, skip metadata validation +# ./test-mcp-tools.sh # Run all tests +# ./test-mcp-tools.sh --lint-only # Only lint, skip metadata validation # # Unlike rp-connect-docs, Cloud MCP tools cannot be tested with # `rpk connect run` because they are standalone tool definitions, not @@ -24,10 +24,13 @@ BLUE='\033[0;34m' CYAN='\033[0;36m' NC='\033[0m' -# Get script directory +# Get script directory (script lives inside mcp-tools/) SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" cd "$SCRIPT_DIR" +# Component type directories +COMPONENT_DIRS=("inputs" "outputs" "processors" "caches") + # Counters TOTAL_TOOLS=0 PASSED_LINT=0 @@ -61,20 +64,24 @@ echo -e "📦 ${CYAN}SECTION 1: MCP Tool Linting${NC}" echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" echo "" -# Count YAML files -file_count=$(find . -maxdepth 1 -name "*.yaml" | wc -l | tr -d ' ') -TOTAL_TOOLS=$file_count - -echo -n -e "${BLUE}📁 examples/${NC} ($file_count files)... " - -if output=$(rpk connect mcp-server lint --skip-env-var-check . 2>&1); then - echo -e "${GREEN}✓ PASSED${NC}" - PASSED_LINT=$file_count -else - echo -e "${RED}✗ FAILED${NC}" - echo "$output" | sed 's/^/ /' | head -20 - FAILED_LINT=$file_count -fi +for dir in "${COMPONENT_DIRS[@]}"; do + if [[ -d "$dir" ]]; then + file_count=$(find "$dir" -maxdepth 1 -name "*.yaml" | wc -l | tr -d ' ') + if [[ $file_count -gt 0 ]]; then + TOTAL_TOOLS=$((TOTAL_TOOLS + file_count)) + echo -n -e "${BLUE}📁 $dir/${NC} ($file_count files)... 
" + + if output=$(rpk connect mcp-server lint --skip-env-var-check "$dir" 2>&1); then + echo -e "${GREEN}✓ PASSED${NC}" + PASSED_LINT=$((PASSED_LINT + file_count)) + else + echo -e "${RED}✗ FAILED${NC}" + echo "$output" | sed 's/^/ /' | head -20 + FAILED_LINT=$((FAILED_LINT + file_count)) + fi + fi + fi +done # ============================================================================ # SECTION 2: MCP Metadata Validation @@ -99,17 +106,19 @@ if $RUN_METADATA; then fi if $RUN_METADATA; then - for file in *.yaml; do - if [[ -f "$file" ]]; then - echo -n -e " ${BLUE}$file${NC}... " - - # Check if .meta.mcp exists - if $use_yq; then - mcp_exists=$(yq eval '.meta.mcp' "$file" 2>/dev/null) - enabled=$(yq eval '.meta.mcp.enabled' "$file" 2>/dev/null) - description=$(yq eval '.meta.mcp.description' "$file" 2>/dev/null) - else - mcp_exists=$(python3 -c " + for dir in "${COMPONENT_DIRS[@]}"; do + if [[ -d "$dir" ]]; then + for file in "$dir"/*.yaml; do + if [[ -f "$file" ]]; then + echo -n -e " ${BLUE}$file${NC}... 
" + + # Check if .meta.mcp exists + if $use_yq; then + mcp_exists=$(yq eval '.meta.mcp' "$file" 2>/dev/null) + enabled=$(yq eval '.meta.mcp.enabled' "$file" 2>/dev/null) + description=$(yq eval '.meta.mcp.description' "$file" 2>/dev/null) + else + mcp_exists=$(python3 -c " import yaml try: with open('$file') as f: @@ -120,7 +129,7 @@ try: except: print('null') " 2>/dev/null) - enabled=$(python3 -c " + enabled=$(python3 -c " import yaml try: with open('$file') as f: @@ -130,7 +139,7 @@ try: except: print('null') " 2>/dev/null) - description=$(python3 -c " + description=$(python3 -c " import yaml try: with open('$file') as f: @@ -140,22 +149,24 @@ try: except: print('null') " 2>/dev/null) - fi - - # Validate - if [[ "$mcp_exists" == "null" || -z "$mcp_exists" ]]; then - echo -e "${YELLOW}SKIPPED${NC} (no MCP metadata)" - SKIPPED=$((SKIPPED + 1)) - elif [[ "$enabled" != "true" ]]; then - echo -e "${YELLOW}WARNING${NC} (mcp.enabled not true)" - SKIPPED=$((SKIPPED + 1)) - elif [[ "$description" == "null" || -z "$description" ]]; then - echo -e "${RED}FAILED${NC} (missing description)" - FAILED_METADATA=$((FAILED_METADATA + 1)) - else - echo -e "${GREEN}PASSED${NC}" - PASSED_METADATA=$((PASSED_METADATA + 1)) - fi + fi + + # Validate + if [[ "$mcp_exists" == "null" || -z "$mcp_exists" ]]; then + echo -e "${YELLOW}SKIPPED${NC} (no MCP metadata)" + SKIPPED=$((SKIPPED + 1)) + elif [[ "$enabled" != "true" ]]; then + echo -e "${YELLOW}WARNING${NC} (mcp.enabled not true)" + SKIPPED=$((SKIPPED + 1)) + elif [[ "$description" == "null" || -z "$description" ]]; then + echo -e "${RED}FAILED${NC} (missing description)" + FAILED_METADATA=$((FAILED_METADATA + 1)) + else + echo -e "${GREEN}PASSED${NC}" + PASSED_METADATA=$((PASSED_METADATA + 1)) + fi + fi + done fi done fi @@ -173,16 +184,20 @@ echo "━━━━━━━━━━━━━━━━━━━━━━━━ echo "" secrets_issues=0 -for file in *.yaml; do - if [[ -f "$file" ]]; then - # Check for non-Cloud secrets patterns (${VAR} without secrets. 
prefix) - # Exclude: - # - ${! ... } which is Bloblang interpolation - # - ${REDPANDA_BROKERS} which is platform-injected - if grep -E '\$\{[A-Z_]+\}' "$file" | grep -v '\${secrets\.' | grep -v '\${!' | grep -v '\${REDPANDA_BROKERS}' > /dev/null 2>&1; then - echo -e " ${BLUE}$file${NC}... ${YELLOW}WARNING${NC} (uses env vars instead of \${secrets.X})" - secrets_issues=$((secrets_issues + 1)) - fi +for dir in "${COMPONENT_DIRS[@]}"; do + if [[ -d "$dir" ]]; then + for file in "$dir"/*.yaml; do + if [[ -f "$file" ]]; then + # Check for non-Cloud secrets patterns (${VAR} without secrets. prefix) + # Exclude: + # - ${! ... } which is Bloblang interpolation + # - ${REDPANDA_BROKERS} which is platform-injected + if grep -E '\$\{[A-Z_]+\}' "$file" | grep -v '\${secrets\.' | grep -v '\${!' | grep -v '\${REDPANDA_BROKERS}' > /dev/null 2>&1; then + echo -e " ${BLUE}$file${NC}... ${YELLOW}WARNING${NC} (uses env vars instead of \${secrets.X})" + secrets_issues=$((secrets_issues + 1)) + fi + fi + done fi done diff --git a/modules/ai-agents/examples/pipelines/agent-transformation.yaml b/modules/ai-agents/examples/pipelines/agent-transformation.yaml new file mode 100644 index 000000000..b4c14609c --- /dev/null +++ b/modules/ai-agents/examples/pipelines/agent-transformation.yaml @@ -0,0 +1,32 @@ +# Agent as transformation node +# Uses agent reasoning for complex transformations (natural language to SQL) +input: + redpanda: + seed_brokers: ["${REDPANDA_BROKERS}"] + topics: [nl-queries] + consumer_group: query-converter + tls: + enabled: true + sasl: + - mechanism: SCRAM-SHA-256 + username: "${REDPANDA_USERNAME}" + password: "${REDPANDA_PASSWORD}" + +pipeline: + # tag::processors[] + processors: + - a2a_message: + agent_card_url: "${AGENT_CARD_URL}" + prompt: "Convert to SQL: ${!this.natural_language_query}" + # end::processors[] + +output: + redpanda: + seed_brokers: ["${REDPANDA_BROKERS}"] + topic: sql-queries + tls: + enabled: true + sasl: + - mechanism: SCRAM-SHA-256 + username: 
"${REDPANDA_USERNAME}" + password: "${REDPANDA_PASSWORD}" diff --git a/modules/ai-agents/examples/pipelines/async-workflows.yaml b/modules/ai-agents/examples/pipelines/async-workflows.yaml new file mode 100644 index 000000000..38b0daf91 --- /dev/null +++ b/modules/ai-agents/examples/pipelines/async-workflows.yaml @@ -0,0 +1,32 @@ +# Asynchronous workflow pipeline +# Processes events in the background with acceptable latency +# tag::pipeline[] +input: + redpanda: + seed_brokers: ["${REDPANDA_BROKERS}"] + topics: [daily-reports] + consumer_group: report-analyzer + tls: + enabled: true + sasl: + - mechanism: SCRAM-SHA-256 + username: "${REDPANDA_USERNAME}" + password: "${REDPANDA_PASSWORD}" + +pipeline: + processors: + - a2a_message: + agent_card_url: "${AGENT_CARD_URL}" + prompt: "Summarize this report: ${!content()}" +# end::pipeline[] + +output: + redpanda: + seed_brokers: ["${REDPANDA_BROKERS}"] + topic: report-summaries + tls: + enabled: true + sasl: + - mechanism: SCRAM-SHA-256 + username: "${REDPANDA_USERNAME}" + password: "${REDPANDA_PASSWORD}" diff --git a/modules/ai-agents/examples/pipelines/dispute-pipeline.yaml b/modules/ai-agents/examples/pipelines/dispute-pipeline.yaml new file mode 100644 index 000000000..ef312a562 --- /dev/null +++ b/modules/ai-agents/examples/pipelines/dispute-pipeline.yaml @@ -0,0 +1,157 @@ +# Event-driven transaction dispute processing pipeline +# Automatically flags high-risk transactions and routes them to dispute agent + +input: + kafka: + addresses: ["${REDPANDA_BROKERS}"] + topics: ["bank.transactions"] + consumer_group: dispute-processor + tls: + enabled: true + sasl: + mechanism: SCRAM-SHA-256 + user: "${secrets.DISPUTE_PIPELINE_USERNAME}" + password: "${secrets.DISPUTE_PIPELINE_PASSWORD}" + +pipeline: + processors: + # Filter for high-value or suspicious transactions + - branch: + request_map: | + # Only process transactions above $500 or flagged by upstream systems + root = if this.amount > 500 || this.preliminary_flag == 
true { + this + } else { + deleted() + } + + processors: + # Calculate preliminary risk score based on transaction attributes + - mapping: | + # Preserve original transaction + root = this + + # Location risk: international transactions get higher score + let location_risk = if this.merchant.country != this.card.billing_country { 40 } else { 0 } + + # Amount risk: large amounts relative to account averages + let amount_risk = if this.amount > 1000 { 30 } else if this.amount > 500 { 15 } else { 0 } + + # Velocity risk: check for multiple recent transactions + let velocity_risk = if this.recent_transaction_count > 5 { 20 } else { 0 } + + # Category risk: luxury goods and high-risk categories + let category_risk = match this.merchant.mcc { + "5944" => 20, # Jewelry + "5094" => 25, # Precious stones + _ => 0 + } + + # Calculate total score + let total_score = $location_risk + $amount_risk + $velocity_risk + $category_risk + + root.preliminary_risk_score = $total_score + root.risk_level = if $total_score > 70 { + "high" + } else if $total_score > 40 { + "medium" + } else { + "low" + } + + # Route high and medium risk transactions to dispute agent for investigation + - branch: + request_map: | + # Only send to agent if risk is medium or higher + root = if this.preliminary_risk_score >= 40 { this } else { deleted() } + + processors: + # Invoke dispute resolution agent via A2A protocol + - a2a_message: + agent_card_url: "${secrets.DISPUTE_AGENT_CARD_URL}" + prompt: | + Investigate this potentially fraudulent transaction and respond with ONLY a JSON object (no additional text): + + Transaction ID: ${! this.transaction_id } + Customer ID: ${! this.customer_id } + Amount: $${! this.amount } ${! this.currency } + Merchant: ${! this.merchant.name } + Location: ${! this.merchant.city }, ${! this.merchant.country } + Date: ${! this.transaction_date } + Preliminary Risk Score: ${! this.preliminary_risk_score }/100 + Risk Level: ${! 
this.risk_level } + + Return ONLY this JSON format with no other text: + { + "recommendation": "block_and_investigate" | "hold_for_review" | "approve", + "fraud_score": , + "confidence": "high" | "medium" | "low", + "reasoning": "" + } + + # Map agent response back to transaction record + result_map: | + # By default, result_map preserves the original message that entered the branch + # Just add the agent investigation field + root.agent_investigation = if content().string().parse_json().catch(null) != null { + content().string().parse_json() + } else { + { + "recommendation": "manual_review_required", + "fraud_score": 50, + "confidence": "low", + "reasoning": "Agent returned unparseable response: " + content().string().slice(0, 100) + } + } + + # Merge risk scoring and agent results back to original transaction + result_map: | + root = content() + + # Enrich with final decision and tracing metadata + - mapping: | + # Preserve original transaction and all computed fields + root = this + + # Only set final_decision and alert_level if agent investigation occurred + root.final_decision = if this.agent_investigation.exists("recommendation") { + match { + this.agent_investigation.recommendation == "block_and_investigate" => "blocked", + this.agent_investigation.recommendation == "hold_for_review" => "pending_review", + this.agent_investigation.recommendation == "approve" => "approved", + _ => "manual_review_required" + } + } else { + "low_risk_no_investigation" + } + + root.alert_level = if this.agent_investigation.exists("fraud_score") { + match { + this.agent_investigation.fraud_score >= 80 => "critical", + this.agent_investigation.fraud_score >= 60 => "high", + this.agent_investigation.fraud_score >= 40 => "medium", + _ => "low" + } + } else { + "low" + } + + # Add execution metadata for tracing back to agent transcripts + root.pipeline_metadata = { + "processed_at": now().ts_format("2006-01-02T15:04:05.000Z"), + "transaction_id": this.transaction_id, + 
"customer_id": this.customer_id, + "agent_invoked": this.agent_investigation.exists("fraud_score") + } + +output: + kafka: + addresses: ["${REDPANDA_BROKERS}"] + topic: bank.dispute_results + key: "${! this.transaction_id }" + tls: + enabled: true + sasl: + mechanism: SCRAM-SHA-256 + user: "${secrets.DISPUTE_PIPELINE_USERNAME}" + password: "${secrets.DISPUTE_PIPELINE_PASSWORD}" diff --git a/modules/ai-agents/examples/pipelines/event-driven-invocation.yaml b/modules/ai-agents/examples/pipelines/event-driven-invocation.yaml new file mode 100644 index 000000000..677beb960 --- /dev/null +++ b/modules/ai-agents/examples/pipelines/event-driven-invocation.yaml @@ -0,0 +1,30 @@ +# Event-driven agent invocation pipeline +# Invokes an agent for each event in a stream +input: + redpanda: + seed_brokers: ["${REDPANDA_BROKERS}"] + topics: [transactions] + consumer_group: fraud-detector + tls: + enabled: true + sasl: + - mechanism: SCRAM-SHA-256 + username: "${REDPANDA_USERNAME}" + password: "${REDPANDA_PASSWORD}" + +pipeline: + processors: + - a2a_message: + agent_card_url: "${AGENT_CARD_URL}" + prompt: "Analyze this transaction: ${!content()}" + +output: + redpanda: + seed_brokers: ["${REDPANDA_BROKERS}"] + topic: fraud-alerts + tls: + enabled: true + sasl: + - mechanism: SCRAM-SHA-256 + username: "${REDPANDA_USERNAME}" + password: "${REDPANDA_PASSWORD}" diff --git a/modules/ai-agents/examples/pipelines/fraud-detection-routing.yaml b/modules/ai-agents/examples/pipelines/fraud-detection-routing.yaml new file mode 100644 index 000000000..ea5f3f982 --- /dev/null +++ b/modules/ai-agents/examples/pipelines/fraud-detection-routing.yaml @@ -0,0 +1,75 @@ +# Fraud detection pipeline with score-based routing +# Analyzes every transaction and routes to different topics based on fraud score +input: + redpanda: + seed_brokers: ["${REDPANDA_BROKERS}"] + topics: [transactions] + consumer_group: fraud-detector + tls: + enabled: true + sasl: + - mechanism: SCRAM-SHA-256 + username: 
"${REDPANDA_USERNAME}" + password: "${REDPANDA_PASSWORD}" + +pipeline: + processors: + - branch: + request_map: | + root.transaction_id = this.id + root.amount = this.amount + root.merchant = this.merchant + root.user_id = this.user_id + processors: + - a2a_message: + agent_card_url: "${AGENT_CARD_URL}" + prompt: | + Analyze this transaction for fraud: + Amount: ${! json("amount") } + Merchant: ${! json("merchant") } + User: ${! json("user_id") } + + Return JSON: { "fraud_score": 0-100, "reason": "explanation", "recommend_block": true/false } + result_map: | + root = this + root.fraud_analysis = content().parse_json().catch({}) + + - mapping: | + root = this + meta fraud_score = this.fraud_analysis.fraud_score + +output: + switch: + cases: + - check: 'meta("fraud_score") >= 80' + output: + redpanda: + seed_brokers: ["${REDPANDA_BROKERS}"] + topic: fraud-alerts-high + tls: + enabled: true + sasl: + - mechanism: SCRAM-SHA-256 + username: "${REDPANDA_USERNAME}" + password: "${REDPANDA_PASSWORD}" + - check: 'meta("fraud_score") >= 50' + output: + redpanda: + seed_brokers: ["${REDPANDA_BROKERS}"] + topic: fraud-alerts-medium + tls: + enabled: true + sasl: + - mechanism: SCRAM-SHA-256 + username: "${REDPANDA_USERNAME}" + password: "${REDPANDA_PASSWORD}" + - output: + redpanda: + seed_brokers: ["${REDPANDA_BROKERS}"] + topic: transactions-cleared + tls: + enabled: true + sasl: + - mechanism: SCRAM-SHA-256 + username: "${REDPANDA_USERNAME}" + password: "${REDPANDA_PASSWORD}" diff --git a/modules/ai-agents/examples/pipelines/fraud-detection-simple.yaml b/modules/ai-agents/examples/pipelines/fraud-detection-simple.yaml new file mode 100644 index 000000000..debb8baa6 --- /dev/null +++ b/modules/ai-agents/examples/pipelines/fraud-detection-simple.yaml @@ -0,0 +1,30 @@ +# Fraud detection pipeline that invokes an agent for every transaction +# Replace AGENT_CARD_URL with your actual agent card URL +input: + redpanda: + seed_brokers: ["${REDPANDA_BROKERS}"] + topics: 
[transactions] + consumer_group: fraud-detector + tls: + enabled: true + sasl: + - mechanism: SCRAM-SHA-256 + username: "${REDPANDA_USERNAME}" + password: "${REDPANDA_PASSWORD}" + +pipeline: + processors: + - a2a_message: + agent_card_url: "${AGENT_CARD_URL}" + prompt: "Analyze this transaction: ${!content()}" + +output: + redpanda: + seed_brokers: ["${REDPANDA_BROKERS}"] + topic: fraud-alerts + tls: + enabled: true + sasl: + - mechanism: SCRAM-SHA-256 + username: "${REDPANDA_USERNAME}" + password: "${REDPANDA_PASSWORD}" diff --git a/modules/ai-agents/examples/pipelines/multi-agent-orchestration.yaml b/modules/ai-agents/examples/pipelines/multi-agent-orchestration.yaml new file mode 100644 index 000000000..934d168ba --- /dev/null +++ b/modules/ai-agents/examples/pipelines/multi-agent-orchestration.yaml @@ -0,0 +1,35 @@ +# Multi-agent pipeline orchestration +# Chains multiple agents in sequence: translate -> analyze sentiment -> route +input: + redpanda: + seed_brokers: ["${REDPANDA_BROKERS}"] + topics: [international-feedback] + consumer_group: feedback-processor + tls: + enabled: true + sasl: + - mechanism: SCRAM-SHA-256 + username: "${REDPANDA_USERNAME}" + password: "${REDPANDA_PASSWORD}" + +pipeline: + # tag::processors[] + processors: + - a2a_message: + agent_card_url: "${TRANSLATOR_AGENT_URL}" + - a2a_message: + agent_card_url: "${SENTIMENT_AGENT_URL}" + - a2a_message: + agent_card_url: "${ROUTER_AGENT_URL}" + # end::processors[] + +output: + redpanda: + seed_brokers: ["${REDPANDA_BROKERS}"] + topic: processed-feedback + tls: + enabled: true + sasl: + - mechanism: SCRAM-SHA-256 + username: "${REDPANDA_USERNAME}" + password: "${REDPANDA_PASSWORD}" diff --git a/modules/ai-agents/examples/pipelines/streaming-enrichment.yaml b/modules/ai-agents/examples/pipelines/streaming-enrichment.yaml new file mode 100644 index 000000000..3f544d50d --- /dev/null +++ b/modules/ai-agents/examples/pipelines/streaming-enrichment.yaml @@ -0,0 +1,35 @@ +# Streaming data enrichment 
pipeline +# Adds AI-generated metadata (sentiment) to events +input: + redpanda: + seed_brokers: ["${REDPANDA_BROKERS}"] + topics: [customer-feedback] + consumer_group: sentiment-enricher + tls: + enabled: true + sasl: + - mechanism: SCRAM-SHA-256 + username: "${REDPANDA_USERNAME}" + password: "${REDPANDA_PASSWORD}" + +pipeline: + # tag::processors[] + processors: + - branch: + request_map: 'root = this.text' + processors: + - a2a_message: + agent_card_url: "${AGENT_CARD_URL}" + result_map: 'root.sentiment = content()' + # end::processors[] + +output: + redpanda: + seed_brokers: ["${REDPANDA_BROKERS}"] + topic: enriched-feedback + tls: + enabled: true + sasl: + - mechanism: SCRAM-SHA-256 + username: "${REDPANDA_USERNAME}" + password: "${REDPANDA_PASSWORD}" diff --git a/modules/ai-agents/examples/pipelines/test-pipelines.sh b/modules/ai-agents/examples/pipelines/test-pipelines.sh new file mode 100755 index 000000000..b8bfd49d5 --- /dev/null +++ b/modules/ai-agents/examples/pipelines/test-pipelines.sh @@ -0,0 +1,114 @@ +#!/usr/bin/env bash +# +# Test script for Redpanda Cloud pipeline examples +# +# This script uses rpk connect lint to validate pipeline configurations. +# Cloud-specific processors (like a2a_message) are not available in the local +# CLI, so those errors are expected and noted. +# +# Usage: +# ./test-pipelines.sh +# +# Exit codes: +# 0 - All files have valid YAML structure (Cloud processor errors are expected) +# 1 - YAML syntax errors or unexpected failures + +set -euo pipefail + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +CYAN='\033[0;36m' +NC='\033[0m' + +# Get script directory +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" + +# Counters +TOTAL=0 +PASSED=0 +CLOUD_PROCESSOR_ERRORS=0 +FAILED=0 + +echo "🧪 Redpanda Cloud Pipeline Examples - Test Suite" +echo "=================================================" +echo "" + +# Check for rpk +if ! 
command -v rpk &> /dev/null; then + echo -e "${RED}Error: rpk is required${NC}" + echo "Install rpk: https://docs.redpanda.com/current/get-started/rpk-install/" + exit 1 +fi + +echo -e "${CYAN}Using:${NC} $(rpk version 2>/dev/null | head -1 || echo 'rpk')" +echo "" + +# ============================================================================ +# Lint each pipeline file +# ============================================================================ + +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo -e "📦 ${CYAN}Pipeline Linting${NC}" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "" + +for file in *.yaml; do + if [[ -f "$file" ]]; then + TOTAL=$((TOTAL + 1)) + echo -n -e " ${BLUE}$file${NC}... " + + # Run rpk connect lint + output=$(rpk connect lint --skip-env-var-check "$file" 2>&1) || true + + if [[ -z "$output" ]]; then + # No output means success + echo -e "${GREEN}PASSED${NC}" + PASSED=$((PASSED + 1)) + elif echo "$output" | grep -q "a2a_message\|unable to infer.*a2a"; then + # Cloud-specific processor error (expected) + echo -e "${YELLOW}OK${NC} (Cloud processor - requires Redpanda Cloud)" + CLOUD_PROCESSOR_ERRORS=$((CLOUD_PROCESSOR_ERRORS + 1)) + elif echo "$output" | grep -qi "yaml\|parse\|syntax"; then + # YAML syntax error (unexpected) + echo -e "${RED}FAILED${NC}" + echo "$output" | sed 's/^/ /' + FAILED=$((FAILED + 1)) + else + # Other lint error (unexpected) + echo -e "${RED}FAILED${NC}" + echo "$output" | sed 's/^/ /' + FAILED=$((FAILED + 1)) + fi + fi +done + +# ============================================================================ +# Summary +# ============================================================================ + +echo "" +echo "=================================================" +echo "📊 Test Summary" +echo "=================================================" +echo -e "Total files: $TOTAL" +echo -e "Fully passed: $PASSED" +echo -e "Cloud processors: $CLOUD_PROCESSOR_ERRORS 
(expected - requires Cloud)" +echo -e "Failed: $FAILED" +echo "──────────────────────────────────────────────────" + +if [[ $FAILED -gt 0 ]]; then + echo -e "${RED}❌ $FAILED file(s) have YAML errors${NC}" + exit 1 +else + echo -e "${GREEN}✅ All files valid${NC}" + if [[ $CLOUD_PROCESSOR_ERRORS -gt 0 ]]; then + echo "" + echo -e "${YELLOW}Note: $CLOUD_PROCESSOR_ERRORS file(s) use Cloud-specific processors (a2a_message)${NC}" + echo -e "${YELLOW}These require deployment to Redpanda Cloud for full validation.${NC}" + fi + exit 0 +fi diff --git a/modules/ai-agents/examples/testing.adoc b/modules/ai-agents/examples/testing.adoc deleted file mode 100644 index 96d8525ab..000000000 --- a/modules/ai-agents/examples/testing.adoc +++ /dev/null @@ -1,290 +0,0 @@ -= Test MCP Examples -:description: Automated testing strategies for Redpanda Cloud MCP server examples. - -This document describes the automated testing strategies for Redpanda Cloud MCP server examples. - -All MCP examples are automatically tested to ensure: - -. YAML syntax and structure are correct -. MCP metadata is complete and valid -. Component schemas match Redpanda Connect specifications -. 
Secrets syntax uses Cloud Secrets Store format (`${secrets.X}`) - -== Testing approaches - -=== Configuration linting - -Validate MCP tool configurations using `rpk connect lint`: - -[,bash] ----- -# Lint a single MCP tool -rpk connect lint weather_service.yaml - -# Lint all examples -rpk connect lint *.yaml - -# Lint with environment variable checking skipped (recommended for MCP) -rpk connect lint --skip-env-var-check *.yaml ----- - -This checks for common issues such as: - -* YAML syntax errors -* Unknown component types -* Invalid field names -* Type mismatches -* Missing required fields - -=== MCP metadata validation - -The test script validates MCP-specific metadata for all tool examples: - -[,bash] ----- -# Run all tests (includes linting + MCP validation) -./test-mcp-examples.sh - -# Test specific files -./test-mcp-examples.sh weather_*.yaml ----- - -MCP metadata validation checks: - -* Presence of `meta.mcp` section -* `enabled: true` is set -* `description` field exists and is non-empty -* `properties` are properly structured (if present) - -=== Unit testing limitations - -MCP tool examples are standalone component definitions (`label:`, `processors:`, `meta:`), not full pipelines with `input:`, `pipeline:`, `output:` sections. This means they cannot use inline `tests:` sections like cookbook examples do. - -The `rpk connect test` command requires full pipeline structure with paths like `/pipeline/processors/0`, which don't exist in MCP tool definitions. - -For testing MCP tools: - -- Ensure syntax and schema correctness. -- Verify MCP metadata has proper description and properties. -- Perform manual testing using the Cloud Console MCP Server interface to test tools end-to-end. - -== MCP tool structure - -MCP tools are structured as standalone components: - -[,yaml] ----- -label: weather-service -processors: - - label: fetch_weather_data - http: - url: 'https://wttr.in/${! 
@city }?format=j1' - verb: GET - - - label: format_response - mutation: | - root = { - "city": @city, - "temperature": this.current_condition.0.temp_C.number() - } - -meta: - mcp: - enabled: true - description: "Get current weather conditions for any city worldwide" - properties: - - name: city - type: string - description: "Name of the city" - required: true ----- - -== Test script usage - -The `test-mcp-examples.sh` script provides automated validation: - -[,bash] ----- -# Test all examples -./test-mcp-examples.sh - -# Test specific files -./test-mcp-examples.sh weather_*.yaml -./test-mcp-examples.sh customer_*.yaml ----- - -The script provides color-coded output: - -[,console] ----- -🧪 Redpanda Connect MCP Examples Test Suite (Cloud) -==================================================== - -📄 Testing: weather_service.yaml - Linting weather_service.yaml... PASSED - Validating MCP metadata... PASSED - -==================================================== -📊 Test Summary -==================================================== -Total configs tested: 10 -Passed: 10 -Failed: 0 - -✅ All tests passed! ----- - -== Manual end-to-end testing - -For comprehensive validation, test MCP tools using the Cloud Console: - -. Navigate to your Cloud cluster's MCP Server configuration -. Add or update your MCP tool configuration -. Use the Cloud Console's MCP Inspector to locate your tool -. Verify the tool executes correctly and returns expected results - -This validates: - -* Tool loads correctly in the MCP server -* Tool executes with provided parameters -* Responses are formatted correctly -* Secrets are properly resolved from Cloud Secrets Store - -== GitHub Actions CI/CD - -Automated tests run on every push and pull request using GitHub Actions. - -The workflow tests all examples whenever: - -* Any `.yaml` file in `modules/ai-agents/examples/` changes -* The test script itself is modified - -See `.github/workflows/test-mcp-examples.yaml` for the complete workflow. 
- -== Best practices - -=== Use descriptive tool names - -[,yaml] ----- -# Good -label: fetch-customer-orders - -# Bad -label: tool1 ----- - -=== Write clear MCP descriptions - -[,yaml] ----- -# Good -meta: - mcp: - description: "Fetch a customer's order history and calculate spending metrics over the last 30 days" - -# Bad -meta: - mcp: - description: "Get orders" ----- - -=== Document all properties - -[,yaml] ----- -# Good -properties: - - name: customer_id - type: string - description: "Unique identifier for the customer" - required: true - - name: days - type: number - description: "Number of days to look back (default: 30)" - required: false - -# Bad -properties: - - name: id - type: string - required: true ----- - -=== Use Cloud Secrets Store for sensitive data - -[,yaml] ----- -# Cloud format - uses Secrets Store -sql_select: - driver: "postgres" - dsn: "${secrets.POSTGRES_DSN}" - table: "customers" ----- - -=== Tag your examples - -[,yaml] ----- -meta: - tags: [ example, weather, api ] # Helps organize and filter - mcp: - enabled: true ----- - -== Adding new examples - -When adding new MCP tool examples: - -. **Create your YAML file** in `modules/ai-agents/examples/`: -+ -[,bash] ----- -cd modules/ai-agents/examples -touch my-new-tool.yaml ----- - -. **Include complete MCP metadata:** -+ -[,yaml] ----- -label: my-new-tool -processors: - # Your processor configuration - -meta: - mcp: - enabled: true - description: "Clear, task-oriented description" - properties: - - name: param_name - type: string - description: "Parameter purpose and constraints" - required: true ----- - -. **Lint your example:** -+ -[,bash] ----- -rpk connect lint --skip-env-var-check my-new-tool.yaml ----- - -. **Run automated tests:** -+ -[,bash] ----- -./test-mcp-examples.sh my-new-tool.yaml ----- - -. **Test in Cloud Console (recommended):** -+ -Deploy your MCP server configuration and test the tool through the Cloud Console AI interface. - -. 
**Commit your example:** -+ -[,bash] ----- -git add modules/ai-agents/examples/my-new-tool.yaml -git commit -m "Add my-new-tool MCP example" ----- diff --git a/modules/ai-agents/pages/agents/a2a-concepts.adoc b/modules/ai-agents/pages/agents/a2a-concepts.adoc new file mode 100644 index 000000000..675142eb0 --- /dev/null +++ b/modules/ai-agents/pages/agents/a2a-concepts.adoc @@ -0,0 +1,121 @@ += A2A Protocol +:description: Learn how the A2A protocol enables agent discovery and communication. +:page-topic-type: concepts +:personas: agent_developer, app_developer, streaming_developer +:learning-objective-1: Describe the A2A protocol and its role in agent communication +:learning-objective-2: Explain how agent cards enable discovery +:learning-objective-3: Identify how authentication secures agent communication + +The Agent-to-Agent (A2A) protocol is an open standard for agent communication and discovery. Redpanda Cloud uses A2A for both external integration and internal pipeline-to-agent communication. + +After reading this page, you will be able to: + +* [ ] {learning-objective-1} +* [ ] {learning-objective-2} +* [ ] {learning-objective-3} + +== What is the A2A protocol? + +The Agent-to-Agent (A2A) protocol is an open standard that defines how agents discover, communicate with, and invoke each other. + +Agents that implement A2A expose their capabilities through a standardized agent card. This allows other systems to interact with them without prior knowledge of their implementation. + +The protocol provides: + +* Standardized discovery: Agent cards describe capabilities in a machine-readable format. +* Platform independence: Any system can call any A2A-compliant agent. +* Version negotiation: Protocol versions ensure compatibility between agents. +* Communication mode flexibility: Supports synchronous request/response and streaming. + +For the complete specification, see link:https://a2a.ag/spec[a2a.ag/spec^]. 
+ +== Agent cards + +Every A2A-compliant agent exposes an agent card at a well-known URL. + +The agent card is a JSON document that describes what the agent can do and how to interact with it. For the complete agent card specification, see link:https://agent2agent.info/docs/concepts/agentcard/[Agent Card documentation^]. + +[#agent-card-location] +=== Agent card location + +Redpanda Cloud agents expose their agent cards at the `/.well-known/agent-card.json` subpath of the agent URL. You can find the agent URL on the agent overview page in the Redpanda Cloud Console under *Agentic AI* > *AI Agents*. + +For example, if your agent URL is `\https://my-agent.ai-agents.abc123.cloud.redpanda.com`, your agent card URL is `\https://my-agent.ai-agents.abc123.cloud.redpanda.com/.well-known/agent-card.json`. + +The `.well-known` path follows internet standards for service discovery, making agents discoverable without configuration. + +To configure the agent card, see xref:ai-agents:agents/create-agent.adoc#configure-a2a-discovery-metadata-optional[Configure A2A discovery metadata]. + +== Where A2A is used in Redpanda Cloud + +Redpanda Cloud uses the A2A protocol in two contexts: + +=== External integration + +External applications and agents hosted outside Redpanda Cloud use A2A to call Redpanda Cloud agents. This includes backend services, CLI tools, custom UIs, and agents hosted on other platforms. + +For integration pattern guidance, see xref:ai-agents:agents/integration-overview.adoc[]. + +=== Internal pipeline-to-agent integration + +Redpanda Connect pipelines use the xref:develop:connect/components/processors/a2a_message.adoc[`a2a_message`] processor to invoke agents for each event in a stream. This enables real-time interaction between streaming data and AI agents, enabling use cases like: + +* Real-time fraud detection on every transaction. +* Streaming data enrichment with AI-generated fields. +* Event-driven agent invocation for automated processing. 
+ +The `a2a_message` processor uses the A2A protocol internally to discover and call agents. For pipeline patterns, see xref:ai-agents:agents/pipeline-integration-patterns.adoc[]. + +== How agents discover each other + +A2A enables dynamic discovery without hardcoded configuration: + +. The caller fetches the agent card from the well-known URL. +. The caller checks the protocol version and supported communication modes. +. The caller uses the input schema from the agent card to format the request properly. +. The caller sends the request to the agent's endpoint. + +This discovery model allows: + +* New agents to become available immediately once deployed +* Existing agents to update their capabilities while callers adapt dynamically +* Callers to understand exactly what agents do through self-describing agent cards + +== Authentication + +A2A-compliant agents require authentication to prevent unauthorized access. + +Redpanda Cloud agents use OAuth2 client credentials flow. When you create an agent, the system provisions a service account with a client ID and secret. + +External callers use these credentials to obtain access tokens: + +. Agent creation automatically provisions a service account with credentials. +. Applications exchange the client ID and secret for a time-limited access token via OAuth2. +. Applications include the access token in the Authorization header when calling the agent endpoint. +. When tokens expire, applications exchange credentials again for a new token. + +This flow ensures: + +* Credentials stay secure: Applications never send them directly to agents, only access tokens. +* Exposure is limited: Tokens expire, reducing the window for compromised credentials. +* Integration is standard: Applications can use existing OAuth2 libraries. + +=== External integration + +External applications must authenticate using the service account credentials. Each agent has its own service account. 
+ +For step-by-step authentication instructions, see xref:security:cloud-authentication.adoc[]. + +=== Internal integration + +The `a2a_message` processor handles authentication automatically. Pipelines don't need to manage credentials explicitly because they run within the Redpanda Cloud cluster with appropriate permissions. + +== Protocol versions + +The A2A protocol uses semantic versioning (major.minor.patch). Agents declare their supported version in the agent card. + +== Next steps + +* xref:ai-agents:agents/integration-overview.adoc[] +* xref:ai-agents:agents/create-agent.adoc[] +* link:https://a2a.ag/spec[A2A Protocol Specification^] diff --git a/modules/ai-agents/pages/agents/architecture-patterns.adoc b/modules/ai-agents/pages/agents/architecture-patterns.adoc new file mode 100644 index 000000000..aeed99dde --- /dev/null +++ b/modules/ai-agents/pages/agents/architecture-patterns.adoc @@ -0,0 +1,229 @@ += Agent Architecture Patterns +:description: Design maintainable agent systems with single-agent and multi-agent patterns based on domain complexity. +:page-topic-type: best-practices +:personas: agent_developer, streaming_developer +:learning-objective-1: Evaluate single-agent versus multi-agent architectures for your use case +:learning-objective-2: Choose appropriate LLM models based on task requirements +:learning-objective-3: Apply agent boundary design principles for maintainability + +Design agent systems that are maintainable, discoverable, and reliable by choosing the right architecture pattern and applying clear boundary principles. + +After reading this page, you will be able to: + +* [ ] {learning-objective-1} +* [ ] {learning-objective-2} +* [ ] {learning-objective-3} + +== Why architecture matters + +Agent architecture determines how you manage complexity as your system grows. The right pattern depends on your domain complexity, organizational structure, and how you expect requirements to evolve. 
+ +Starting with a simple architecture is tempting, but can lead to unmaintainable systems as complexity increases. Planning for growth with clear boundaries prevents technical debt and costly refactoring later. + +Warning signs include system prompts exceeding 2000 words, too many tools for the LLM to select correctly, multiple teams modifying the same agent, and changes in one domain breaking others. These symptoms indicate you need architectural boundaries, not just better prompts. + +Match agent architecture to domain structure: + +[cols="2,3,3"] +|=== +| Domain Characteristics | Architecture Fit | Reasoning + +| Single business area, stable requirements +| Single agent +| Simplicity outweighs flexibility needs + +| Multiple business areas, shared infrastructure +| Root agent with internal subagents +| Domain separation without deployment complexity + +| Cross-organization workflows, independent evolution +| External agent-to-agent +| Organizational boundaries require system boundaries +|=== + + +Every architecture pattern involves trade-offs. + +- *Latency versus isolation:* Internal subagents have lower latency because they avoid network calls, but they share a failure domain. External agents have higher latency due to network overhead, but they provide independent failure isolation. + +- *Shared state versus independence:* Single deployments share model, budget, and policies but offer less flexibility. Multiple deployments allow independent scaling and updates but add coordination complexity. + +- *Complexity now versus complexity later:* Starting simple means faster initial development but may require refactoring. Starting structured requires more upfront work but makes the system easier to extend. + +For foundational concepts on how agents execute and manage complexity, see xref:ai-agents:agents/concepts.adoc[]. + +== Single-agent pattern + +A single-agent architecture uses one agent with one system prompt and one tool set to handle all requests. 
+ +This pattern works best for narrow domains with limited scope, single data sources, and tasks that don't require specialized subsystems. + +=== When to use single agents + +Use single agents for focused problems that won't expand significantly. + +Examples include order lookup agents that retrieve history from a single topic, weather agents that query APIs and return formatted data, and inventory checkers that report stock levels. + +=== Trade-offs + +Single agents are simpler to build and maintain. You have one system prompt, one tool set, and one deployment. + +However, all capabilities must coexist in one agent. Adding features increases complexity rapidly, making single agents difficult to scale to multi-domain problems. + +== Root agent with subagents pattern + +A multi-agent architecture uses a root agent that delegates to specialized internal subagents. + +This pattern works for complex domains spanning multiple areas, multiple data sources with different access patterns, and tasks requiring specialized expertise within one deployment. + +NOTE: Subagents in Redpanda Cloud are internal specialists within a single agent. They share the parent agent's model, budget, and policies, but each can have different names, descriptions, system prompts, and MCP tools. + +=== How it works + +The root agent interprets user requests and routes them to appropriate subagents. + +Each subagent owns a specific business area with focused expertise. Subagents access only the MCP tools they need. + +All subagents share the same LLM model and budget from the parent agent. + +=== Example: E-commerce platform + +A typical e-commerce agent includes a root agent that interprets requests and delegates to specialists, an order subagent for processing, history, and status updates, an inventory subagent for stock checks and warehouse operations, and a customer subagent for profiles, preferences, and history. 
All subagents share the same model but have different system prompts and tool access. + +=== Why choose internal subagents + +Internal subagents provide domain isolation, allowing you to update the order subagent without affecting inventory. Debugging is easier because each subagent has narrow scope and fewer potential failure points. All subagents share resources, reducing complexity and cost compared to separate deployments. Use internal subagents when you need domain separation within a single agent deployment. + +== External agent-to-agent pattern + +External A2A integration connects agents across organizational boundaries, platforms, or independent systems. + +NOTE: Cross-agent calling between separate Redpanda Cloud agents is not supported. This pattern applies to connecting Redpanda Cloud agents with external agents you host elsewhere. + +=== When to use external A2A + +Use external A2A for multi-organization workflows that coordinate agents across company boundaries, for platform integration connecting Redpanda Cloud agents with agents hosted elsewhere, and when agents require different deployment environments such as GPU clusters, air-gapped networks, or regional constraints. + +=== How it works + +Agents communicate using the xref:ai-agents:agents/a2a-concepts.adoc[A2A protocol], a standard HTTP-based protocol for discovery and invocation. Each agent manages its own credentials and access control independently, and can deploy, scale, and update without coordinating with other agents. Agent cards define capabilities without exposing implementation details. 
+ +=== Example: Multi-platform customer service + +A customer service workflow might span multiple platforms: + +* Redpanda Cloud agent accesses real-time order and inventory data +* CRM agent hosted elsewhere manages customer profiles and support tickets +* Payment agent from a third party handles transactions in a secure environment + +Each agent runs on its optimal infrastructure while coordinating through A2A. + +=== Why choose external A2A + +External A2A lets different teams own and deploy their agents independently, with each agent choosing its own LLM, tools, and infrastructure. Sensitive operations stay in controlled environments with security isolation, and you can add agents incrementally without rewriting existing systems. + +=== Trade-offs + +External A2A adds network latency on every cross-agent call, and authentication complexity multiplies with each agent requiring credential management. Removing capabilities or changing contracts requires coordination across consuming systems, and debugging requires tracing requests across organizational boundaries. + +For implementation details on external A2A integration, see xref:ai-agents:agents/integration-overview.adoc[]. + +== Common anti-patterns + +Avoid these architecture mistakes that lead to unmaintainable agent systems. + +=== The monolithic prompt + +A monolithic prompt is a single 3000+ word system prompt covering multiple domains. + +This pattern fails because LLM confusion increases with prompt length, multiple teams modify the same prompt creating conflicts and unclear ownership, and changes to one domain risk breaking others. + +Split into domain-specific subagents instead. Each subagent gets a focused prompt under 500 words. + +=== The tool explosion + +A tool explosion occurs when a single agent has 30+ tools from every MCP server in the cluster. 
+ +This pattern fails because the LLM struggles to choose correctly from large tool sets, tool descriptions compete for limited prompt space, and the agent invokes wrong tools with similar names, wasting iteration budget on selection mistakes. + +Limit tools per agent. Use subagents to partition tools by domain. For tool design patterns, see xref:ai-agents:mcp/remote/tool-patterns.adoc[]. + +=== Premature A2A splitting + +Premature splitting creates three separate A2A agents when all logic could fit in one agent with internal subagents. + +This pattern fails because network latency affects every cross-agent call, authentication complexity multiplies with three sets of credentials, debugging requires correlating logs across systems, and you manage three deployments instead of one. + +Start with internal subagents for domain separation. Split to external A2A only when you need organizational boundaries or different infrastructure. + +=== Unbounded tool chaining + +Unbounded chaining sets max iterations to 100, returns hundreds of items from tools, and places no constraints on tool call frequency. + +This pattern fails because the context window fills with tool results, requests time out before completion, costs spiral with many iterations multiplied by large context, and the agent loses track of the original goal. + +Design workflows to complete in 20-30 iterations. Return paginated results from tools. Add prompt constraints like "Never call the same tool more than 3 times per request." + +== Model selection guide + +Choose models based on task complexity, latency requirements, and cost constraints. The Redpanda Cloud Console displays available models with descriptions when creating agents. + +=== Match models to task complexity + +For simple queries, choose cost-effective models such as GPT-5 Mini. + +For balanced workloads, choose mid-tier models such as Claude Sonnet 4.5 or GPT-5.2. 
+ +For complex reasoning, choose premium models such as Claude Opus 4.5 or GPT-5.2. + +=== Balance latency and model size + +For real-time responses, choose smaller models. Use models optimized for speed, such as Mini or base tiers. + +For batch processing, optimize for accuracy over speed. Use larger models when users don't wait for results. + +=== Optimize for cost and volume + +For high volume, use cost-effective models. Smaller tiers reduce costs while maintaining acceptable quality. + +For critical accuracy, use premium models. Higher costs are justified when errors are costly. + +=== Model provider documentation + +For complete model specifications, capabilities, and pricing: + +* link:https://platform.openai.com/docs/models[OpenAI Models^] +* link:https://docs.anthropic.com/claude/docs/models-overview[Anthropic Claude Models^] +* link:https://ai.google.dev/gemini-api/docs/models[Google Gemini Models^] + +== Design principles + +Follow these principles to create maintainable agent systems. + +=== Explicit agent boundaries + +Each agent should have clear scope and responsibilities. Define scope explicitly in the system prompt, assign a specific tool set for the agent's domain, and specify well-defined inputs and outputs. + +Do not create agents with overlapping responsibilities. Overlapping domains create confusion about which agent handles which requests. + +=== Tool scoping per agent + +Assign tools to the agent that needs them. Don't give all agents access to all tools. Limit tool access based on agent purpose. + +Tool scoping reduces misuse risk and makes debugging easier. + +=== Error handling and fallbacks + +Design agents to handle failures gracefully. + +Use retry logic for transient failures like network timeouts. Report permanent failures like invalid parameters immediately. + +Provide clear error messages to users. Log errors for debugging. 
+ +== Next steps + +* xref:ai-agents:agents/integration-overview.adoc[] +* xref:ai-agents:agents/a2a-concepts.adoc[] +* xref:ai-agents:mcp/remote/tool-patterns.adoc[] +* xref:ai-agents:agents/overview.adoc[] +* xref:ai-agents:mcp/remote/best-practices.adoc[] diff --git a/modules/ai-agents/pages/agents/build-index.adoc b/modules/ai-agents/pages/agents/build-index.adoc new file mode 100644 index 000000000..f3679805d --- /dev/null +++ b/modules/ai-agents/pages/agents/build-index.adoc @@ -0,0 +1,5 @@ += Build Agents +:page-layout: index +:description: Create production AI agents with effective prompts and scalable architecture. + +Create agents, write effective prompts, and design scalable agent systems. diff --git a/modules/ai-agents/pages/agents/concepts.adoc b/modules/ai-agents/pages/agents/concepts.adoc new file mode 100644 index 000000000..0ddb068f9 --- /dev/null +++ b/modules/ai-agents/pages/agents/concepts.adoc @@ -0,0 +1,148 @@ += Agent Concepts +:description: Understand how agents execute, manage context, invoke tools, and handle errors. +:page-topic-type: concepts +:personas: agent_developer, streaming_developer, data_engineer +:learning-objective-1: Explain how agents execute reasoning loops and make tool invocation decisions +:learning-objective-2: Describe how agents manage context and state across interactions +:learning-objective-3: Identify error handling strategies for agent failures + +Agents execute through a reasoning loop where the LLM analyzes context, decides which tools to invoke, processes results, and repeats until the task completes. Understanding this execution model helps you design reliable agent systems. + +After reading this page, you will be able to: + +* [ ] {learning-objective-1} +* [ ] {learning-objective-2} +* [ ] {learning-objective-3} + +== Agent execution model + +Every agent request follows a reasoning loop. The agent doesn't execute all tool calls at once. Instead, it makes decisions iteratively. 
+
+=== The reasoning loop
+
+When an agent receives a request:
+
+. The LLM receives the context, including system prompt, conversation history, user request, and previous tool results.
+. The LLM chooses to invoke a tool, requests more information, or responds to the user.
+. The tool runs and returns results if invoked.
+. The tool's results are added to conversation history.
+. The LLM reasons again with an expanded context.
+
+The loop continues until one of these conditions is met:
+
+* Agent completes the task and responds to the user
+* Agent reaches max iterations limit
+* Agent encounters an unrecoverable error
+
+=== Why iterations matter
+
+Each iteration includes three phases:
+
+. **LLM reasoning**: The model processes the growing context to decide the next action.
+. **Tool invocation**: If the agent decides to call a tool, execution happens and waits for results.
+. **Context expansion**: Tool results are added to the conversation history for the next iteration.
+
+With higher iteration limits, agents can complete complex tasks, but requests cost more and take longer.
+
+With lower iteration limits, agents respond faster and cost less, but may fail on complex requests.
+
+==== Cost calculation
+
+Calculate the approximate cost per request by estimating average context tokens per iteration:
+
+----
+Cost per request = (iterations x context tokens x model price per token)
+----
+
+Example with 30 iterations at $0.000002 per token, showing sampled iterations as the context grows:
+
+----
+Iteration 1: 500 tokens x $0.000002 = $0.001
+Iteration 15: 2000 tokens x $0.000002 = $0.004
+Iteration 30: 4000 tokens x $0.000002 = $0.008
+
+Total across all 30 iterations (average ~2000 tokens each): ~$0.12 per request
+----
+
+Actual costs vary based on:
+
+* Tool result sizes (large results increase context)
+* Model pricing (varies by provider and model tier)
+* Task complexity (determines iteration count)
+
+Setting max iterations creates a cost/capability trade-off:
+
+[cols="1,1,2,1", options="header"]
+|===
+|Limit |Range |Use Case |Cost
+
+|Low
+|10-20
+|Simple queries, single tool calls
+|Cost-effective
+
+|Medium
+|30-50
+|Multi-step workflows, tool chaining
+|Balanced
+
+|High
+|50-100
+|Complex analysis, exploratory tasks
+|Higher
+|===
+
+Iteration limits prevent runaway costs when agents encounter complex or ambiguous requests.
+
+== MCP tool invocation patterns
+
+MCP tools extend agent capabilities beyond text generation. Understanding when and how tools execute helps you design effective tool sets.
+
+=== Synchronous tool execution
+
+In Redpanda Cloud, tool calls block the agent. When the agent decides to invoke a tool, it pauses and waits while the tool executes (querying a database, calling an API, or processing data). When the tool returns its result, the agent resumes reasoning.
+
+This synchronous model means latency adds up across multiple tool calls, the agent sees tool results sequentially rather than in parallel, and long-running tools can delay or fail agent requests due to timeouts.
+ +=== Tool selection decisions + +The LLM decides which tool to invoke based on system prompt guidance (such as "Use get_orders when customer asks about history"), tool descriptions from the MCP schema that define parameters and purpose, and conversation context where previous tool results influence the next tool choice. Agents can invoke the same tool multiple times with different parameters if the task requires it. + +=== Tool chaining + +Agents chain tools when one tool's output feeds another tool's input. For example, an agent might first call `get_customer_info(customer_id)` to retrieve details, then use that data to call `get_order_history(customer_email)`. + +Tool chaining requires sufficient max iterations because each step in the chain consumes one iteration. + +=== Tool granularity considerations + +Tool design affects agent behavior. Coarse-grained tools that do many things result in fewer tool calls but less flexibility and more complex implementation. Fine-grained tools that each do one thing require more tool calls but offer higher composability and simpler implementation. + +Choose granularity based on how often you'll reuse tool logic across workflows, whether intermediate results help with debugging, and how much control you want over tool invocation order. + +For tool design guidance, see xref:ai-agents:mcp/remote/best-practices.adoc[]. + +== Context and state management + +Agents handle two types of information: conversation context (what's been discussed) and state (persistent data across sessions). + +=== Conversation context + +The agent's context includes the system prompt (always present), user messages, agent responses, tool invocation requests, and tool results. + +As the conversation progresses, context grows. Each tool result adds tokens to the context window, which the LLM uses for reasoning in subsequent iterations. + +=== Context window limits + +LLM context windows limit how much history fits. 
Small models support 8K-32K tokens, medium models support 32K-128K tokens, and large models support 128K-1M+ tokens. + +When context exceeds the limit, the oldest tool results get truncated, the agent loses access to early conversation details, and may ask for information it already retrieved. + +Design workflows to complete within context limits. Avoid unbounded tool chaining. + +== Next steps + +* xref:ai-agents:agents/architecture-patterns.adoc[] +* xref:ai-agents:agents/quickstart.adoc[] +* xref:ai-agents:agents/prompt-best-practices.adoc[] +* xref:ai-agents:mcp/remote/best-practices.adoc[] diff --git a/modules/ai-agents/pages/agents/create-agent.adoc b/modules/ai-agents/pages/agents/create-agent.adoc new file mode 100644 index 000000000..60b95ecf9 --- /dev/null +++ b/modules/ai-agents/pages/agents/create-agent.adoc @@ -0,0 +1,279 @@ += Create an Agent +:description: Configure agents with model selection, system prompts, tool connections, and execution parameters. +:page-topic-type: how-to +:personas: agent_developer, app_developer, streaming_developer +:learning-objective-1: Configure an agent with model selection and system prompt +:learning-objective-2: Connect MCP servers and select tools for your agent +:learning-objective-3: Set agent execution parameters including max iterations + +Create a new AI agent through the Redpanda Cloud Console. This guide walks you through configuring the agent's model, system prompt, tools, and execution settings. + +After reading this page, you will be able to: + +* [ ] {learning-objective-1} +* [ ] {learning-objective-2} +* [ ] {learning-objective-3} + +== Prerequisites + +* A xref:get-started:cluster-types/byoc/index.adoc[BYOC cluster] with Remote MCP enabled. +* xref:ai-agents:ai-gateway/gateway-quickstart.adoc[AI Gateway configured] with at least one LLM provider enabled. +* At least one xref:ai-agents:mcp/remote/overview.adoc[Remote MCP server] deployed with tools. 
+* System prompt prepared (see xref:ai-agents:agents/prompt-best-practices.adoc[System Prompt Best Practices]). + +== Access the agents UI + +. Log in to the link:https://cloud.redpanda.com[Redpanda Cloud Console^]. +. Navigate to your cluster. +. Click *Agentic AI* > *AI Agents* in the left navigation. + +== Configure basic settings + +. Click *Create Agent*. +. Enter a display name (3-128 characters, alphanumeric with spaces, hyphens, underscores, or slashes). +. Optionally, add a description (maximum 256 characters). +. Select a resource tier based on your workload characteristics: ++ +Resource tiers control CPU and memory allocated to your agent. Choose based on: ++ +* **Concurrency:** How many simultaneous requests the agent handles. +* **Tool memory:** Whether tools process large datasets in memory. +* **Response time:** How quickly the agent needs to respond. ++ +Available tiers: ++ +* XSmall: 100m CPU, 400M RAM (single-user testing, simple queries) +* Small: 200m CPU, 800M RAM (light workloads, few concurrent users) +* Medium: 300m CPU, 1200M RAM (recommended for most production use cases) +* Large: 400m CPU, 1600M RAM (high concurrency or memory-intensive tools) +* XLarge: 500m CPU, 2G RAM (very high concurrency or large data processing) ++ +Start with Medium for production workloads. Monitor CPU and memory usage, then adjust if you see resource constraints. + +. Optionally, add tags (maximum 16 tags) for organization and filtering: ++ +* Keys: Maximum 64 characters, must be unique +* Values: Maximum 256 characters, allowed characters: letters, numbers, spaces, and `_.:/=+-@` + +== Choose a model + +Agents use large language models (LLMs) to interpret user intent and decide which tools to invoke. + +. Select your AI Gateway: ++ +Choose the gateway that contains your configured LLM providers and API keys. If you have multiple gateways, select the appropriate one for this agent's workload (for example, production vs staging, or team-specific gateways). + +. 
Select your LLM provider from those available in the gateway: ++ +* OpenAI (GPT models) +* Google (Gemini models) +* Anthropic (Claude models) +* OpenAI Compatible (custom OpenAI-compatible endpoints) + +. If using OpenAI Compatible, provide the base URL: ++ +* Base URL is required for OpenAI Compatible +* Must start with `http://` or `https://` +* Example: `https://api.example.com/v1` + +. Select the specific model version from the dropdown. ++ +The dropdown shows available models with descriptions. + +For detailed model specifications and pricing: + +* link:https://platform.openai.com/docs/models[OpenAI Models^] +* link:https://docs.anthropic.com/claude/docs/models-overview[Anthropic Claude Models^] +* link:https://ai.google.dev/gemini-api/docs/models[Google Gemini Models^] + +For model selection based on architecture patterns, see xref:ai-agents:agents/architecture-patterns.adoc#model-selection-guide[Model selection guide]. + +== Write the system prompt + +. In the *System Prompt* section, enter your prompt (minimum 10 characters). +. Follow these guidelines: ++ +* Define agent role and responsibilities +* List available tools +* Specify constraints and safety rules +* Set output format expectations + +. Use the *Preview* button to review formatted prompt. + +Example system prompt structure: + +[,text] +---- +You are an [agent role]. + +Responsibilities: +- [Task 1] +- [Task 2] + +Available tools: +- [tool_name]: [description] + +Never: +- [Constraint 1] +- [Constraint 2] + +Response format: +- [Format guideline] +---- + +For complete prompt guidelines, see xref:ai-agents:agents/prompt-best-practices.adoc[System Prompt Best Practices]. + +== Add MCP servers and select tools + +. In the *Tools* section, click *Add MCP Server*. +. Select an MCP server from your cluster. +. The UI displays all tools exposed by that server. +. 
Select which tools this agent can use: ++ +* Check the box next to each tool +* Review tool descriptions to confirm they match agent needs + +. Repeat to add tools from multiple MCP servers. +. Verify your tool selection: ++ +* Ensure tools match those listed in your system prompt +* Remove tools the agent doesn't need (principle of least privilege) + +== Add subagents (optional) + +Subagents are internal specialists within a single agent. Each subagent can have its own name, description, system prompt, and MCP tools, but all subagents share the parent agent's model, budget, and policies. + +. In the *Subagents* section, click *Add Subagent*. +. Configure the subagent: ++ +* *Name*: 1-64 characters, only letters, numbers, hyphens, and underscores (for example: `order-agent` or `Order_Agent`) +* *Description*: Maximum 256 characters (optional) +* *System Prompt*: Minimum 10 characters, domain-specific instructions +* *MCP Tools*: Select tools this subagent can access + +The root agent orchestrates and delegates work to appropriate subagents based on the request. + +For multi-agent design patterns, see xref:ai-agents:agents/architecture-patterns.adoc[Agent Architecture Patterns]. + +=== Set max iterations + +Max iterations determine how many reasoning loops the agent can perform before stopping. Each iteration consumes tokens and adds latency. For detailed cost calculations and the cost/capability/latency trade-off, see xref:ai-agents:agents/concepts.adoc[]. + +In the *Execution Settings* section, configure *Max Iterations* (range: 10-100, default: 30). + +Choose based on task complexity: + +* **Simple queries** (10-20): Single tool call, direct answers, minimal reasoning +* **Balanced workflows** (20-40): Multiple tool calls, data aggregation, moderate analysis +* **Complex analysis** (40-100): Exploratory queries, extensive tool chaining, deep reasoning + +Start with 30 for most use cases. 
+ +=== Configure A2A discovery metadata + +After creating your agent, configure discovery metadata for external integrations. For detailed agent card design guidance, see link:https://agent2agent.info/docs/guides/create-agent-card/[Create an Agent Card^]. + +. Click on your agent. +. Open the *A2A* tab. +. Configure identity fields: ++ +* *Icon URL*: A publicly accessible image URL (recommended: 256x256px PNG or SVG) +* *Documentation URL*: Link to comprehensive agent documentation + +. Configure provider information: ++ +* *Organization*: Your organization or team name +* *URL*: Website or contact URL + +. Configure capabilities by adding skills: ++ +Skills describe what your agent can do for capability-based discovery. External systems use skills to find agents with the right capabilities. ++ +.. Click *+ Add Skill* to define what this agent can do. +.. For each skill, configure: ++ +* *Skill ID* (required): Unique identifier using lowercase letters, numbers, and hyphens (e.g., `fraud-analysis`, `order-lookup`) +* *Skill Name* (required): Human-readable name displayed in agent directories (e.g., "Fraud Analysis", "Order Lookup") +* *Description* (required): Explain what this skill does and when to use it. Be specific about inputs, outputs, and use cases. +* *Tags* (optional): Add tags for categorization and search. Use common terms like `fraud`, `security`, `finance`, `orders`. +* *Examples* (optional): Click *+ Add Example* to provide sample queries demonstrating how to invoke this skill. Examples help users understand how to interact with your agent. ++ +.. Add multiple skills if your agent handles different types of requests. For example, a customer service agent might have separate skills for "Order Status Lookup", "Shipping Tracking", and "Returns Processing". + +. Click *Save Changes*. + +The updated metadata appears immediately at `\https://your-agent-url/.well-known/agent-card.json`. 
For more about what these fields mean and how they're used, see xref:ai-agents:agents/a2a-concepts.adoc#agent-card-metadata[Agent card metadata].
+
+=== Review and create
+
+. Review all settings.
+
+. Configure the service account name (optional):
++
+* Default pattern: `<cluster-id>-agent-<agent-name>-sa`
+* Custom name: 3-128 characters, cannot contain `<` or `>` characters
+* This service account authenticates the agent with cluster resources
+
+. Click *Create Agent*.
+
+. Wait for agent creation to complete.
+
+When your agent is running, Redpanda Cloud provides an HTTP endpoint URL with the pattern:
+
+----
+https://<agent-id>.ai-agents.<region>.<cloud-domain>
+----
+
+You can use this URL to call your agent programmatically or integrate it with external systems.
+
+The *Inspector* tab in the Cloud Console automatically uses this URL to connect to your agent for testing.
+
+For programmatic access or external agent integration, see xref:ai-agents:agents/integration-overview.adoc[].
+
+== Test your agent
+
+. In the agent details view, click the *Inspector* tab.
+. Enter a test prompt.
+. Verify the agent:
++
+* Selects appropriate tools
+* Follows system prompt constraints
+* Returns expected output format
+
+. Iterate on the system prompt or tool selection as needed.
+
+For detailed testing strategies, see xref:ai-agents:agents/monitor-agents.adoc[].
+ +== Example configurations + +Here are example configurations for different agent types: + +=== Simple query agent + +* *Model*: GPT-5 Mini (fast, cost-effective) +* *Tools*: Single MCP server with `get_orders` tool +* *Max iterations*: 10 +* *Use case*: Customer order lookups + +=== Complex analytics agent + +* *Model*: Claude Sonnet 4.5 (balanced) +* *Tools*: Multiple servers with data query, aggregation, and formatting tools +* *Max iterations*: 30 +* *Use case*: Multi-step data analysis + +=== Multi-agent orchestrator + +* *Model*: Claude Opus 4.5 (advanced reasoning) +* *Tools*: Agent delegation tools +* *Subagents*: Order Agent, Inventory Agent, Customer Agent +* *Max iterations*: 20 +* *Use case*: E-commerce operations + +== Next steps + +* xref:ai-agents:agents/integration-overview.adoc[] +* xref:ai-agents:agents/prompt-best-practices.adoc[] +* xref:ai-agents:mcp/remote/create-tool.adoc[] +* xref:ai-agents:agents/architecture-patterns.adoc[] +* xref:ai-agents:agents/troubleshooting.adoc[] diff --git a/modules/ai-agents/pages/agents/get-started-index.adoc b/modules/ai-agents/pages/agents/get-started-index.adoc new file mode 100644 index 000000000..70856d47d --- /dev/null +++ b/modules/ai-agents/pages/agents/get-started-index.adoc @@ -0,0 +1,5 @@ += Get Started with AI Agents +:page-layout: index +:description: Learn what AI agents are and build your first agent in Redpanda Cloud. + +Start here to understand AI agents and build your first one. diff --git a/modules/ai-agents/pages/agents/index.adoc b/modules/ai-agents/pages/agents/index.adoc new file mode 100644 index 000000000..a07f6ad68 --- /dev/null +++ b/modules/ai-agents/pages/agents/index.adoc @@ -0,0 +1,5 @@ += AI Agents +:page-layout: index +:description: Build AI agents that use Redpanda Cloud for real-time streaming data and tool execution. + +Build AI agents that combine large language models with MCP tools to process streaming data and execute actions. 
diff --git a/modules/ai-agents/pages/agents/integration-index.adoc b/modules/ai-agents/pages/agents/integration-index.adoc new file mode 100644 index 000000000..7a4b5672e --- /dev/null +++ b/modules/ai-agents/pages/agents/integration-index.adoc @@ -0,0 +1,6 @@ += Agent Integration +:page-layout: index +:description: Connect agents to external applications, pipelines, and other systems. + +Choose integration patterns and connect agents to your systems. + diff --git a/modules/ai-agents/pages/agents/integration-overview.adoc b/modules/ai-agents/pages/agents/integration-overview.adoc new file mode 100644 index 000000000..ecba9080b --- /dev/null +++ b/modules/ai-agents/pages/agents/integration-overview.adoc @@ -0,0 +1,128 @@ += Integration Patterns Overview +:description: Choose the right integration pattern for connecting agents, pipelines, and external applications. +:page-topic-type: best-practices +:personas: agent_developer, streaming_developer, app_developer, data_engineer +:learning-objective-1: Choose the integration pattern that fits your use case +:learning-objective-2: Apply appropriate authentication for internal versus external integration +:learning-objective-3: Select the right communication protocol for your integration scenario + +Redpanda Cloud supports multiple integration patterns for agents, pipelines, and external applications. Choose the pattern that matches your integration scenario. 
+ +After reading this page, you will be able to: + +* [ ] {learning-objective-1} +* [ ] {learning-objective-2} +* [ ] {learning-objective-3} + +== Integration scenarios + +Redpanda Cloud supports three primary integration scenarios based on who initiates the call and where the caller is located: + +[cols="1,2,2,1"] +|=== +| Scenario | Description | When to Use | Guide + +| Agent needs capabilities +| Your agent invokes MCP tools to fetch data, call APIs, or access external systems on-demand +| Agent-initiated, synchronous, interactive workflows +| xref:ai-agents:mcp/remote/tool-patterns.adoc[] + +| Pipeline processes events +| Your Redpanda Connect pipeline invokes agents for each event in a stream using the `a2a_message` processor +| Event-driven, automated, high-volume stream processing +| xref:ai-agents:agents/pipeline-integration-patterns.adoc[] + +| External system calls agent +| Your application or agent (hosted outside Redpanda Cloud) calls Redpanda Cloud agents using the A2A protocol +| Backend services, CLI tools, custom UIs, multi-platform agent workflows +| xref:ai-agents:agents/a2a-concepts.adoc[] +|=== + +== Common use cases by pattern + +Each integration pattern serves different scenarios based on how data flows and who initiates the interaction. + +[[agent-needs-capabilities]] +=== Agent needs capabilities (MCP tools) + +Use MCP tools when your agent needs on-demand access to data or capabilities. + +The agent decides when to invoke tools as part of its reasoning process. It waits for responses before continuing. + +This pattern works well for interactive workflows: customer support lookups, approval flows, or context-aware chatbots. + +Avoid MCP tools for high-volume stream processing or automated workflows without user interaction. Use pipeline-initiated integration instead. + +For implementation details, see xref:ai-agents:mcp/remote/tool-patterns.adoc[]. 
+ +[[pipeline-processes-events]] +=== Pipeline processes events (`a2a_message`) + +Use the `a2a_message` processor when your pipeline needs to invoke agents for every event in a stream. + +The pipeline controls when agents execute. This pattern is ideal for automated, high-volume processing where each event requires AI reasoning. + +Common scenarios include real-time fraud detection, sentiment scoring for customer reviews, and content moderation that classifies and routes content. + +For implementation details, see xref:ai-agents:agents/pipeline-integration-patterns.adoc[]. + +=== External system calls agent + +Use external integration when your applications, services, or agents hosted outside Redpanda Cloud need to call Redpanda Cloud agents. + +External systems send requests using the A2A protocol and receive responses synchronously. This works for backend services, CLI tools, custom UIs, and agents hosted on other platforms. + +Common scenarios include backend services analyzing data as part of workflows, CLI tools invoking agents for batch tasks, custom UIs displaying agent responses, CRM agents coordinating with Redpanda agents, and multi-platform workflows spanning different infrastructure. + +To learn how the A2A protocol enables this integration, see xref:ai-agents:agents/a2a-concepts.adoc[]. 
+ +== Pattern comparison + +The following table compares the two primary internal integration patterns: + +[cols="1,2,2"] +|=== +| Criterion | Agents Invoking MCP Tools | Pipelines Calling Agents + +| Trigger +| User question or agent decision +| Event arrival in topic + +| Frequency +| Ad-hoc, irregular, as needed +| Continuous, every event + +| Latency +| Low (agent waits for response) +| Higher (async acceptable) + +| Control Flow +| Agent decides when to invoke +| Pipeline decides when to invoke + +| Use Case +| "Fetch me data", "Run this query" +| "Process this stream", "Enrich all events" + +| Human in Loop +| Often yes (user-driven) +| Often no (automated) +|=== + +== Security considerations for external integration + +When integrating external applications with Redpanda Cloud agents, protect credentials and tokens. + +=== Protect service account credentials + +Store the client ID and secret in secure credential stores, not in code. Use environment variables or secrets management systems. Rotate credentials if compromised and restrict access based on the principle of least privilege. + +=== Protect access tokens + +Access tokens grant full access to the agent. Anyone with a valid token can send requests, receive responses, and consume agent resources (subject to rate limits). Treat access tokens like passwords and never log them or include them in error messages. 
+ +== Next steps + +* xref:ai-agents:agents/a2a-concepts.adoc[] +* xref:ai-agents:mcp/remote/tool-patterns.adoc[] +* xref:ai-agents:agents/pipeline-integration-patterns.adoc[] diff --git a/modules/ai-agents/pages/agents/monitor-agents.adoc b/modules/ai-agents/pages/agents/monitor-agents.adoc new file mode 100644 index 000000000..c493aa291 --- /dev/null +++ b/modules/ai-agents/pages/agents/monitor-agents.adoc @@ -0,0 +1,99 @@ += Monitor Agent Activity +:description: Monitor agent execution, analyze conversation history, track token usage, and debug issues using Inspector, Transcripts, and agent data topics. +:page-topic-type: how-to +:personas: agent_developer, platform_admin +:learning-objective-1: pass:q[Verify agent behavior using the *Inspector* tab] +:learning-objective-2: Track token usage and performance metrics +:learning-objective-3: pass:q[Debug agent execution using *Transcripts*] + +Use monitoring to track agent performance, analyze conversation patterns, debug execution issues, and optimize token costs. + +After reading this page, you will be able to: + +* [ ] {learning-objective-1} +* [ ] {learning-objective-2} +* [ ] {learning-objective-3} + +For conceptual background on traces and observability, see xref:ai-agents:observability/concepts.adoc[]. + +== Prerequisites + +You must have a running agent. If you do not have one, see xref:ai-agents:agents/quickstart.adoc[]. + +== Debug agent execution with Transcripts + +The *Transcripts* view shows execution traces with detailed timing, errors, and performance metrics. Use this view to debug issues, verify agent behavior, and monitor performance in real-time. + +:context: agent +include::ai-agents:partial$transcripts-ui-guide.adoc[] + +=== Check agent health + +Use the *Transcripts* view to verify your agent is healthy. Look for consistent green bars in the timeline, which indicate successful executions. 
Duration should stay within your expected range, while token usage remains stable without unexpected growth. + +Several warning signs indicate problems. Red bars in the timeline mean errors or failures that need investigation. When duration increases over time, your context window may be growing or tool calls could be slowing down. Many LLM calls for simple requests often signal that the agent is stuck in loops or making unnecessary iterations. If you see missing transcripts, the agent may be stopped or encountering deployment issues. + +Pay attention to patterns across multiple executions. When all recent transcripts show errors, start by checking agent status, MCP server connectivity, and system prompt configuration. A spiky timeline that alternates between success and error typically points to intermittent tool failures or external API issues. If duration increases steadily over a session, your context window is likely filling up. Clear the conversation history to reset it. High token usage combined with relatively few LLM calls usually means tool results are large or your system prompts are verbose. + +=== Debug with Transcripts + +Use *Transcripts* to diagnose specific issues: + +If the agent is not responding: + +. Check the timeline for recent transcripts. If none appear, the agent may be stopped. +. Verify agent status in the main *AI Agents* view. +. Look for error transcripts with deployment or initialization failures. + +If the agent fails during execution: + +. Select the failed transcript (red bar in timeline). +. Expand the trace hierarchy to find the tool invocation span. +. Check the span details for error messages. +. Cross-reference with MCP server status. + +If performance is slow: + +. Compare duration across multiple transcripts in the summary panel. +. Look for specific spans with long durations (wide bars in trace list). +. Check if LLM calls are taking longer than expected. +. Verify tool execution time by examining nested spans. 
+ +=== Track token usage and costs + +View token consumption in the *Summary* panel when you select a transcript. The breakdown shows input tokens (everything sent to the LLM including system prompt, conversation history, and tool results), output tokens (what the LLM generates in agent responses), and total tokens as the sum of both. + +Calculate cost per request: + +---- +Cost = (input_tokens x input_price) + (output_tokens x output_price) +---- + +Example: GPT-5.2 with 4,302 input tokens and 1,340 output tokens at $0.00000175 per input token and $0.000014 per output token costs $0.026 per request. + +For cost optimization strategies, see xref:ai-agents:agents/concepts.adoc#cost-calculation[Cost calculation]. + +== Test agent behavior with Inspector + +The *Inspector* tab provides real-time conversation testing. Use it to test agent responses interactively and verify behavior before deploying changes. + +=== Access Inspector + +. Navigate to *Agentic AI* > *AI Agents* in the Redpanda Cloud Console. +. Click your agent name. +. Open the *Inspector* tab. +. Enter test queries and review responses. +. Check the conversation panel to see tool calls. +. Start a new session to test fresh conversations or click *Clear context* to reset history. + +=== Testing best practices + +Test your agents systematically by exploring edge cases and potential failure scenarios. Begin with boundary testing. Requests at the edge of agent capabilities verify that scope enforcement works correctly. Error handling becomes clear when you request unavailable data and observe whether the agent degrades gracefully or fabricates information. + +Monitor iteration counts during complex requests to ensure they complete within your configured limits. Ambiguous or vague queries reveal whether the agent asks clarifying questions or makes risky assumptions. Throughout testing, track token usage per request to estimate costs and identify which query patterns consume the most resources. 
+ +== Next steps + +* xref:ai-agents:observability/concepts.adoc[] +* xref:ai-agents:agents/troubleshooting.adoc[] +* xref:ai-agents:agents/concepts.adoc[] diff --git a/modules/ai-agents/pages/agents/overview.adoc b/modules/ai-agents/pages/agents/overview.adoc new file mode 100644 index 000000000..6d9541b2e --- /dev/null +++ b/modules/ai-agents/pages/agents/overview.adoc @@ -0,0 +1,68 @@ += AI Agents Overview +:description: Learn what AI agents are and how Redpanda Cloud supports agent development with real-time streaming. +:page-topic-type: overview +:personas: evaluator, agent_developer, app_developer, streaming_developer +:learning-objective-1: Describe what AI agents are and their essential components +:learning-objective-2: Explain how Redpanda Cloud streaming infrastructure benefits agent architectures +:learning-objective-3: Identify use cases where Redpanda Cloud agents provide value + +AI agents are systems that combine large language models (LLMs) with the ability to execute actions and process data. Redpanda Cloud provides real-time streaming infrastructure and standardized tool access to support agent development. + +After reading this page, you will be able to: + +* [ ] {learning-objective-1} +* [ ] {learning-objective-2} +* [ ] {learning-objective-3} + +== What is an AI agent? + +An AI agent is a system built around a large language model that can interpret user intent, decide which actions are required, invoke external tools, process live and historical data, and chain multiple steps into a workflow. AI agents differ from text-only LLMs by executing actions and invoking external tools. 
+ +== How agents work + +Every AI agent consists of four essential components: + +* *System prompt*: Defines the agent's role, responsibilities, and constraints +* *LLM*: Interprets user intent and decides which tools to invoke +* *Tools*: External capabilities exposed through the Model Context Protocol (MCP) +* *Context*: Conversation history, tool results, and real-time events from Redpanda topics + +Agents can invoke Redpanda Connect components as tools on-demand. Redpanda Connect pipelines can also invoke agents for event-driven processing. This bidirectional integration supports both interactive workflows and automated streaming. + +When a user makes a request, the LLM receives the system prompt and context, decides which tools to invoke, and processes the results. This cycle repeats until the task completes. + +For a deeper understanding of how agents execute, manage context, and maintain state, see xref:ai-agents:agents/concepts.adoc[]. + +== Key benefits + +Redpanda Cloud provides real-time streaming data so agents access live events instead of batch snapshots. Remote MCP support enables standardized tool access. Managed infrastructure handles deployment, scaling, and security for you. Low-latency execution means tools run close to your data. Integrated secrets management securely stores API keys and credentials. + +== Use cases + +AI agents in Redpanda Cloud unlock new capabilities across multiple fields. + +=== For AI agent developers + +Build agents grounded in real-time data instead of static snapshots. Connect your agent to live order status, inventory levels, and customer history so responses reflect current business state, not stale training data. + +=== For application developers + +Add conversational AI to existing applications without rebuilding your backend. Expose your services as MCP tools and let agents orchestrate complex multi-step workflows through natural language. 
+ +=== For streaming developers + +Process every event with AI reasoning at scale. Invoke agents automatically from pipelines for fraud detection, content moderation, or sentiment analysis. No batch jobs, no delayed insights. + +== Limitations + +* Agents are available only on xref:get-started:cluster-types/byoc/index.adoc[BYOC clusters] +* MCP servers must be hosted in Redpanda Cloud clusters +* Cross-agent calling between separate agents hosted in Redpanda Cloud is not currently supported (use internal subagents for delegation within a single agent) + +== Next steps + +* xref:ai-agents:agents/quickstart.adoc[] +* xref:ai-agents:agents/concepts.adoc[] +* xref:ai-agents:agents/architecture-patterns.adoc[] +* xref:ai-agents:agents/integration-overview.adoc[] +* xref:ai-agents:agents/create-agent.adoc[] diff --git a/modules/ai-agents/pages/agents/pipeline-integration-patterns.adoc b/modules/ai-agents/pages/agents/pipeline-integration-patterns.adoc new file mode 100644 index 000000000..6395d344c --- /dev/null +++ b/modules/ai-agents/pages/agents/pipeline-integration-patterns.adoc @@ -0,0 +1,143 @@ += Pipeline Integration Patterns +:description: Build Redpanda Connect pipelines that invoke agents for event-driven processing and streaming enrichment. +:page-topic-type: best-practices +:personas: streaming_developer, agent_developer +:learning-objective-1: Identify when pipelines should call agents for stream processing +:learning-objective-2: pass:q[Design event-driven agent invocation using the `a2a_message` processor] +:learning-objective-3: Implement streaming enrichment with AI-generated fields + +Build Redpanda Connect pipelines that invoke agents for automated, event-driven processing. Pipelines use the `a2a_message` processor to call agents for each event in a stream when you need AI reasoning, classification, or enrichment at scale. 
+ +After reading this page, you will be able to: + +* [ ] {learning-objective-1} +* [ ] {learning-objective-2} +* [ ] {learning-objective-3} + +This page focuses on pipelines calling agents (pipeline-initiated integration). For agents invoking MCP tools, see xref:ai-agents:agents/integration-overview.adoc#agent-needs-capabilities[Agent needs capabilities]. For external applications calling agents, see xref:ai-agents:agents/integration-overview.adoc#external-system-calls-agent[External system calls agent]. + +== How pipelines invoke agents + +Pipelines use the xref:develop:connect/components/processors/a2a_message.adoc[`a2a_message`] processor to invoke agents for each event in a stream. The processor uses the xref:ai-agents:agents/a2a-concepts.adoc[A2A protocol] to discover and communicate with agents. + +When the `a2a_message` processor receives an event, it sends the event data to the specified agent along with any prompt you provide. The agent processes the event using its reasoning capabilities and returns a response. The processor then adds the agent's response to the event for further processing or output. + +The pipeline determines when to invoke agents based on events, not agent reasoning. + +== When to use this pattern + +Use the `a2a_message` processor when pipelines need AI reasoning for every event in a stream. + +The `a2a_message` processor is appropriate when: + +* **Every event needs AI analysis:** Each message requires reasoning, classification, or decision-making. +* **You need streaming enrichment:** Add AI-generated fields to events at scale. +* **Processing is fully automated:** No human in the loop, event-driven workflows. +* **Batch latency is acceptable:** Agent reasoning time is tolerable for your use case. +* **You're handling high-volume streams:** Processing thousands or millions of events. + +== Use cases + +Use the `a2a_message` processor in pipelines for these common patterns. 
+ +=== Event-driven agent invocation + +Invoke agents automatically for each event: + +[source,yaml] +---- +include::ai-agents:example$pipelines/event-driven-invocation.yaml[] +---- + +Replace `AGENT_CARD_URL` with your actual agent card URL. See xref:ai-agents:agents/a2a-concepts.adoc#agent-card-location[Agent card location]. + +**Use case:** Real-time fraud detection on every transaction. + +=== Streaming data enrichment + +Add AI-generated metadata to events: + +[source,yaml] +---- +include::ai-agents:example$pipelines/streaming-enrichment.yaml[tag=processors,indent=0] +---- + +Replace `AGENT_CARD_URL` with your actual agent card URL. See xref:ai-agents:agents/a2a-concepts.adoc#agent-card-location[Agent card location]. + +**Use case:** Add sentiment scores to every customer review in real-time. + +=== Asynchronous workflows + +Process events in the background: + +[source,yaml] +---- +include::ai-agents:example$pipelines/async-workflows.yaml[tag=pipeline,indent=0] +---- + +Replace `AGENT_CARD_URL` with your actual agent card URL. See xref:ai-agents:agents/a2a-concepts.adoc#agent-card-location[Agent card location]. + +**Use case:** Nightly batch summarization of reports where latency is acceptable. + +=== Multi-agent pipeline orchestration + +Chain multiple agents in sequence: + +[source,yaml] +---- +include::ai-agents:example$pipelines/multi-agent-orchestration.yaml[tag=processors,indent=0] +---- + +Replace the agent URL variables with your actual agent card URLs. See xref:ai-agents:agents/a2a-concepts.adoc#agent-card-location[Agent card location]. + +**Use case:** Translate feedback, analyze sentiment, then route to appropriate team. + +=== Agent as transformation node + +Use agent reasoning for complex transformations: + +[source,yaml] +---- +include::ai-agents:example$pipelines/agent-transformation.yaml[tag=processors,indent=0] +---- + +Replace `AGENT_CARD_URL` with your actual agent card URL. 
See xref:ai-agents:agents/a2a-concepts.adoc#agent-card-location[Agent card location]. + +**Use case:** Convert natural language queries to SQL for downstream processing. + +== When not to use this pattern + +Do not use the `a2a_message` processor when: + +* Users need to interact with agents interactively. +* The transformation is simple and does not require AI reasoning. +* Agents need to dynamically decide what data to fetch based on context. + +For a detailed comparison between pipeline-initiated and agent-initiated integration patterns, see xref:ai-agents:agents/integration-overview.adoc#pattern-comparison[Pattern comparison]. + +== Example: Real-time fraud detection + +This example shows a complete pipeline that analyzes every transaction with an agent. + +=== Pipeline configuration + +[source,yaml] +---- +include::ai-agents:example$pipelines/fraud-detection-routing.yaml[] +---- + +Replace `AGENT_CARD_URL` with your agent card URL. See xref:ai-agents:agents/a2a-concepts.adoc#agent-card-location[Agent card location]. + +This pipeline: + +* Consumes every transaction from the `transactions` topic. +* Sends each transaction to the fraud detection agent using `a2a_message`. +* Routes transactions to different topics based on fraud score. +* Runs continuously, analyzing every transaction in real-time. + +== Next steps + +* xref:ai-agents:mcp/remote/tool-patterns.adoc[] +* xref:ai-agents:agents/integration-overview.adoc[] +* xref:ai-agents:agents/a2a-concepts.adoc[] +* xref:develop:connect/components/processors/about.adoc[] diff --git a/modules/ai-agents/pages/agents/prompt-best-practices.adoc b/modules/ai-agents/pages/agents/prompt-best-practices.adoc new file mode 100644 index 000000000..f0f83ce57 --- /dev/null +++ b/modules/ai-agents/pages/agents/prompt-best-practices.adoc @@ -0,0 +1,424 @@ += System Prompt Best Practices +:description: Write system prompts that produce reliable, predictable agent behavior through clear constraints and tool guidance. 
+:page-topic-type: best-practices +:personas: agent_developer, app_developer, streaming_developer +:learning-objective-1: Identify effective system prompt patterns for agent reliability +:learning-objective-2: Apply constraint patterns to prevent unintended agent behavior +:learning-objective-3: Evaluate system prompts for clarity and completeness + +Write system prompts that produce reliable, predictable agent behavior. Good prompts define scope, specify constraints, and guide tool usage. + +After reading this page, you will be able to: + +* [ ] {learning-objective-1} +* [ ] {learning-objective-2} +* [ ] {learning-objective-3} + +== Role definition + +Define what your agent does and the boundaries of its responsibilities. A clear role prevents scope creep and helps the agent refuse out-of-scope requests appropriately. + +=== Be specific about agent identity + +Define what the agent does, not what it is. + +.Do +[,text] +---- +You are an order fulfillment agent for an e-commerce platform. You help customers track orders, update shipping addresses, and process returns. +---- + +.Don't +[,text] +---- +You are a helpful assistant. +---- + +=== Define what the agent does and doesn't do + +Explicitly state boundaries: what tasks the agent handles, what tasks it should refuse or delegate, and when to ask for human assistance. + +[,text] +---- +Responsibilities: +- Track customer orders +- Update shipping addresses +- Process returns up to $500 + +Do not: +- Provide product recommendations (redirect to website) +- Process refunds above $500 (escalate to manager) +- Access orders from other customers +---- + +== Tool specification + +Tell the agent which tools are available and when to use them. Explicit tool guidance reduces errors and prevents the agent from guessing when to invoke capabilities. 
+ +=== List available tools + +Name each tool the agent can use: + +[,text] +---- +Available tools: +- get_customer_orders: Retrieve order history for a customer +- update_order_status: Change order state (shipped, delivered, canceled) +- calculate_refund: Compute refund amount based on return policy +---- + +=== Explain when to use each tool + +Provide decision criteria for tool selection. + +.Do +[,text] +---- +Use get_customer_orders when: +- Customer asks about order history +- You need order details to answer a question + +Use update_order_status only when: +- Customer explicitly requests a cancellation +- You have confirmed the order is eligible for status changes +---- + +.Don't +[,text] +---- +Use the tools as needed. +---- + +== Constraints and safety + +Set explicit boundaries to prevent unintended agent behavior. + +=== Define data boundaries + +Specify what data the agent can access: + +[,text] +---- +Data access: +- Only orders from the last 90 days +- Only data for the authenticated customer +- No access to employee records or internal systems +---- + +=== Set response guidelines + +Control output format and content: + +[,text] +---- +Response guidelines: +- Present order details as tables +- Always include order numbers in responses +- State the analysis time window when showing trends +- If you cannot complete a task, explain why and suggest alternatives +---- + +== Context and conversation management + +Guide the agent on how to handle unclear requests and stay within conversation scope. These guidelines keep interactions focused and prevent the agent from making assumptions. + +=== Handle ambiguous requests + +Guide the agent on how to clarify: + +[,text] +---- +When request is unclear: +1. Ask clarifying questions +2. Suggest common interpretations +3. 
Do not guess customer intent +---- + +=== Define conversation boundaries + +Set limits on conversation scope: + +[,text] +---- +Conversation scope: +- Answer questions about orders, shipping, and returns +- Do not provide product recommendations (redirect to website) +- Do not engage in general conversation unrelated to orders +---- + +== Error handling + +Guide agents to handle failures gracefully through clear prompt instructions. Agent errors fall into two categories: tool failures (external system issues) and reasoning failures (agent confusion or limits). + +=== Tool failure types + +Tools can fail for multiple reasons. Transient failures include network timeouts, temporary unavailability, and rate limits. Permanent failures include invalid parameters, permission denied, and resource not found errors. Partial failures occur when tools return incomplete data or warnings. + +=== Graceful degradation + +Design prompts so agents continue when tools fail: + +.Example prompt guidance for graceful degradation +[,text] +---- +When a tool fails: +1. Attempt an alternative tool if available +2. If no alternative exists, explain the limitation +3. Offer partial results if you retrieved some data before failure +4. Do not make up information to fill gaps +---- + +Agents that degrade gracefully provide value even when systems are partially down. + +Implement retries in tools, not in agent prompts. The tool should retry network calls automatically before returning an error to the agent. + +=== Escalation patterns + +Some failures require human intervention. Budget exceeded errors occur when max iterations are reached before task completion. Insufficient tools means no tool is available for the required action. Ambiguous requests happen when the agent can't determine user intent after clarification attempts. Data access failures occur when multiple tools fail with no alternative path. 
+ +Design prompts to recognize escalation conditions: + +.Example prompt guidance for escalation +[,text] +---- +When you cannot complete the task: +1. Explain what you tried and why it didn't work +2. Tell the user what information or capability is missing +3. Suggest how they can help (provide more details, contact support, etc.) +---- + +=== Common error scenarios + +Include guidance for specific error types in your system prompt: + +**Timeout during tool execution:** When a tool takes longer than the agent timeout limit, the agent receives a timeout error in context. The agent should explain the delay to the user and suggest a retry. + +**Invalid tool parameters:** When the agent passes a wrong data type or missing required field, the tool returns a validation error. The agent should reformat parameters and retry, or ask the user for correct input. + +**Authentication failure:** When a tool can't access a protected resource, it returns a permission denied error. The agent should explain the access limitation without exposing credentials or internal details. + +== Output formatting + +Control how the agent presents information to users. Consistent formatting makes responses easier to read and ensures critical information appears in predictable locations. + +=== Specify structure + +Define how the agent presents information: + +[,text] +---- +Output format: +- Use tables for multiple items +- Use bulleted lists for steps or options +- Use code blocks for tracking numbers or order IDs +- Include units (dollars, kilograms) in all numeric values +---- + +[[evaluation-and-testing]] +== Evaluation and testing + +Test system prompts systematically to verify behavior matches intent. 
+ +Follow this process to validate prompts: + +[cols="1,2,2"] +|=== +| Test Type | What to Test | Example + +| Boundary cases +| Requests at edge of agent scope +| Just inside: "Track order 123" (should work) + +Just outside: "Recommend products" (should refuse) + +Ambiguous: "Help with my order" (should clarify) + +| Tool selection +| Agent chooses correct tools +| Create requests requiring each tool + +Test multiple applicable tools (verify best choice) + +Test no applicable tools (verify explanation) + +| Constraint compliance +| Agent follows "never" rules +| Explicit forbidden: "Show payment methods" + +Indirect forbidden: "What's the credit card number?" + +Verify refusal with explanation + +| Error handling +| Tool failures and limitations +| Disable MCP server tool temporarily + +Send request requiring disabled tool + +Verify graceful response (no fabricated data) + +| Ambiguous requests +| Clarification behavior +| Vague: "Check my stuff" + +Verify specific questions: "Orders, returns, or account?" + +Ensure no guessing of user intent +|=== + +== Design principles + +Apply these principles when writing system prompts to create reliable agent systems. + +=== Design for inspectability + +Make agent reasoning transparent so you can debug by reading conversation history. Your system prompt should encourage clear explanations: + +[,text] +---- +Response format: +- State what you're doing before calling each tool +- Explain why you chose this tool over alternatives +- If a tool fails, describe what went wrong and what you tried +---- + +Log all tool invocations with parameters, record tool results in structured format, and store agent responses with reasoning traces. Opaque agents that "just work" are impossible to fix when they break. 
+ +=== Design for testability + +Test agents with boundary cases (requests at the edge of agent capability), error injection (simulate tool failures to verify graceful degradation), context limits (long conversations approaching token limits), and ambiguous requests (unclear user input to verify clarification behavior). + +Use the systematic testing approach in <<evaluation-and-testing>>. + +=== Design for cost control + +Write clear system prompts that reduce wasted iterations. Vague prompts cause agent confusion and unnecessary tool calls. Each wasted iteration costs tokens. + +Guide agents to: + +* Request only needed data from tools (use pagination, filters) +* Avoid redundant tool calls (check context before calling) +* Stop when the task completes (don't continue exploring) + +For cost management strategies including iteration limits and monitoring, see xref:ai-agents:agents/concepts.adoc[]. + +== Example: Complete system prompt + +This example demonstrates all best practices: + +[,text] +---- +You are an order analytics agent for Acme E-commerce. 
+ +Responsibilities: +- Answer questions about customer order trends +- Analyze order data from Redpanda topics +- Provide insights on order patterns + +Available tools: +- get_customer_orders: Retrieve order history (parameters: customer_id, start_date, end_date) +- analyze_recent_orders: Compute order statistics (parameters: time_window, group_by) + +When to use tools: +- Use get_customer_orders for individual customer queries +- Use analyze_recent_orders for trend analysis across multiple orders + +Never: +- Expose customer payment information or addresses +- Analyze data older than 90 days unless explicitly requested +- Make business recommendations without data to support them + +Data access: +- Only orders from the authenticated customer account +- Maximum of 90 days of historical data + +Response guidelines: +- Present structured data as tables +- Always state the analysis time window +- Include order counts in trend summaries +- If data is unavailable, explain the limitation + +When request is unclear: +- Ask which time period to analyze +- Confirm whether to include canceled orders +- Do not assume customer intent +---- + +== Common anti-patterns + +Avoid these patterns that lead to unpredictable agent behavior. + +=== Vague role definition + +Define specific agent responsibilities and scope. + +Generic role definitions fail because the agent has no guidance on what tasks to handle, what requests to refuse, or when to escalate to humans. + +.Don't +[,text] +---- +You are a helpful AI assistant. +---- + +This doesn't constrain behavior or set expectations. The agent might attempt tasks outside its capabilities or handle requests it should refuse. + +.Do +[,text] +---- +You are an order fulfillment agent for an e-commerce platform. You help customers track orders, update shipping addresses, and process returns up to $500. 
+ +Do not: +- Provide product recommendations (redirect to website) +- Process refunds above $500 (escalate to manager) +---- + +Clear scope prevents the agent from attempting out-of-scope tasks and defines escalation boundaries. + +=== Missing constraints + +Set explicit boundaries on data access and operations. + +Without constraints, agents may access sensitive data, process excessive historical records, or perform operations beyond their authorization. + +.Don't +[,text] +---- +You can access customer data to help answer questions. +---- + +This provides no boundaries on what data, how much history, or which customers. The agent might retrieve payment information, access other customers' data, or query years of records. + +.Do +[,text] +---- +Data access: +- Only orders from the authenticated customer +- Maximum of 90 days of historical data +- No access to payment methods or billing addresses +---- + +Explicit boundaries prevent unauthorized access and scope queries to reasonable limits. + +=== Implicit tool selection + +Specify when to use each tool with clear decision criteria. + +Vague tool guidance forces agents to guess based on tool names alone, leading to wrong tool choices, unnecessary calls, or skipped tools. + +.Don't +[,text] +---- +Use the available tools to complete tasks. +---- + +The agent must guess which tool applies when. This leads to calling the wrong tool first, calling all tools unnecessarily, or fabricating answers without using tools. + +.Do +[,text] +---- +Use get_customer_orders when: +- Customer asks about order history +- You need order details to answer a question + +Use update_order_status only when: +- Customer explicitly requests a cancellation +- You have confirmed the order is eligible for status changes +---- + +Decision criteria enable reliable tool selection based on request context. 
+ +== Next steps + +* xref:ai-agents:agents/quickstart.adoc[] +* xref:ai-agents:agents/overview.adoc[] +* xref:ai-agents:mcp/remote/best-practices.adoc[] diff --git a/modules/ai-agents/pages/agents/quickstart.adoc b/modules/ai-agents/pages/agents/quickstart.adoc new file mode 100644 index 000000000..0be1a183b --- /dev/null +++ b/modules/ai-agents/pages/agents/quickstart.adoc @@ -0,0 +1,191 @@ += AI Agent Quickstart +:description: Create your first AI agent in Redpanda Cloud that generates and publishes event data through natural language commands. +:page-topic-type: tutorial +:personas: agent_developer, evaluator +:learning-objective-1: Create an AI agent in Redpanda Cloud that uses MCP tools +:learning-objective-2: Configure the agent with a system prompt and model selection +:learning-objective-3: Test the agent by generating and publishing events through natural language + +Build your first AI agent in Redpanda Cloud. You'll create an agent that understands natural language requests and uses MCP tools to generate and publish event data to Redpanda topics. 
+ +After completing this quickstart, you will be able to: + +* [ ] {learning-objective-1} +* [ ] {learning-objective-2} +* [ ] {learning-objective-3} + +== Prerequisites + +* A xref:get-started:cluster-types/byoc/index.adoc[BYOC cluster] (agents are not available on Dedicated or Serverless clusters) + +* xref:ai-agents:ai-gateway/gateway-quickstart.adoc[AI Gateway configured] with at least one LLM provider enabled (OpenAI, Anthropic, or Google AI) + +* Completed the xref:ai-agents:mcp/remote/quickstart.adoc[Remote MCP Quickstart] to create an MCP server with the following tools deployed: ++ +** `generate_input`: Generates fake user event data +** `redpanda_output`: Publishes data to Redpanda topics + +== What you'll build + +An Event Data Manager agent that: + +* Generates fake user event data (logins, purchases, page views) +* Publishes events to Redpanda topics +* Understands natural language requests like "Generate 5 login events and publish them" + +The agent orchestrates the `generate_input` and `redpanda_output` tools you created in the MCP quickstart. + +== Create the agent + +. Log in to the link:https://cloud.redpanda.com/[Redpanda Cloud Console^]. + +. Navigate to your cluster and click *Agentic AI* > *AI Agents* in the left navigation. + +. Click *Create Agent*. + +. Configure basic settings: ++ +* *Display Name*: `event-data-manager` +* *Description*: `Generates and publishes fake user event data to Redpanda topics` +* *Resource Tier*: Select *XSmall* (sufficient for this quickstart) + +. Select your AI Gateway and model: ++ +* *AI Gateway*: Select the gateway you configured (contains provider and API key configuration) +* *Provider*: Select a provider available in your gateway (OpenAI, Anthropic, or Google) +* *Model*: Choose any balanced model from the dropdown + +. 
Add your API key: ++ +* Click *Add Secret* under *API Key* +* Select *Create new secret* +* *Secret Name*: `<provider>-api-key` (for example, `openai-api-key`) +* *Secret Value*: Paste your API key +* Click *Save* + +. Write the system prompt: ++ +[,text] +---- +You are an Event Data Manager agent for Redpanda Cloud. + +Your responsibilities: +- Generate realistic fake user event data +- Publish events to Redpanda topics +- Help users test streaming data pipelines + +Available tools: +- generate_input: Creates fake user events (login, logout, purchase, view) +- redpanda_output: Publishes data to the events topic + +When a user asks you to generate events: +1. Use generate_input to create the event data +2. Use redpanda_output to publish the events to Redpanda +3. Confirm how many events were published + +Always publish events after generating them unless the user explicitly says not to. + +Response format: +- State what you're doing before calling each tool +- Show the generated event data +- Confirm successful publication with a count +---- + +. Select MCP tools: ++ +* Click *Add MCP Server* +* Select the `event-data-generator` server (created in the MCP quickstart) +* Check both tools: +** `generate_input` +** `redpanda_output` + +. Set execution parameters: ++ +* *Max Iterations*: `30` (allows multiple tool calls per request) + +. Review your configuration and click *Create Agent*. + +. Wait for the agent status to change from *Starting* to *Running*. + +== Test your agent + +Now test your agent with natural language requests. + +. In the agent details view, open the *Inspector* tab. + +. Try these example requests: ++ +.Generate and publish 3 events +[%collapsible] +==== +[.no-copy] +---- +Generate 3 user events and publish them to the events topic. +---- + +The agent should respond with these steps: + +. Call `generate_input` to create 3 fake user events. +. Call `redpanda_output` to publish them to the `events` topic. +. Confirm the events were published. 
+ +You should see the agent's reasoning and the tool execution results. +==== ++ +.Generate specific event types +[%collapsible] +==== +[.no-copy] +---- +Create 5 login events for testing and publish them to Redpanda. +---- + +The agent understands the request requires login events specifically and generates appropriate test data. +==== ++ +.Generate events without publishing +[%collapsible] +==== +[.no-copy] +---- +Show me what 3 sample purchase events would look like, but don't publish them yet. +---- + +The agent calls only `generate_input` and displays the data without publishing. +==== + +. Navigate to *Topics* in the left navigation to verify events were published to the `events` topic. + +== Iterate on your agent + +Try modifying the agent to change its behavior: + +. Click *Edit configuration* in the agent details view. + +. Update the system prompt to change how the agent responds. For example: ++ +* Add constraints: "Never publish more than 10 events at once" +* Change output format: "Always format events as a table" +* Add validation: "Before publishing, show the user the generated data and ask for confirmation" + +. Click *Save* to update the agent. + +. Test your changes in the *Inspector* tab. + +== Troubleshoot + +For comprehensive troubleshooting guidance, see xref:ai-agents:agents/troubleshooting.adoc[]. + +Common quickstart issues: + +**Events not appearing in topic:** Verify the `events` topic exists and review the MCP server logs for publishing errors. + +== Next steps + +You've created an agent that orchestrates MCP tools through natural language. 
Explore more: + +* xref:ai-agents:agents/overview.adoc[] +* xref:ai-agents:agents/create-agent.adoc[] +* xref:ai-agents:agents/prompt-best-practices.adoc[] +* xref:ai-agents:agents/architecture-patterns.adoc[] +* xref:ai-agents:mcp/remote/tool-patterns.adoc[] diff --git a/modules/ai-agents/pages/agents/troubleshooting.adoc b/modules/ai-agents/pages/agents/troubleshooting.adoc new file mode 100644 index 000000000..69eda27eb --- /dev/null +++ b/modules/ai-agents/pages/agents/troubleshooting.adoc @@ -0,0 +1,485 @@ += Troubleshoot AI Agents +:description: Diagnose and fix common issues with AI agents including deployment failures, runtime behavior problems, and tool execution errors. +:page-topic-type: troubleshooting +:personas: agent_developer, app_developer, streaming_developer +:learning-objective-1: Diagnose deployment failures and resource allocation errors +:learning-objective-2: Resolve runtime behavior issues including tool selection and iteration limits +:learning-objective-3: Fix tool execution problems and authentication failures + +Use this page to diagnose and fix common issues with AI agents, including deployment failures, runtime behavior problems, tool execution errors, and integration issues. + +== Deployment issues + +Fix issues that prevent agents from connecting to required resources. + +=== MCP server connection failures + +**Symptoms:** Agent starts but the tools don't respond or return connection errors. + +**Causes:** + +* MCP server stopped or crashed after agent creation +* Network connectivity issues between agent and MCP server +* MCP server authentication or permission issues + +**Solution:** + +. Verify MCP server status in *Agentic AI* > *Remote MCP*. +. Check MCP server logs for errors. +. Restart the MCP server if needed. +. Verify agent has permission to access the MCP server. 
+ +**Prevention:** + +* Monitor MCP server health +* Use appropriate retry logic in tools + +== Runtime behavior issues + +Resolve problems with agent decision-making, tool selection, and response generation. + +=== Agent not calling tools + +**Symptoms:** Agent responds without calling any tools, or fabricates information instead of using tools. + +**Causes:** + +* System prompt doesn't clearly specify when to use tools +* Tool descriptions are vague or missing +* LLM model lacks sufficient reasoning capability +* Max iterations is too low + +**Solution:** + +. Strengthen tool usage guidance in your system prompt: ++ +[,text] +---- +ALWAYS use get_order_status when customer mentions an order ID. +NEVER respond about order status without calling the tool first. +---- + +. Review tool descriptions in your MCP server configuration. +. Use a more capable model from the supported list for your gateway. +. Increase max iterations if the agent is stopping before reaching tools. + +**Prevention:** + +* Write explicit tool selection criteria in system prompts +* Test agents with the xref:ai-agents:agents/prompt-best-practices.adoc#evaluation-and-testing[systematic testing approach] +* Use models appropriate for your task complexity + +=== Calling wrong tools + +**Symptoms:** Agent selects incorrect tools for the task, or calls tools with invalid parameters. + +**Causes:** + +* Tool descriptions are ambiguous or overlap +* Too many similar tools confuse the LLM +* System prompt doesn't provide clear tool selection guidance + +**Solution:** + +. Make tool descriptions more specific and distinct. +. Add "when to use" guidance to your system prompt: ++ +[,text] +---- +Use get_order_status when: +- Customer provides an order ID (ORD-XXXXX) +- You need to check current order state + +Use get_shipping_info when: +- Order status is "shipped" +- Customer asks about delivery or tracking +---- + +. Reduce the number of tools you expose to the agent. +. 
Use subagents to partition tools by domain. + +**Prevention:** + +* Follow tool design patterns in xref:ai-agents:mcp/remote/tool-patterns.adoc[] +* Limit each agent to 10-15 tools maximum +* Test boundary cases where multiple tools might apply + +=== Stuck in loops or exceeding max iterations + +**Symptoms:** Agent reaches max iterations without completing the task, or repeatedly calls the same tool with the same parameters. + +**Causes:** + +* Tool returns errors that the agent doesn't know how to handle +* Agent doesn't recognize when the task is complete +* Tool returns incomplete data that prompts another call +* System prompt encourages exhaustive exploration + +**Solution:** + +. Add completion criteria to your system prompt: ++ +[,text] +---- +When you have retrieved all requested information: +1. Present the results to the user +2. Stop calling additional tools +3. Do not explore related data unless asked +---- + +. Add error handling guidance: ++ +[,text] +---- +If a tool fails after 2 attempts: +- Explain what went wrong +- Do not retry the same tool again +- Move on or ask for user guidance +---- + +. Review tool output to ensure it signals completion clearly. +. Increase max iterations if the task legitimately requires many steps. + +**Prevention:** + +* Design tools to return complete information in one call +* Set max iterations appropriate for task complexity (see xref:ai-agents:agents/concepts.adoc#why-iterations-matter[Why iterations matter]) +* Test with ambiguous requests that might cause loops + +=== Making up information + +**Symptoms:** Agent provides plausible-sounding answers without calling tools, or invents data when tools fail. + +**Causes:** + +* System prompt doesn't explicitly forbid fabrication +* Agent treats tool failures as suggestions rather than requirements +* Model is hallucinating due to lack of constraints + +**Solution:** + +. 
Add explicit constraints to your system prompt: ++ +[,text] +---- +Critical rules: +- NEVER make up order numbers, tracking numbers, or customer data +- If a tool fails, explain the failure - do not guess +- If you don't have information, say so explicitly +---- + +. Test error scenarios by temporarily disabling tools. +. Use a more capable model that follows instructions better. + +**Prevention:** + +* Include "never fabricate" rules in all system prompts +* Test with requests that require unavailable data +* Monitor *Transcripts* and session topic for fabricated responses + +=== Analyzing conversation patterns + +**Symptoms:** Agent behavior is inconsistent or produces unexpected results. + +**Solution:** + +Review conversation history in *Transcripts* to identify problematic patterns: + +* Agents calling the same tool repeatedly: Indicates loop detection is needed +* Large gaps between messages: Suggests tool timeout or slow execution +* Agent responses without tool calls: Indicates a tool selection issue +* Fabricated information: Suggests a missing "never make up data" constraint +* Truncated early messages: Indicates the context window was exceeded + +**Analysis workflow:** + +. Use *Inspector* to reproduce the issue. +. Review full conversation including tool invocations. +. Identify where agent behavior diverged from expected. +. Check system prompt for missing guidance. +. Verify tool responses are formatted correctly. + +== Performance issues + +Diagnose and fix issues related to agent speed and resource consumption. + +=== Slow response times + +**Symptoms:** Agent takes 10+ seconds to respond to simple queries. + +**Causes:** + +* LLM model is slow (large context processing) +* Too many tool calls in sequence +* Tools themselves are slow (database queries, API calls) +* Large context window from long conversation history + +**Solution:** + +. Use a faster, lower-latency model tier for simple queries and reserve larger models for complex reasoning. +. 
Review conversation history in the *Inspector* tab to identify unnecessary tool calls. +. Optimize tool implementations: +.. Add caching where appropriate +.. Reduce query complexity +.. Return only needed data (use pagination, filters) +. Clear the conversation history if the context is very large. + +**Prevention:** + +* Right-size model selection based on task complexity +* Design tools to execute quickly (< 2 seconds ideal) +* Set appropriate max iterations to prevent excessive exploration +* Monitor token usage and conversation length + +=== High token costs + +**Symptoms:** Token usage is higher than expected, costs are increasing rapidly. + +**Causes:** + +* Max iterations configured too high +* Agent making unnecessary tool calls +* Large tool results filling context window +* Long conversation history not being managed +* Using expensive models for simple tasks + +**Solution:** + +. Review token usage in *Transcripts*. +. Lower max iterations for this agent. +. Optimize tool responses to return less data: ++ +[,text] +---- +Bad: Return all 10,000 customer records +Good: Return paginated results, 20 records at a time +---- + +. Add cost control guidance to system prompt: ++ +[,text] +---- +Efficiency guidelines: +- Request only the data you need +- Stop when you have enough information +- Do not call tools speculatively +---- + +. Switch to a more cost-effective model for simple queries. +. Clear conversation history periodically in the *Inspector* tab. + +**Prevention:** + +* Set appropriate max iterations (10-20 for simple, 30-40 for complex) +* Design tools to return minimal necessary data +* Monitor token usage trends +* See cost calculation guidance in xref:ai-agents:agents/concepts.adoc#cost-calculation[Cost calculation] + +== Tool execution issues + +Fix problems with timeouts, invalid parameters, and error responses. + +=== Tool timeouts + +**Symptoms:** Tools fail with timeout errors, agent receives incomplete results. 
+ +**Causes:** + +* External API is slow or unresponsive +* Database query is too complex +* Network latency between tool and external system +* Tool processing large datasets in memory + +**Solution:** + +. Add timeout handling to tool implementation: ++ +[,yaml] +---- +http: + url: https://api.example.com/data + timeout: "5s" # Set explicit timeout +---- + +. Optimize external queries: +.. Add database indexes +.. Reduce query scope +.. Cache frequent queries +. Increase tool timeout if operation legitimately takes longer. +. Add retry logic for transient failures. + +**Prevention:** + +* Set explicit timeouts in all tool configurations +* Test tools under load +* Monitor external API performance +* Design tools to fail fast on unavailable services + +=== Invalid parameters + +**Symptoms:** Tools return validation errors about missing or incorrectly formatted parameters. + +**Causes:** + +* Tool schema doesn't match implementation +* Agent passes wrong data types +* Required parameters not marked as required in schema +* Agent misunderstands parameter purpose + +**Solution:** + +. Verify tool schema matches implementation: ++ +[,yaml] +---- +input_schema: + properties: + order_id: + type: string # Must match what tool expects + description: "Order ID in format ORD-12345" +---- + +. Add parameter validation to tools. +. Improve parameter descriptions in tool schema. +. Add examples to tool descriptions: ++ +[,yaml] +---- +description: | + Get order status by order ID. + Example: get_order_status(order_id="ORD-12345") +---- + +**Prevention:** + +* Write detailed parameter descriptions +* Include format requirements and examples +* Test tools with invalid inputs to verify error messages +* Use JSON Schema validation in tool implementations + +=== Tool returns errors + +**Symptoms:** Tools execute but return error responses or unexpected data formats. 
+ +**Causes:** + +* External API returned error +* Tool implementation has bugs +* Data format changed in external system +* Tool lacks error handling + +**Solution:** + +. Check tool logs in MCP server. +. Test tool directly (outside agent context). +. Verify external system is operational. +. Add error handling to tool implementation: ++ +[,yaml] +---- +processors: + - try: + - http: + url: ${API_URL} + catch: + - mapping: | + root.error = "API unavailable: " + error() +---- + +. Update agent system prompt to handle this error type. + +**Prevention:** + +* Implement comprehensive error handling in tools +* Monitor external system health +* Add retries for transient failures +* Log all tool errors for analysis + +== Integration issues + +Fix problems with external applications calling agents and pipeline-to-agent integration. + +=== Agent card does not contain a URL + +**Symptoms:** Pipeline fails with error: `agent card does not contain a URL` or `failed to init processor path root.pipeline.processors.0` + +**Causes:** + +* The `agent_card_url` points to the base agent endpoint instead of the agent card JSON file + +**Solution:** + +The `agent_card_url` must point to the agent card JSON file, not the base agent endpoint. + +**Incorrect configuration:** + +[,yaml] +---- +processors: + - a2a_message: + agent_card_url: "https://your-agent-id.ai-agents.your-cluster-id.cloud.redpanda.com" + prompt: "Analyze this transaction: ${!content()}" +---- + +**Correct configuration:** + +[,yaml] +---- +processors: + - a2a_message: + agent_card_url: "https://your-agent-id.ai-agents.your-cluster-id.cloud.redpanda.com/.well-known/agent-card.json" + prompt: "Analyze this transaction: ${!content()}" +---- + +The agent card is always available at `/.well-known/agent-card.json` according to the A2A protocol standard. 
+ +**Prevention:** + +* Always append `/.well-known/agent-card.json` to the agent endpoint URL +* Test the agent card URL in a browser before using it in pipeline configuration +* See xref:ai-agents:agents/a2a-concepts.adoc#agent-card-location[Agent card location] for details + +=== Pipeline integration failures + +**Symptoms:** Pipelines using `a2a_message` processor fail or timeout. + +**Causes:** + +* Agent is not running or restarting +* Agent timeout is too low for pipeline workload +* Authentication issues between pipeline and agent +* High event volume overwhelming agent + +**Solution:** + +. Check agent status and resource allocation. +. Increase agent resource tier for high-volume pipelines. +. Add error handling in pipeline: ++ +[,yaml] +---- +processors: + - try: + - a2a_message: + agent_card_url: "https://your-agent-url/.well-known/agent-card.json" + catch: + - log: + message: "Agent invocation failed: ${! error() }" +---- + +**Prevention:** + +* Test pipeline-agent integration with low volume first +* Size agent resources appropriately for event rate +* See integration patterns in xref:ai-agents:agents/pipeline-integration-patterns.adoc[] + +== Monitor and debug agents + +For comprehensive guidance on monitoring agent activity, analyzing conversation history, tracking token usage, and debugging issues, see xref:ai-agents:agents/monitor-agents.adoc[]. 
+ +== Next steps + +* xref:ai-agents:agents/prompt-best-practices.adoc[] +* xref:ai-agents:agents/concepts.adoc[] +* xref:ai-agents:mcp/remote/tool-patterns.adoc[] +* xref:ai-agents:agents/architecture-patterns.adoc[] diff --git a/modules/ai-agents/pages/agents/tutorials/customer-support-agent.adoc b/modules/ai-agents/pages/agents/tutorials/customer-support-agent.adoc new file mode 100644 index 000000000..552a5bdce --- /dev/null +++ b/modules/ai-agents/pages/agents/tutorials/customer-support-agent.adoc @@ -0,0 +1,273 @@ += Learn Multi-Tool Agent Orchestration +:description: Learn how agents coordinate multiple tools, make decisions based on conversation context, and handle errors through building a customer support agent. +:page-topic-type: tutorial +:personas: agent_developer, streaming_developer +:learning-objective-1: Explain how agents use conversation context to decide which tools to invoke +:learning-objective-2: Apply tool orchestration patterns to handle multi-step workflows +:learning-objective-3: Evaluate how system prompt design affects agent tool selection + +Build a customer support agent to learn how agents orchestrate multiple tools, make context-aware decisions, and handle incomplete data. + +After completing this tutorial, you will be able to: + +* [ ] {learning-objective-1} +* [ ] {learning-objective-2} +* [ ] {learning-objective-3} + +== What you'll learn + +Agents become powerful when they coordinate multiple tools to solve complex problems. A single-tool agent can retrieve order status. A multi-tool agent can check order status, fetch tracking information, look up customer history, and decide which tools to invoke based on conversation context. + +This tutorial teaches multi-tool orchestration through a customer support scenario. 
+ +The patterns you practice here apply to any multi-tool scenario: data analysis agents coordinating query and visualization tools, workflow automation agents chaining approval and notification tools, or research agents combining search and summarization tools. + +== The scenario + +Customer support teams handle repetitive questions: "Where is my order?", "What's my tracking number?", "Show me my order history." Human agents waste time on lookups that could be automated. + +An effective support agent needs three capabilities: + +- **Order status lookup**: Check current order state and contents +- **Shipping information**: Retrieve tracking numbers and delivery estimates +- **Order history**: Show past purchases for a customer + +The challenge: users phrase requests differently ("Where's my package?", "Track order ORD-12345", "My recent orders"), and agents must choose the right tool based on context. + +== Prerequisites + +* A xref:get-started:cluster-types/byoc/index.adoc[BYOC cluster] with Remote MCP enabled. +* xref:ai-agents:ai-gateway/gateway-quickstart.adoc[AI Gateway configured] with at least one LLM provider enabled (this tutorial uses OpenAI). + +== Design the MCP tools + +Before an agent can orchestrate tools, you need tools to orchestrate. Each tool should do one thing well, returning structured data the agent can reason about. + +You could create a single `handle_customer_request` tool that takes a natural language query and returns an answer. 
But this approach fails because: + +* The agent can't inspect intermediate results +* Tool chaining becomes impossible (no way to pass order status to shipping lookup) +* Error handling is opaque + +Instead, create focused tools: + +* `get_order_status`: Returns order state and contents +* `get_shipping_info`: Returns tracking data +* `get_customer_history`: Returns past orders + +This granularity enables the agent to chain tools (check order status, see it's shipped, fetch tracking info) and handle errors at each step. + +=== Deploy the tools + +Create a Remote MCP server with the three tools. + +. Navigate to your cluster in the link:https://cloud.redpanda.com[Redpanda Cloud Console^]. +. Go to *Agentic AI* > *Remote MCP*. +. Click *Create MCP Server*. +. Configure the server: ++ +* *Name*: `customer-support-tools` +* *Description*: `Tools for customer support agent` + +. Add the following tools. For each tool, select *Processor* from the component type dropdown, then click *Lint* to validate: ++ +[tabs] +==== +get_order_status:: ++ +This tool uses the `mapping` processor to return mock data. The mock approach enables testing without external dependencies. The agent must interpret the structured response to extract order details. ++ +[,yaml] +---- +include::ai-agents:example$mcp-tools/processors/get_order_status.yaml[] +---- + +get_shipping_info:: ++ +This tool demonstrates conditional data: it only returns tracking information when the order has shipped. When an order hasn't shipped yet, the tool returns an empty result. The agent must handle this case. ++ +[,yaml] +---- +include::ai-agents:example$mcp-tools/processors/get_shipping_info.yaml[] +---- + +get_customer_history:: ++ +This tool returns multiple orders, demonstrating list-handling. The agent must format multiple results clearly for users. ++ +[,yaml] +---- +include::ai-agents:example$mcp-tools/processors/get_customer_history.yaml[] +---- +==== + +. 
Click *Create MCP Server* + +Wait for the server status to show *Running*. You now have three focused tools the agent can orchestrate. + +== Write the system prompt + +The system prompt teaches the agent how to orchestrate tools. Without explicit guidance, the agent must guess when to use each tool, often choosing incorrectly or ignoring tools entirely. + +=== Create the agent + +Create the customer support agent with the system prompt. + +. Go to *Agentic AI* > *AI Agents*. +. Click *Create Agent*. +. Configure the agent: ++ +* *Name*: `customer-support-agent` +* *Description*: `Helps customers track orders and shipping` +* *Resource Tier*: Medium +* *AI Gateway*: Select the gateway you configured +* *Provider*: OpenAI or Anthropic +* *Model*: OpenAI GPT-5.2 or Claude Sonnet 4.5 (models with strong reasoning) +* *MCP Server*: Select `customer-support-tools` +* *Max Iterations*: 15 + +. In the *System Prompt* field, enter this configuration: ++ +[source,text] +---- +You are a customer support agent for Acme E-commerce. 
+ +Responsibilities: +- Help customers track their orders +- Provide shipping information and estimated delivery dates +- Look up customer order history +- Answer questions about order status + +Available tools: +- get_order_status: Use when customer asks about a specific order +- get_shipping_info: Use when customer needs tracking or delivery information +- get_customer_history: Use when customer asks about past orders or "my orders" + +When to use each tool: +- If customer provides an order ID (ORD-XXXXX), use get_order_status first +- If customer asks "where is my order?", ask for the order ID before using tools +- If order is "shipped", follow up with get_shipping_info to provide tracking details +- If customer asks about "all my orders" or past purchases, use get_customer_history + +Never: +- Expose customer payment information (credit cards, billing addresses) +- Make up tracking numbers or delivery dates +- Guarantee delivery dates (use "estimated" language) +- Process refunds or cancellations (escalate to human agent) + +Error handling: +- If order not found, ask customer to verify the order ID +- If shipping info unavailable, explain the order may not have shipped yet +- If customer history is empty, confirm the customer ID and explain no orders found + +Response format: +- Start with a friendly greeting +- Present order details in a clear, structured way +- For order status, include: order ID, status, items, total +- For shipping, include: carrier, tracking number, estimated delivery, last known location +- Always include next steps or offer additional help + +Example response structure: +1. Acknowledge the customer's question +2. Present the information from tools +3. Provide next steps or additional context +4. Ask if they need anything else +---- + +. Click *Create Agent*. + +Wait for the agent status to show *Running*. + +== Observe orchestration in action + +Open the *Inspector* tab in the Redpanda Cloud Console to interact with the agent. 
+ +Testing reveals how the agent makes decisions. Watch the conversation panel in the built-in chat interface to see the agent's reasoning process unfold. + +=== Tool chaining based on status + +Test how the agent chains tools based on order status. + +Enter this query in *Inspector*: + +---- +Hi, I'd like to check on order ORD-12345 +---- + +Watch the conversation panel. The agent calls `get_order_status` first, sees the status is "shipped", then automatically follows up with `get_shipping_info` to provide tracking details. The agent uses the first tool's result to decide whether to invoke the second tool. + +Now try this query with a different order: + +---- +Check order ORD-67890 +---- + +This order has status "processing", so the agent calls only `get_order_status`. Since the order hasn't shipped yet, the agent skips `get_shipping_info`. The agent chains tools only when appropriate. + +=== Clarification before tool invocation + +Test how the agent handles incomplete information. + +Click *Clear context* to clear the conversation history. Then enter this query: + +---- +Where is my order? +---- + +The agent recognizes the request is missing an order ID and asks the customer to provide it. Watch the conversation panel and see that the agent calls zero tools. Instead of guessing or fabricating information, it asks a clarifying question. + +This demonstrates pre-condition checking. Effective orchestration includes knowing when NOT to invoke tools. + +=== List handling + +Test how the agent formats multiple results. + +Enter this query: + +---- +Can you show me my recent orders? My customer ID is CUST-100. +---- + +The agent calls `get_customer_history` and receives multiple orders. Watch how it formats the list clearly for the customer, showing details for each order. 
+ +Now test the empty results case with this query: + +---- +Show my order history for customer ID CUST-999 +---- + +The agent receives an empty list and explains that no orders were found, asking the customer to verify their ID. + +=== Error recovery + +Test how the agent handles missing data. + +Enter this query: + +---- +Check order ORD-99999 +---- + +The tool returns no data for this order ID. Watch how the agent responds. It explains the order wasn't found and asks the customer to verify the order ID. Critically, the agent does not fabricate tracking numbers or order details. + +This demonstrates error recovery without hallucination. The "Never make up tracking numbers" constraint in the system prompt prevents the agent from inventing plausible-sounding but fake information. + +== Troubleshoot + +For comprehensive troubleshooting guidance, see xref:ai-agents:agents/troubleshooting.adoc[]. + +=== Test with mock data + +The mock tools in this tutorial only recognize specific test IDs: + +* Orders: ORD-12345, ORD-67890, ORD-99999 +* Customers: CUST-100, CUST-999 + +Use these documented test IDs when testing the agent. If you replace the mock tools with real API calls, verify that your API endpoints return the expected data structures. 
+ +== Next steps + +* xref:ai-agents:mcp/remote/tool-patterns.adoc#call-external-apis[Call external APIs] +* xref:ai-agents:agents/prompt-best-practices.adoc[] +* xref:ai-agents:agents/architecture-patterns.adoc[] +* xref:ai-agents:agents/troubleshooting.adoc[] diff --git a/modules/ai-agents/pages/agents/tutorials/transaction-dispute-resolution.adoc b/modules/ai-agents/pages/agents/tutorials/transaction-dispute-resolution.adoc new file mode 100644 index 000000000..b36c37cf8 --- /dev/null +++ b/modules/ai-agents/pages/agents/tutorials/transaction-dispute-resolution.adoc @@ -0,0 +1,663 @@ += Build Multi-Agent Systems for Transaction Dispute Resolution +:description: Learn how to build multi-agent systems with domain separation, handle sensitive financial data, and monitor multi-agent execution through transaction investigation. +:page-topic-type: tutorial +:personas: agent_developer, platform_admin +:learning-objective-1: Design multi-agent systems with domain-specific sub-agents +:learning-objective-2: pass:q[Monitor multi-agent execution using *Transcripts*] +:learning-objective-3: Integrate agents with streaming pipelines for event-driven processing + +Build a transaction dispute resolution system using multi-agent architecture, secure data handling, and execution monitoring. + +After completing this tutorial, you will be able to: + +* [ ] {learning-objective-1} +* [ ] {learning-objective-2} +* [ ] {learning-objective-3} + +== What you'll learn + +This tutorial advances from xref:ai-agents:agents/tutorials/customer-support-agent.adoc[basic multi-tool orchestration] to multi-agent systems. You'll build a transaction dispute resolution system where a root agent delegates to specialized sub-agents (account, fraud, merchant, compliance), each with focused responsibilities and PII-protected data access. You'll also monitor execution using *Transcripts* and process disputes from transaction streams for automated detection. 
+
+These patterns apply beyond banking to any domain requiring specialized expertise and data security: healthcare systems, insurance claims processing, or regulatory compliance workflows.
+
+== The scenario
+
+Banks handle thousands of dispute calls daily. Customers report unauthorized charges, billing errors, or unrecognized transactions. Each investigation requires cross-referencing multiple systems and applying consistent fraud detection logic.
+
+Traditionally, human agents manually open multiple systems, cross-reference data, and take notes. This 10-15 minute process is prone to inconsistencies and incomplete compliance logging.
+
+Multi-agent automation transforms this workflow by enabling instant data aggregation from all sources, consistent logic applied every time, 10-15 second resolution, and structured results for compliance. Human agents handle only complex escalations.
+
+When a customer calls saying "I see a $247.83 charge from 'ACME CORP' but I never shopped there. Is this fraud?", the system must investigate account history, calculate fraud scores, verify merchant legitimacy, and make a recommendation with structured results.
+
+== Prerequisites
+
+* A xref:get-started:cluster-types/byoc/index.adoc[BYOC cluster] with Remote MCP enabled.
+* xref:ai-agents:ai-gateway/gateway-quickstart.adoc[AI Gateway configured] with at least one LLM provider enabled (this tutorial uses OpenAI GPT-5 Mini for reasoning).
+* The xref:get-started:rpk-install.adoc[Redpanda CLI (`rpk`)] installed (for testing the pipeline with sample data).
+* Completed xref:ai-agents:agents/tutorials/customer-support-agent.adoc[] (foundational multi-tool concepts).
+
+== Create MCP tools for each domain
+
+Before creating agents, create the tools they'll use. You'll organize tools by domain, matching each sub-agent's responsibility.
+
+=== Account tools
+
+Account tools retrieve customer and transaction data with PII protection.
+
+. 
Navigate to your cluster in the link:https://cloud.redpanda.com[Redpanda Cloud Console^]. +. Go to *Agentic AI* > *Remote MCP*. +. Click *Create MCP Server*. +. Configure the server: ++ +* *Name*: `account-tools` +* *Description*: `Customer account and transaction data retrieval` +* *Resource Tier*: XSmall + +. Add the following tools. For each tool, select *Processor* from the component type dropdown, then click *Lint* to validate: ++ +[tabs] +==== +get_customer_account:: ++ +This mock tool returns account data with sensitive fields already protected. Card numbers only include the last 4 digits, while full names remain for verification. In production, implement similar protections in your data layer. ++ +[,yaml] +---- +include::ai-agents:example$mcp-tools/processors/get_customer_account.yaml[] +---- + +get_transaction_details:: ++ +This tool returns complete transaction details including merchant information, location, and timestamp. Notice how it returns structured data the fraud agent can analyze. ++ +[,yaml] +---- +include::ai-agents:example$mcp-tools/processors/get_transaction_details.yaml[] +---- + +get_transaction_history:: ++ +This tool returns aggregated spending patterns instead of raw transaction lists. This privacy-preserving approach gives fraud analysis what it needs (typical spending by category, location patterns) without exposing individual transaction details unnecessarily. ++ +[,yaml] +---- +include::ai-agents:example$mcp-tools/processors/get_transaction_history.yaml[] +---- +==== + +. Click *Create MCP Server*. + +Wait for the server status to show *Running*. + +[NOTE] +==== +This tutorial uses XSmall resource tier for all MCP servers because the mock tools run lightweight Bloblang transformations. Production deployments with external API calls require larger tiers based on throughput needs. See xref:ai-agents:mcp/remote/scale-resources.adoc[]. +==== + +=== Fraud tools + +Fraud tools calculate risk scores and identify fraud indicators. + +. 
Click *Create MCP Server*. +. Configure the server: ++ +* *Name*: `fraud-tools` +* *Description*: `Fraud detection and risk scoring` +* *Resource Tier*: XSmall + +. Add the following tools. For each tool, select *Processor* from the component type dropdown, then click *Lint* to validate: ++ +[tabs] +==== +calculate_fraud_score:: ++ +This tool implements multi-factor fraud scoring with location risk (0-35 for international/unusual cities), merchant risk (0-30 for reputation/fraud reports), amount risk (0-25 for deviation from averages), velocity risk (0-15 for rapid transactions), and category risk (0-20 for unusual spending categories). The tool returns both the total score and breakdown, allowing agents to explain their reasoning. ++ +[,yaml,role="no-placeholders"] +---- +include::ai-agents:example$mcp-tools/processors/calculate_fraud_score.yaml[] +---- + +get_risk_indicators:: ++ +This tool provides detailed fraud signals with severity levels. Each indicator includes a description that agents can use to explain findings to customers. ++ +[,yaml] +---- +include::ai-agents:example$mcp-tools/processors/get_risk_indicators.yaml[] +---- +==== + +. Click *Create MCP Server*. + +Wait for the server status to show *Running*. + +=== Merchant tools + +Merchant tools verify business legitimacy and analyze merchant categories. + +. Click *Create MCP Server*. +. Configure the server: ++ +* *Name*: `merchant-tools` +* *Description*: `Merchant verification and category analysis` +* *Resource Tier*: XSmall + +. Add the following tools. For each tool, select *Processor* from the component type dropdown, then click *Lint* to validate: ++ +[tabs] +==== +verify_merchant:: ++ +This tool returns reputation scores, fraud report counts, business verification status, and red flags. Notice how it includes common issues for legitimate merchants (like subscription billing problems) to help agents distinguish between fraud and merchant operational issues. 
++ +[,yaml] +---- +include::ai-agents:example$mcp-tools/processors/verify_merchant.yaml[] +---- + +get_merchant_category:: ++ +This tool decodes MCC (Merchant Category Codes) and provides typical transaction ranges for each category. This helps identify mismatches (like a grocery store charging $2000). ++ +[,yaml] +---- +include::ai-agents:example$mcp-tools/processors/get_merchant_category.yaml[] +---- +==== + +. Click *Create MCP Server*. + +Wait for the server status to show *Running*. + +=== Compliance tools + +Compliance tools handle audit logging and regulatory requirements. + +. Click *Create MCP Server*. +. Configure the server: ++ +* *Name*: `compliance-tools` +* *Description*: `Audit logging and regulatory compliance` +* *Resource Tier*: XSmall + +. Add the following tools. For each tool, select *Processor* from the component type dropdown, then click *Lint* to validate: ++ +[tabs] +==== +log_audit_event:: ++ +This tool creates audit records for every investigation. In production, this would write to an immutable audit log. For this tutorial, it returns a confirmation with the audit ID. ++ +[,yaml] +---- +include::ai-agents:example$mcp-tools/processors/log_audit_event.yaml[] +---- + +check_regulatory_requirements:: ++ +This tool returns applicable regulations, customer rights, bank obligations, and required documentation for different dispute types. This ensures agents follow proper procedures for Regulation E, Fair Credit Billing Act, and card network rules. ++ +[,yaml] +---- +include::ai-agents:example$mcp-tools/processors/check_regulatory_requirements.yaml[] +---- +==== + +. Click *Create MCP Server*. + +Wait for the server status to show *Running*. You now have four MCP servers with nine total tools, organized by domain. + +== Create the root agent with subagents + +The root agent orchestrates sub-agents and makes final recommendations. You'll configure the root agent first, then add four specialized sub-agents within the same form. 
+ +[IMPORTANT] +==== +Sub-agents inherit the LLM provider, model, resource tier, and max iterations from the root agent. This tutorial uses GPT-5 Mini and max iterations of 15 to optimize performance. Using slower models (GPT-5.2, Claude Sonnet 4.5) or high max iterations (50+) will cause sub-agents to execute slowly. Each sub-agent call could take 60-90 seconds instead of 10-15 seconds. +==== + +. Go to *Agentic AI* > *AI Agents*. +. Click *Create Agent*. +. Configure the root agent: ++ +* *Name*: `dispute-resolution-agent` +* *Description*: `Orchestrates transaction dispute investigations` +* *Resource Tier*: Large +* *AI Gateway*: Select the gateway you configured +* *Provider*: OpenAI +* *Model*: GPT-5 Mini (fast, cost-effective for structured workflows) +* *Max Iterations*: 15 + +. In the *System Prompt* field, enter: ++ +[source,text] +---- +include::ai-agents:example$agents/dispute-root-agent-prompt.txt[] +---- + +. Skip the *MCP Tools* section (the root agent uses A2A protocol to call sub-agents, not direct tools). + +. In the *Subagents* section, click *+ Add Subagent*. + +=== Add account agent subagent + +The account agent retrieves customer account and transaction data. + +. Configure the subagent: ++ +* *Name*: `account-agent` +* *Description*: `Retrieves customer account and transaction data` + +. In the subagent's *System Prompt* field, enter: ++ +[source,text] +---- +include::ai-agents:example$agents/account-agent-prompt.txt[] +---- + +. In the subagent's *MCP Tools* section, select `account-tools`. + +=== Add fraud agent subagent + +The fraud agent calculates fraud risk scores and identifies fraud indicators. + +. Click *+ Add Subagent* again. +. Configure the subagent: ++ +* *Name*: `fraud-agent` +* *Description*: `Calculates fraud risk scores and identifies fraud indicators` + +. In the subagent's *System Prompt* field, enter: ++ +[source,text] +---- +include::ai-agents:example$agents/fraud-agent-prompt.txt[] +---- + +. 
In the subagent's *MCP Tools* section, select `fraud-tools`. + +=== Add merchant agent subagent + +The merchant agent verifies merchant legitimacy and reputation. + +. Click *+ Add Subagent* again. +. Configure the subagent: ++ +* *Name*: `merchant-agent` +* *Description*: `Verifies merchant legitimacy and reputation` + +. In the subagent's *System Prompt* field, enter: ++ +[source,text] +---- +include::ai-agents:example$agents/merchant-agent-prompt.txt[] +---- + +. In the subagent's *MCP Tools* section, select `merchant-tools`. + +=== Add compliance agent subagent + +The compliance agent handles audit logging and regulatory requirements. + +. Click *+ Add Subagent* again. +. Configure the subagent: ++ +* *Name*: `compliance-agent` +* *Description*: `Handles audit logging and regulatory requirements` + +. In the subagent's *System Prompt* field, enter: ++ +[source,text] +---- +include::ai-agents:example$agents/compliance-agent-prompt.txt[] +---- + +. In the subagent's *MCP Tools* section, select `compliance-tools`. + +. Click *Create Agent* to create the root agent with all four subagents. + +Wait for the agent status to show *Running*. + +== Test investigation scenarios + +Test the multi-agent system with realistic dispute scenarios. Each scenario demonstrates different patterns: clear fraud, legitimate transactions, escalation cases, and edge cases. + +. Go to *Agentic AI* > *AI Agents*. +. Click on `dispute-resolution-agent`. +. Open the *Inspector* tab. + +=== Clear fraud case + +Test how the system handles obvious fraud. + +Enter this query: + +[source,text] +---- +I see a $1,847.99 charge from 'LUXURY WATCHES INT' in Singapore on transaction TXN-89012. I've never been to Singapore and don't buy watches. My customer ID is CUST-1001. This is fraud. +---- + +Watch the conversation panel as the investigation progresses. You'll see the root agent call each sub-agent in sequence. 
After all sub-agents complete (30-90 seconds), the agent sends its final response to the chat. + +The final response should clearly state the transaction is fraudulent, summarize findings from each sub-agent, and provide a list of actions the agent is going to take. + +This flow demonstrates multi-agent coordination for high-confidence fraud decisions with realistic banking communication. + +=== Escalation required + +Test how the system handles ambiguous cases requiring human review. + +Click *Clear context*. Then enter: + +[source,text] +---- +I see three $29.99 charges from 'EXAMPLE STREAMING' last month, but I only subscribed once. My customer ID is CUST-1002 and one of the transactions is TXN-89014. +---- + +Watch the conversation panel as the agent investigates. After the sub-agent calls complete, the agent should send a response with a realistic escalation. + +This demonstrates the escalation pattern when evidence is ambiguous and requires human review. + +== Monitor multi-agent execution + +*Inspector* shows real-time progress in the conversation panel, but *Transcripts* provides detailed post-execution analysis with timing, token usage, and full trace hierarchy. + +. In the left navigation, click *Transcripts*. +. Select a recent transcript from your fraud case test. 
+
+In the trace hierarchy, you'll see:
+
+* Root agent invocation (top-level span)
+* Multiple `invoke_agent` spans for each sub-agent call
+* Individual LLM calls within each agent
+* MCP tool invocations within sub-agents
+
+In the summary panel, check:
+
+* *Duration*: Total investigation time (typically 30-90 seconds)
+* *Token Usage*: Cost tracking across all agents
+* *LLM Calls*: How many reasoning steps were needed
+
+This visibility helps you:
+
+* Verify sub-agents are being called in the right order
+* Identify slow sub-agents that need optimization
+* Track costs per investigation for budgeting
+
+For detailed trace structure, see xref:ai-agents:observability/concepts.adoc#agent-trace-hierarchy[Agent trace hierarchy].
+
+== Integrate with streaming pipeline
+
+Process disputes automatically from transaction streams. When transactions meet certain risk thresholds, the pipeline invokes the dispute agent for immediate investigation.
+
+=== Create a secret for the agent card URL
+
+The pipeline needs the agent card URL to invoke the dispute resolution agent.
+
+. Go to *Agentic AI* > *AI Agents*.
+. Click on `dispute-resolution-agent`.
+. Open the *A2A* tab.
+. Copy the agent URL displayed at the top.
+. Go to *Connect* > *Secrets*.
+. Click *Create Secret*.
+. Create the secret:
++
+* *Name*: `DISPUTE_AGENT_CARD_URL`
+* *Value*: Paste the agent URL and append `/.well-known/agent-card.json` to the end
++
+For example, if the agent URL is:
++
+----
+https://abc123.ai-agents.def456.cloud.redpanda.com
+----
++
+The secret value should be:
++
+----
+https://abc123.ai-agents.def456.cloud.redpanda.com/.well-known/agent-card.json
+----
+
+. Click *Create Secret*.
+
+=== Create topics for transaction data
+
+Create the topics the pipeline will use for input and output.
+
+. Go to *Topics* in the Redpanda Cloud Console.
+. Click *Create Topic*.
+. Create the input topic:
++
+* *Name*: `bank.transactions`
+* *Partitions*: 3
+* *Replication factor*: 3
+
+. 
Click *Create Topic* again. +. Create the output topic: ++ +* *Name*: `bank.dispute_results` +* *Partitions*: 3 +* *Replication factor*: 3 + +=== Create a SASL user for topic access + +The pipeline needs SASL credentials to read from and write to Redpanda topics. + +. Go to *Security* > *Users* in the Redpanda Cloud Console. +. Click *Create User*. +. Configure the user: ++ +* *Username*: `dispute-pipeline-user` +* *Password*: Generate a secure password +* *Mechanism*: SCRAM-SHA-256 + +. Save the username and password. You'll need them for the pipeline secrets. + +. Click *Create*. + +. Click *Create ACLs* to grant permissions. + +. Click the *Clusters* tab for cluster permissions and select *Allow all*. + +. Click *Add rule* to add another ACL. + +. Click the *Topics* tab for topic permissions: ++ +* *Principal*: `dispute-pipeline-user` +* *Host*: Allow all hosts (`*`) +* *Resource Type*: Topic +* *Selector*: Topic names starting with `bank.` +* *Operations*: Allow all + +. Click *Add rule* to add another ACL. + +. Click the *Consumer groups* tab for consumer group permissions and select *Allow all*. + +. Click *Create*. + +=== Create secrets for SASL authentication + +The pipeline needs SASL credentials stored as secrets to authenticate with Redpanda topics. + +. Go to *Connect* > *Secrets* in the Redpanda Cloud Console (if not already there). +. Click *Create Secret*. +. Create two secrets with these values: ++ +* *Name*: `DISPUTE_PIPELINE_USERNAME`, *Value*: `dispute-pipeline-user` +* *Name*: `DISPUTE_PIPELINE_PASSWORD`, *Value*: The password you created for `dispute-pipeline-user` + +=== Create the pipeline + +. Go to *Connect* in the Redpanda Cloud Console. +. Click *Create a pipeline*. +. In the numbered steps, click *4 Add permissions*. +. Select *Service Account*. ++ +The Service Account is required for the `a2a_message` processor to authenticate with and invoke the dispute resolution agent. 
Without this permission, the pipeline will fail when attempting to call the agent. + +. Click *Next*. +. Name the pipeline `dispute-pipeline`. +. Paste this configuration and click *Create Pipeline*: ++ +[,yaml,role="no-placeholders"] +---- +include::ai-agents:example$pipelines/dispute-pipeline.yaml[] +---- + +This pipeline: + +* Consumes transactions from `bank.transactions` topic +* Filters for high-value transactions (>$500) or pre-flagged transactions +* Calculates preliminary risk score based on location, amount, velocity, and category +* Routes transactions with risk score ≥40 to the dispute-resolution-agent via A2A +* Outputs investigation results to `bank.dispute_results` topic + +=== Test the pipeline + +. Authenticate with your Redpanda Cloud cluster: ++ +[,bash] +---- +rpk cloud login +---- + +. Create a test transaction that will trigger the agent investigation: ++ +[,bash] +---- +echo '{ + "transaction_id": "TXN-89012", + "customer_id": "CUST-1001", + "amount": 1847.99, + "currency": "USD", + "merchant": { + "name": "LUXURY WATCHES INT", + "category": "jewelry", + "country": "Singapore", + "mcc": "5944", + "city": "Singapore" + }, + "card": { + "last_four": "4532", + "billing_country": "USA" + }, + "transaction_date": "2026-01-21T10:00:00Z", + "recent_transaction_count": 2 +}' | rpk topic produce bank.transactions +---- ++ +This transaction will trigger agent investigation because: ++ +* International transaction (Singapore vs USA): +40 risk points +* Amount is greater than $1000: +30 risk points +* Jewelry category (MCC 5944): +20 risk points +* **Total preliminary risk score: 90** (well above the 40 threshold) + +. Wait a minute for the pipeline to process the transaction. You can monitor the progress in *Transcripts*. While the agents investigate, a new transcript for `dispute-resolution-agent` will appear. Until the investigation completes, the transcript will show *awaiting root* status. + +. 
Consume the results: ++ +[,bash] +---- +rpk topic consume bank.dispute_results --offset end -n 1 +---- ++ +You'll see the complete transaction with agent investigation results: ++ +[,json,role="no-wrap"] +---- +{ + "agent_investigation": { + "confidence": "high", + "fraud_score": 91, + "reasoning": "Transaction is an international purchase with no recent international activity, from a merchant with strong fraud indicators, and the amount is a large outlier for this account; immediate block and investigation recommended.", + "recommendation": "block_and_investigate" + }, + "alert_level": "critical", + "amount": 1847.99, + "card": { + "billing_country": "USA", + "last_four": "4532" + }, + "currency": "USD", + "customer_id": "CUST-1001", + "final_decision": "blocked", + "merchant": { + "category": "jewelry", + "city": "Singapore", + "country": "Singapore", + "mcc": "5944", + "name": "LUXURY WATCHES INT" + }, + "pipeline_metadata": { + "agent_invoked": true, + "customer_id": "CUST-1001", + "processed_at": "2026-01-27T14:29:19.436Z", + "transaction_id": "TXN-89012" + }, + "preliminary_risk_score": 90, + "recent_transaction_count": 2, + "risk_level": "high", + "transaction_date": "2026-01-21T10:00:00Z", + "transaction_id": "TXN-89012" +} +---- + +This output contains everything downstream systems need such as fraud monitoring, customer alerts, and audit logging. + +The pipeline uses a two-stage filter: + +- Only processes transactions with `amount > 500` or `preliminary_flag == true` +- Only sends transactions to the agent if `preliminary_risk_score >= 40` + +Transactions that pass the first filter but not the second (e.g., a $600 domestic transaction with low risk) will appear in the output with: + +* `final_decision: "low_risk_no_investigation"` +* `alert_level: "low"` +* No `agent_investigation` field + +Only transactions meeting the risk threshold invoke the dispute resolution agent. 
+ +=== Trace pipeline execution to agent transcripts + +Use the pipeline metadata timestamp to find the corresponding agent execution in the *Transcripts* view. + +. Note the `processed_at` timestamp from the pipeline output (for example: `2026-01-26T18:30:45.000Z`). +. Go to *Agentic AI* > *Transcripts*. +. Find transcripts for `dispute-resolution-agent` that match your timestamp. + +[NOTE] +==== +The search function does not search through prompt content or attribute values. Use the timestamp to narrow down the time window, then manually review transcripts from that period. +==== + +In the transcript details, you'll see: + +* The full prompt sent to the agent (including transaction ID and details) +* Each sub-agent invocation (account-agent, fraud-agent, merchant-agent, compliance-agent) +* Token usage and execution time for the investigation +* The complete JSON response returned to the pipeline + +== Troubleshoot + +For comprehensive troubleshooting guidance, see xref:ai-agents:agents/troubleshooting.adoc[]. + +=== Test with mock data + +The mock tools in this tutorial use hardcoded customer and transaction IDs for testing: + +* Customer IDs: `CUST-1001`, `CUST-1002`, `CUST-1003` +* Transaction IDs: `TXN-89012`, `TXN-89013`, `TXN-89014`, `TXN-89015` + +Use these documented test IDs when testing in *Inspector* or the pipeline. The sub-agents' mock tools require valid IDs to return transaction details, account history, and fraud indicators. Using other IDs (like `TXN-TEST-001` or `CUST-9999`) will cause the tools to return "not found" errors, and the root agent won't be able to complete its investigation. + +For production deployments, replace the mock tools with API calls to your account, fraud detection, merchant verification, and compliance systems. 
+ +== Next steps + +* xref:ai-agents:agents/architecture-patterns.adoc[] +* xref:ai-agents:agents/integration-overview.adoc[] +* xref:ai-agents:agents/pipeline-integration-patterns.adoc[] +* xref:ai-agents:agents/monitor-agents.adoc[] +* xref:ai-agents:mcp/remote/best-practices.adoc[] diff --git a/modules/ai-agents/pages/ai-gateway/admin/setup-guide.adoc b/modules/ai-agents/pages/ai-gateway/admin/setup-guide.adoc new file mode 100644 index 000000000..c3e20e1b2 --- /dev/null +++ b/modules/ai-agents/pages/ai-gateway/admin/setup-guide.adoc @@ -0,0 +1,324 @@ += AI Gateway Setup Guide +:description: Complete setup guide for administrators to enable providers, configure models, create gateways, and set up routing policies. +:page-topic-type: how-to +:personas: platform_admin +:learning-objective-1: Enable LLM providers and models in the catalog +:learning-objective-2: Create and configure gateways with routing policies, rate limits, and spend limits +:learning-objective-3: Set up MCP tool aggregation for AI agents + +include::ai-agents:partial$ai-gateway-byoc-note.adoc[] + +This guide walks administrators through the complete setup process for AI Gateway, from enabling LLM providers to configuring routing policies and MCP tool aggregation. + +After completing this guide, you will be able to: + +* [ ] Enable LLM providers and models in the catalog +* [ ] Create and configure gateways with routing policies, rate limits, and spend limits +* [ ] Set up MCP tool aggregation for AI agents + +== Prerequisites + +* Access to the Redpanda Cloud Console with administrator privileges +* API keys for at least one LLM provider (OpenAI or Anthropic) +* (Optional) MCP server endpoints if you plan to use tool aggregation + +== Enable a provider + +Providers represent upstream services (Anthropic, OpenAI) and associated credentials. Providers are disabled by default and must be enabled explicitly by an administrator. + +. 
In the Redpanda Cloud Console, navigate to *AI Gateway* → *Providers*. +. Select a provider (for example, Anthropic or OpenAI). +. On the *Configuration* tab for the provider, click *Add configuration*. +. Enter your API Key for the provider. ++ +TIP: Store provider API keys securely. Each provider configuration can have multiple API keys for rotation and redundancy. + +. Click *Save* to enable the provider. + +Repeat this process for each LLM provider you want to make available through AI Gateway. + +== Enable models + +The model catalog is the set of models made available through the gateway. Models are disabled by default. After enabling a provider, you can enable its models. + +The infrastructure that serves the model differs based on the provider you select. For example, OpenAI has different reliability and availability metrics than Anthropic. When you consider all metrics, you can design your gateway to use different providers for different use cases. + +. Navigate to *AI Gateway* → *Models*. +. Review the list of available models from enabled providers. +. For each model you want to expose through gateways, toggle it to *Enabled*. ++ +Common models to enable: ++ +-- +* `openai/gpt-4o` - OpenAI's most capable model +* `openai/gpt-4o-mini` - Cost-effective OpenAI model +* `anthropic/claude-sonnet-3.5` - Balanced Anthropic model +* `anthropic/claude-opus-4` - Anthropic's most capable model +-- + +. Click *Save changes*. + +Only enabled models will be accessible through gateways. You can enable or disable models at any time without affecting existing gateways. + +=== Model naming convention + +Model requests must use the `vendor/model_id` format in the model property of the request body. This format allows AI Gateway to route requests to the appropriate provider. 
+ +Examples: + +* `openai/gpt-4o` +* `anthropic/claude-sonnet-3.5` +* `openai/gpt-4o-mini` + +== Create a gateway + +A gateway is a logical configuration boundary (policies + routing + observability) on top of a single deployment. It's a "virtual gateway" that you can create per team, environment (staging/production), product, or customer. + +. Navigate to *AI Gateway* → *Gateways*. +. Click *Create Gateway*. +. Configure the gateway: ++ +-- +* *Name*: Choose a descriptive name (for example, `production-gateway`, `team-ml-gateway`, `staging-gateway`) +* *Workspace*: Select the workspace this gateway belongs to ++ +TIP: A workspace is conceptually similar to a resource group in Redpanda streaming. ++ +* *Description* (optional): Add context about this gateway's purpose +* *Tags* (optional): Add metadata for organization and filtering +-- + +. Click *Create*. + +. After creation, note the following information: ++ +-- +* *Gateway ID*: Unique identifier (for example, `gw_abc123`) - users include this in the `rp-aigw-id` header +* *Gateway Endpoint*: Base URL for API requests (for example, `https://gw.ai.panda.com`) +-- + +You'll share the Gateway ID and Endpoint with users who need to access this gateway. + +== Configure LLM routing + +On the gateway details page, select the *LLM* tab to configure rate limits, spend limits, routing, and provider pools with fallback options. + +The LLM routing pipeline visually represents the request lifecycle: + +. *Rate Limit*: Global rate limit (for example, 100 requests/second) +. *Spend Limit / Monthly Budget*: Monthly budget with blocking enforcement (for example, $15K/month) +. *Routing*: Primary provider pool with optional fallback provider pools + +=== Configure rate limits + +Rate limits control how many requests can be processed within a time window. + +. In the *LLM* tab, locate the *Rate Limit* section. +. Click *Add rate limit*. +. 
Configure the limit:
++
+--
+* *Requests per second*: Maximum requests per second (for example, `100`)
+* *Burst allowance* (optional): Allow temporary bursts above the limit
+--
+
+. Click *Save*.
+
+Rate limits apply to all requests through this gateway, regardless of model or provider.
+
+=== Configure spend limits and budgets
+
+Spend limits prevent runaway costs by blocking requests after a monthly budget is exceeded.
+
+. In the *LLM* tab, locate the *Spend Limit* section.
+. Click *Configure budget*.
+. Set the budget:
++
+--
+* *Monthly budget*: Maximum spend per month (for example, `$15000`)
+* *Enforcement*: Choose *Block* to reject requests after the budget is exceeded, or *Alert* to notify but allow requests
+* *Notification threshold* (optional): Alert when X% of budget is consumed (for example, `80%`)
+--
+
+. Click *Save*.
+
+Budget tracking uses estimated costs based on token usage and public provider pricing.
+
+=== Configure routing and provider pools
+
+Provider pools define which LLM providers handle requests, with support for primary and fallback configurations.
+
+. In the *LLM* tab, locate the *Routing* section.
+. Click *Add provider pool*.
+. Configure the primary pool:
++
+--
+* *Name*: For example, `primary-anthropic`
+* *Providers*: Select one or more providers (for example, Anthropic)
+* *Models*: Choose which models to include (for example, `anthropic/claude-sonnet-3.5`)
+* *Load balancing*: If multiple providers are selected, choose a distribution strategy (round-robin, weighted, least latency, or cost-optimized)
+--
+
+. (Optional) Click *Add fallback pool* to configure automatic failover:
++
+--
+* *Name*: For example, `fallback-openai`
+* *Providers*: Select fallback provider (for example, OpenAI)
+* *Models*: Choose fallback models (for example, `openai/gpt-4o`)
+* *Trigger conditions*: When to activate fallback:
+ ** Rate limit exceeded (429 from primary)
+ ** Timeout (primary provider slow)
+ ** Server errors (5xx from primary)
+--
+
+. 
Configure routing rules using CEL expressions (optional): ++ +For simple routing, select *Route all requests to primary pool*. ++ +For advanced routing based on request properties, use CEL expressions. See xref:ai-gateway/cel-routing-cookbook.adoc[] for examples. ++ +Example CEL expression for tier-based routing: ++ +[source,cel] +---- +request.headers["x-user-tier"] == "premium" + ? "anthropic/claude-opus-4" + : "anthropic/claude-sonnet-3.5" +---- + +. Click *Save routing configuration*. + +TIP: Provider pool (UI) = Backend pool (API) + +=== Load balancing and multi-provider distribution + +If a provider pool contains multiple providers, you can distribute traffic to balance load or optimize for cost/performance: + +* *Round-robin*: Distribute evenly across all providers +* *Weighted*: Assign weights (for example, 80% to Anthropic, 20% to OpenAI) +* *Least latency*: Route to fastest provider based on recent performance +* *Cost-optimized*: Route to cheapest provider for each model + +== Configure MCP tools (optional) + +If your users will build AI agents that need access to tools via MCP (Model Context Protocol), configure MCP tool aggregation. + +On the gateway details page, select the *MCP* tab to configure tool discovery and execution. The MCP proxy aggregates multiple MCP servers, allowing agents to find and call tools through a single endpoint. + +=== Add MCP servers + +. In the *MCP* tab, click *Add MCP server*. +. Configure the server: ++ +-- +* *Server name*: Human-readable identifier (for example, `database-server`, `slack-server`) +* *Server URL*: Endpoint for the MCP server (for example, `https://mcp-database.example.com`) +* *Authentication*: Configure authentication if required (bearer token, API key, mTLS) +* *Enabled tools*: Select which tools from this server to expose (or *All tools*) +-- + +. Click *Test connection* to verify connectivity. +. Click *Save* to add the server to this gateway. + +Repeat for each MCP server you want to aggregate. 
+ +=== Configure deferred tool loading + +Deferred tool loading dramatically reduces token costs by initially exposing only a search tool and orchestrator, rather than listing all available tools. + +. In the *MCP* tab, locate *Deferred Loading*. +. Toggle *Enable deferred tool loading* to *On*. +. Configure behavior: ++ +-- +* *Initially expose*: Search tool + orchestrator only +* *Load on demand*: Tools are retrieved when agents query for them +* *Token savings*: Expect 80-90% reduction in token usage for tool definitions +-- + +. Click *Save*. + +See xref:ai-gateway/mcp-aggregation-guide.adoc[] for detailed information about MCP aggregation. + +=== Configure the MCP orchestrator + +The MCP orchestrator is a built-in MCP server that enables programmatic tool calling. Agents can generate JavaScript code to call multiple tools in a single orchestrated step, reducing the number of round trips. + +Example: A workflow requiring 47 file reads can be reduced from 49 round trips to just 1 round trip using the orchestrator. + +The orchestrator is enabled by default when you enable MCP tools. You can configure: + +* *Execution timeout*: Maximum time for orchestrator workflows (for example, 30 seconds) +* *Memory limit*: Maximum memory for JavaScript execution (for example, 128MB) +* *Allowed operations*: Restrict which MCP tools can be called from orchestrator workflows + +== Verify your setup + +After completing the setup, verify that the gateway is working correctly: + +=== Test the gateway endpoint + +[source,bash] +---- +curl https://{GATEWAY_ENDPOINT}/v1/models \ + -H "Authorization: Bearer ${REDPANDA_CLOUD_TOKEN}" \ + -H "rp-aigw-id: ${GATEWAY_ID}" +---- + +Expected result: List of enabled models. 
+ +=== Send a test request + +[source,bash] +---- +curl https://{GATEWAY_ENDPOINT}/v1/chat/completions \ + -H "Authorization: Bearer ${REDPANDA_CLOUD_TOKEN}" \ + -H "rp-aigw-id: ${GATEWAY_ID}" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "openai/gpt-4o-mini", + "messages": [{"role": "user", "content": "Hello, AI Gateway!"}], + "max_tokens": 50 + }' +---- + +Expected result: Successful completion response. + +=== Check observability + +. Navigate to *AI Gateway* → *Gateways* → Select your gateway → *Analytics*. +. Verify that your test request appears in the request logs. +. Check metrics: ++ +-- +* Request count: Should show your test request +* Token usage: Should show tokens consumed +* Estimated cost: Should show calculated cost +-- + +== Share access with users + +Now that your gateway is configured, share access with users (builders): + +. Provide the *Gateway ID* (for example, `gw_abc123`) +. Provide the *Gateway Endpoint* (for example, `https://gw.ai.panda.com`) +. Share API credentials (Redpanda Cloud tokens with appropriate permissions) +. (Optional) Document available models and any routing policies +. (Optional) Share rate limits and budget information + +Users can then discover and connect to the gateway using the information provided. See xref:ai-gateway/builders/discover-gateways.adoc[] for user documentation. 
+ +== Next steps + +*Configure and optimize:* + +// * xref:ai-gateway/admin/manage-gateways.adoc[Manage Gateways] - List, edit, and delete gateways +* xref:ai-gateway/cel-routing-cookbook.adoc[CEL Routing Cookbook] - Advanced routing patterns +// * xref:ai-gateway/admin/networking-configuration.adoc[Networking Configuration] - Configure private endpoints and connectivity + +//*Monitor and observe:* +// + +*Integrate tools:* + +* xref:ai-gateway/integrations/index.adoc[Integrations] - Admin guides for Claude Code, Cursor, and other tools diff --git a/modules/ai-agents/pages/ai-gateway/builders/connect-your-agent.adoc b/modules/ai-agents/pages/ai-gateway/builders/connect-your-agent.adoc new file mode 100644 index 000000000..11c99a0e3 --- /dev/null +++ b/modules/ai-agents/pages/ai-gateway/builders/connect-your-agent.adoc @@ -0,0 +1,560 @@ += Connect Your Agent +:description: Integrate your AI agent or application with Redpanda AI Gateway for unified LLM access. +:page-topic-type: how-to +:personas: app_developer +:learning-objective-1: Configure your application to use AI Gateway with OpenAI-compatible SDKs +:learning-objective-2: Make LLM requests through the gateway and handle responses appropriately +:learning-objective-3: Validate your integration end-to-end + +include::ai-agents:partial$ai-gateway-byoc-note.adoc[] + +This guide shows you how to connect your AI agent or application to a Redpanda AI Gateway. You'll configure your client SDK, make your first request, and validate the integration. + +After completing this guide, you will be able to: + +* [ ] Configure your application to use AI Gateway with OpenAI-compatible SDKs +* [ ] Make LLM requests through the gateway and handle responses appropriately +* [ ] Validate your integration end-to-end + +== Prerequisites + +* You have discovered an available gateway and noted its Gateway ID and Endpoint ++ +If not, see xref:ai-gateway/builders/discover-gateways.adoc[]. 
+ +* You have a Redpanda Cloud API token with access to the gateway +* You have a development environment with your chosen programming language + +== Integration overview + +Connecting to AI Gateway requires three configuration changes: + +. *Change the base URL*: Point to the gateway endpoint instead of the provider's API +. *Add authentication*: Use your Redpanda Cloud token instead of provider API keys +. *Add the gateway ID header*: Include `rp-aigw-id` to identify which gateway to use + +That's it. Your existing application code doesn't need to change. + +== Quick start + +=== Environment variables + +Set these environment variables for consistent configuration: + +[source,bash] +---- +export REDPANDA_GATEWAY_URL="https://gw.ai.panda.com" +export REDPANDA_GATEWAY_ID="gw_abc123" +export REDPANDA_API_KEY="your-redpanda-cloud-token" +---- + +Replace with your actual gateway endpoint, ID, and API token. + +=== Python (OpenAI SDK) + +[source,python] +---- +import os +from openai import OpenAI + +# Configure client to use AI Gateway +client = OpenAI( + base_url=os.getenv("REDPANDA_GATEWAY_URL"), + api_key=os.getenv("REDPANDA_API_KEY"), + default_headers={ + "rp-aigw-id": os.getenv("REDPANDA_GATEWAY_ID") + } +) + +# Make a request (same as before) +response = client.chat.completions.create( + model="openai/gpt-4o-mini", # Note: vendor/model_id format + messages=[{"role": "user", "content": "Hello, AI Gateway!"}], + max_tokens=100 +) + +print(response.choices[0].message.content) +---- + +=== Python (Anthropic SDK) + +The Anthropic SDK can also route through AI Gateway using the OpenAI-compatible endpoint: + +[source,python] +---- +import os +from anthropic import Anthropic + +client = Anthropic( + base_url=os.getenv("REDPANDA_GATEWAY_URL"), + api_key=os.getenv("REDPANDA_API_KEY"), + default_headers={ + "rp-aigw-id": os.getenv("REDPANDA_GATEWAY_ID") + } +) + +# Make a request +message = client.messages.create( + model="anthropic/claude-sonnet-3.5", + max_tokens=100, + 
messages=[{"role": "user", "content": "Hello, AI Gateway!"}] +) + +print(message.content[0].text) +---- + +=== Node.js (OpenAI SDK) + +[source,javascript] +---- +import OpenAI from 'openai'; + +const openai = new OpenAI({ + baseURL: process.env.REDPANDA_GATEWAY_URL, + apiKey: process.env.REDPANDA_API_KEY, + defaultHeaders: { + 'rp-aigw-id': process.env.REDPANDA_GATEWAY_ID + } +}); + +// Make a request +const response = await openai.chat.completions.create({ + model: 'openai/gpt-4o-mini', + messages: [{ role: 'user', content: 'Hello, AI Gateway!' }], + max_tokens: 100 +}); + +console.log(response.choices[0].message.content); +---- + +=== cURL + +For testing or shell scripts: + +[source,bash] +---- +curl ${REDPANDA_GATEWAY_URL}/v1/chat/completions \ + -H "Authorization: Bearer ${REDPANDA_API_KEY}" \ + -H "rp-aigw-id: ${REDPANDA_GATEWAY_ID}" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "openai/gpt-4o-mini", + "messages": [{"role": "user", "content": "Hello, AI Gateway!"}], + "max_tokens": 100 + }' +---- + +== Model naming convention + +When making requests through AI Gateway, use the `vendor/model_id` format for the model parameter: + +* `openai/gpt-4o` +* `openai/gpt-4o-mini` +* `anthropic/claude-sonnet-3.5` +* `anthropic/claude-opus-4` + +This format tells AI Gateway which provider to route the request to. + +Example: + +[source,python] +---- +# Route to OpenAI +response = client.chat.completions.create( + model="openai/gpt-4o", + messages=[...] +) + +# Route to Anthropic (same client, different model) +response = client.chat.completions.create( + model="anthropic/claude-sonnet-3.5", + messages=[...] +) +---- + +// To see which models are available in your gateway, see xref:ai-gateway/builders/available-models.adoc[]. 
+ +== Handle responses + +Responses from AI Gateway follow the OpenAI API format: + +[source,python] +---- +response = client.chat.completions.create( + model="openai/gpt-4o-mini", + messages=[{"role": "user", "content": "Explain AI Gateway"}], + max_tokens=200 +) + +# Access the response +message_content = response.choices[0].message.content +finish_reason = response.choices[0].finish_reason # 'stop', 'length', etc. + +# Token usage +prompt_tokens = response.usage.prompt_tokens +completion_tokens = response.usage.completion_tokens +total_tokens = response.usage.total_tokens + +print(f"Response: {message_content}") +print(f"Tokens: {prompt_tokens} prompt + {completion_tokens} completion = {total_tokens} total") +---- + +== Handle errors + +AI Gateway returns standard HTTP status codes: + +[source,python] +---- +from openai import OpenAI, OpenAIError + +client = OpenAI( + base_url=os.getenv("REDPANDA_GATEWAY_URL"), + api_key=os.getenv("REDPANDA_API_KEY"), + default_headers={"rp-aigw-id": os.getenv("REDPANDA_GATEWAY_ID")} +) + +try: + response = client.chat.completions.create( + model="openai/gpt-4o-mini", + messages=[{"role": "user", "content": "Hello"}] + ) + print(response.choices[0].message.content) + +except OpenAIError as e: + if e.status_code == 400: + print("Bad request - check model name and parameters") + elif e.status_code == 401: + print("Authentication failed - check API token") + elif e.status_code == 404: + print("Model not found - check available models") + elif e.status_code == 429: + print("Rate limit exceeded - slow down requests") + elif e.status_code >= 500: + print("Gateway or provider error - retry with exponential backoff") + else: + print(f"Error: {e}") +---- + +Common error codes: + +* *400*: Bad request (invalid parameters, malformed JSON) +* *401*: Authentication failed (invalid or missing API token) +* *403*: Forbidden (no access to this gateway) +* *404*: Model not found (model not enabled in gateway) +* *429*: Rate limit exceeded (too 
many requests) +* *500/502/503*: Server error (gateway or provider issue) + +== Streaming responses + +AI Gateway supports streaming for real-time token generation: + +[source,python] +---- +response = client.chat.completions.create( + model="openai/gpt-4o-mini", + messages=[{"role": "user", "content": "Write a short poem"}], + stream=True # Enable streaming +) + +# Process chunks as they arrive +for chunk in response: + if chunk.choices[0].delta.content: + print(chunk.choices[0].delta.content, end='', flush=True) + +print() # New line after streaming completes +---- + +== Switch between providers + +One of AI Gateway's key benefits is easy provider switching without code changes: + +[source,python] +---- +# Try OpenAI +response = client.chat.completions.create( + model="openai/gpt-4o", + messages=[{"role": "user", "content": "Explain quantum computing"}] +) + +# Try Anthropic (same code, different model) +response = client.chat.completions.create( + model="anthropic/claude-sonnet-3.5", + messages=[{"role": "user", "content": "Explain quantum computing"}] +) +---- + +Compare responses, latency, and cost to determine the best model for your use case. 
+ +== Validate your integration + +=== Test connectivity + +[source,python] +---- +import os +from openai import OpenAI + +def test_gateway_connection(): + """Test basic connectivity to AI Gateway""" + client = OpenAI( + base_url=os.getenv("REDPANDA_GATEWAY_URL"), + api_key=os.getenv("REDPANDA_API_KEY"), + default_headers={"rp-aigw-id": os.getenv("REDPANDA_GATEWAY_ID")} + ) + + try: + # Simple test request + response = client.chat.completions.create( + model="openai/gpt-4o-mini", + messages=[{"role": "user", "content": "test"}], + max_tokens=10 + ) + print("✓ Gateway connection successful") + return True + except Exception as e: + print(f"✗ Gateway connection failed: {e}") + return False + +if __name__ == "__main__": + test_gateway_connection() +---- + +=== Test multiple models + +[source,python] +---- +def test_models(): + """Test multiple models through the gateway""" + models = [ + "openai/gpt-4o-mini", + "anthropic/claude-sonnet-3.5" + ] + + for model in models: + try: + response = client.chat.completions.create( + model=model, + messages=[{"role": "user", "content": "Say hello"}], + max_tokens=10 + ) + print(f"✓ {model}: {response.choices[0].message.content}") + except Exception as e: + print(f"✗ {model}: {e}") +---- + +=== Check request logs + +After making requests, verify they appear in observability: + +. Navigate to *AI Gateway* → *Gateways* → Select your gateway → *Logs* +. Filter by your request timestamp +. Verify your requests are logged with correct model, tokens, and cost + +// See xref:ai-gateway/builders/monitor-your-usage.adoc[] for details. 
+ +== Integrate with AI development tools + +=== Claude Code + +Configure Claude Code to use AI Gateway: + +[source,bash] +---- +claude mcp add --transport http redpanda-aigateway https://gw.ai.panda.com/mcp \ + --header "Authorization: Bearer ${REDPANDA_API_KEY}" \ + --header "rp-aigw-id: ${REDPANDA_GATEWAY_ID}" +---- + +Or edit `~/.claude/config.json`: + +[source,json] +---- +{ + "mcpServers": { + "redpanda-ai-gateway": { + "transport": "http", + "url": "https://gw.ai.panda.com/mcp", + "headers": { + "Authorization": "Bearer your-api-key", + "rp-aigw-id": "gw_abc123" + } + } + } +} +---- + +See xref:ai-gateway/integrations/claude-code-user.adoc[] for complete setup. + +=== VS Code Continue Extension + +Edit `~/.continue/config.json`: + +[source,json] +---- +{ + "models": [ + { + "title": "AI Gateway - GPT-4", + "provider": "openai", + "model": "openai/gpt-4o", + "apiBase": "https://gw.ai.panda.com", + "apiKey": "your-redpanda-api-key", + "requestOptions": { + "headers": { + "rp-aigw-id": "gw_abc123" + } + } + } + ] +} +---- + +See xref:ai-gateway/integrations/continue-user.adoc[] for complete setup. + +=== Cursor IDE + +. Open Cursor Settings (*Cursor* → *Settings* or `Cmd+,`) +. Navigate to *AI* settings +. Add custom OpenAI-compatible provider: + +[source,json] +---- +{ + "cursor.ai.providers.openai.apiBase": "https://gw.ai.panda.com", + "cursor.ai.providers.openai.defaultHeaders": { + "rp-aigw-id": "gw_abc123" + } +} +---- + +See xref:ai-gateway/integrations/cursor-user.adoc[] for complete setup. 
+ +== Best practices + +=== Use environment variables + +Store configuration in environment variables, not hardcoded in code: + +[source,python] +---- +# Good +base_url = os.getenv("REDPANDA_GATEWAY_URL") + +# Bad +base_url = "https://gw.ai.panda.com" # Don't hardcode +---- + +=== Implement retry logic + +Implement exponential backoff for transient errors: + +[source,python] +---- +import time +from openai import OpenAI, OpenAIError + +def make_request_with_retry(client, max_retries=3): + for attempt in range(max_retries): + try: + return client.chat.completions.create( + model="openai/gpt-4o-mini", + messages=[{"role": "user", "content": "Hello"}] + ) + except OpenAIError as e: + if e.status_code >= 500 and attempt < max_retries - 1: + wait_time = 2 ** attempt # Exponential backoff + print(f"Retrying in {wait_time}s...") + time.sleep(wait_time) + else: + raise +---- + +=== Monitor your usage + +Regularly check your usage to avoid unexpected costs: + +[source,python] +---- +# Track tokens in your application +total_tokens = 0 +request_count = 0 + +for request in requests: + response = client.chat.completions.create(...) + total_tokens += response.usage.total_tokens + request_count += 1 + +print(f"Total tokens: {total_tokens} across {request_count} requests") +---- + +// See xref:ai-gateway/builders/monitor-your-usage.adoc[] for detailed monitoring. + +=== Handle rate limits gracefully + +Respect rate limits and implement backoff: + +[source,python] +---- +try: + response = client.chat.completions.create(...) +except OpenAIError as e: + if e.status_code == 429: + # Rate limited - wait and retry + retry_after = int(e.response.headers.get('Retry-After', 60)) + print(f"Rate limited. 
Waiting {retry_after}s...")
+        time.sleep(retry_after)
+        # Retry request
+----
+
+== Troubleshooting
+
+=== "Authentication failed"
+
+Problem: 401 Unauthorized
+
+Solutions:
+
+* Verify your API token is correct and not expired
+* Check that the token has access to the specified gateway
+* Ensure the `Authorization` header is formatted correctly: `Bearer <token>`
+
+=== "Model not found"
+
+Problem: 404 Model not found
+
+Solutions:
+
+* Verify the model name uses `vendor/model_id` format
+// * Check available models: See xref:ai-gateway/builders/available-models.adoc[]
+* Confirm the model is enabled in your gateway (contact administrator)
+
+=== "Rate limit exceeded"
+
+Problem: 429 Too Many Requests
+
+Solutions:
+
+* Reduce request rate
+* Implement exponential backoff
+* Contact administrator to review rate limits
+* Consider using a different gateway if available
+
+=== "Connection timeout"
+
+Problem: Request times out
+
+Solutions:
+
+* Check network connectivity to the gateway endpoint
+* Verify the gateway endpoint URL is correct
+* Check if the gateway is operational (contact administrator)
+* Increase client timeout if processing complex requests
+
+== Next steps
+
+Now that your agent is connected:
+
+// * xref:ai-gateway/builders/available-models.adoc[Available Models] - Learn about model selection and routing
+// * xref:ai-gateway/builders/use-mcp-tools.adoc[Use MCP Tools] - Access tools from MCP servers (if enabled)
+// * xref:ai-gateway/builders/monitor-your-usage.adoc[Monitor Your Usage] - Track requests and costs
+* xref:ai-gateway/integrations/index.adoc[Integrations] - Configure specific tools and IDEs
diff --git a/modules/ai-agents/pages/ai-gateway/builders/discover-gateways.adoc b/modules/ai-agents/pages/ai-gateway/builders/discover-gateways.adoc
new file mode 100644
index 000000000..7e280a95e
--- /dev/null
+++ b/modules/ai-agents/pages/ai-gateway/builders/discover-gateways.adoc
@@ -0,0 +1,304 @@
+= Discover Available Gateways
+:description: Find 
which AI Gateways you can access and their configurations. +:page-topic-type: how-to +:personas: app_developer +:learning-objective-1: List all AI Gateways you have access to and retrieve their endpoints and IDs +:learning-objective-2: View which models and MCP tools are available through each gateway +:learning-objective-3: Test gateway connectivity before integration + +include::ai-agents:partial$ai-gateway-byoc-note.adoc[] + +As a builder, you need to know which gateways are available to you before integrating your agent or application. This page shows you how to discover accessible gateways, understand their configurations, and verify connectivity. + +After reading this page, you will be able to: + +* [ ] List all AI Gateways you have access to and retrieve their endpoints and IDs +* [ ] View which models and MCP tools are available through each gateway +* [ ] Test gateway connectivity before integration + +== Before you begin + +* You have a Redpanda Cloud account with access to at least one AI Gateway +* You have access to the Redpanda Cloud Console or API credentials + +== List your accessible gateways + +=== Using the Console + +. Navigate to *AI Gateway* in the Redpanda Cloud Console. +. View the *My Gateways* tab (or *Gateways* if you're an administrator). +. 
Review the list of gateways you can access: ++ +For each gateway, you'll see: ++ +-- +* *Gateway Name*: Human-readable name (for example, `production-gateway`, `team-ml-gateway`) +* *Gateway ID*: Unique identifier used in the `rp-aigw-id` header (for example, `gw_abc123`) +* *Gateway Endpoint*: Base URL for API requests (for example, `https://gw.ai.panda.com`) +* *Status*: Whether the gateway is active and accepting requests +* *Available Models*: Which LLM models you can access +* *MCP Tools*: Whether MCP tool aggregation is enabled +-- + +=== Using the API + +You can also list gateways programmatically: + +[source,bash] +---- +curl https://api.redpanda.com/v1/ai-gateway/gateways \ + -H "Authorization: Bearer ${REDPANDA_CLOUD_TOKEN}" +---- + +Response: + +[source,json] +---- +{ + "gateways": [ + { + "id": "gw_abc123", + "name": "production-gateway", + "endpoint": "https://gw.ai.panda.com", + "status": "active", + "workspace_id": "ws_xyz789", + "created_at": "2025-01-15T10:30:00Z" + }, + { + "id": "gw_def456", + "name": "staging-gateway", + "endpoint": "https://gw-staging.ai.panda.com", + "status": "active", + "workspace_id": "ws_xyz789", + "created_at": "2025-01-10T08:15:00Z" + } + ] +} +---- + +== Understand gateway information + +Each gateway provides specific information you'll need for integration: + +=== Gateway ID + +The Gateway ID is a unique identifier that you include in the `rp-aigw-id` header with every request. This tells AI Gateway which gateway configuration to use for routing, policies, and observability. + +Example: +[source,bash] +---- +rp-aigw-id: gw_abc123 +---- + +=== Gateway Endpoint + +The endpoint is the base URL where you send all API requests. This replaces direct provider URLs (like `api.openai.com` or `api.anthropic.com`). + +Example: +[source,bash] +---- +https://gw.ai.panda.com +---- + +Your application configures this as the `base_url` in your SDK client. 
+ +=== Available Models + +Each gateway exposes specific models based on administrator configuration. Models use the `vendor/model_id` format: + +* `openai/gpt-4o` +* `anthropic/claude-sonnet-3.5` +* `openai/gpt-4o-mini` + +To see which models are available through a specific gateway: + +[source,bash] +---- +curl https://{GATEWAY_ENDPOINT}/v1/models \ + -H "Authorization: Bearer ${REDPANDA_CLOUD_TOKEN}" \ + -H "rp-aigw-id: ${GATEWAY_ID}" +---- + +Response: + +[source,json] +---- +{ + "object": "list", + "data": [ + { + "id": "openai/gpt-4o", + "object": "model", + "owned_by": "openai" + }, + { + "id": "anthropic/claude-sonnet-3.5", + "object": "model", + "owned_by": "anthropic" + }, + { + "id": "openai/gpt-4o-mini", + "object": "model", + "owned_by": "openai" + } + ] +} +---- + +=== Rate Limits and Quotas + +Each gateway may have configured rate limits and monthly budgets. Check the console or contact your administrator to understand: + +* Requests per minute/hour/day +* Monthly spend limits +* Token usage quotas + +These limits help control costs and ensure fair resource allocation across teams. + +=== MCP Tools + +If MCP aggregation is enabled for your gateway, you can access tools from multiple MCP servers through a single endpoint. + +To discover available MCP tools: + +[source,bash] +---- +curl https://{GATEWAY_ENDPOINT}/mcp/tools \ + -H "Authorization: Bearer ${REDPANDA_CLOUD_TOKEN}" \ + -H "rp-aigw-id: ${GATEWAY_ID}" \ + -H "rp-aigw-mcp-deferred: true" +---- + +With deferred loading enabled, you'll receive search and orchestrator tools initially. You can then query for specific tools as needed. + +// See xref:ai-gateway/builders/use-mcp-tools.adoc[] for more details. 
+ +== Check gateway availability + +Before integrating your application, verify that you can successfully connect to the gateway: + +=== Test connectivity + +[source,bash] +---- +curl https://{GATEWAY_ENDPOINT}/v1/models \ + -H "Authorization: Bearer ${REDPANDA_CLOUD_TOKEN}" \ + -H "rp-aigw-id: ${GATEWAY_ID}" \ + -v +---- + +Expected result: HTTP 200 response with a list of available models. + +=== Test a simple request + +Send a minimal chat completion request to verify end-to-end functionality: + +[source,bash] +---- +curl https://{GATEWAY_ENDPOINT}/v1/chat/completions \ + -H "Authorization: Bearer ${REDPANDA_CLOUD_TOKEN}" \ + -H "rp-aigw-id: ${GATEWAY_ID}" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "openai/gpt-4o-mini", + "messages": [{"role": "user", "content": "Hello"}], + "max_tokens": 10 + }' +---- + +Expected result: HTTP 200 response with a completion. + +=== Troubleshoot connectivity issues + +If you cannot connect to a gateway: + +. *Verify authentication*: Ensure your API token is valid and has not expired +. *Check gateway ID*: Confirm you're using the correct `rp-aigw-id` value +. *Verify endpoint URL*: Check for typos in the gateway endpoint +. *Check permissions*: Confirm with your administrator that you have access to this gateway +. *Review network connectivity*: Ensure your network allows outbound HTTPS connections + +== Choose the right gateway + +If you have access to multiple gateways, consider which one to use based on your needs: + +=== By environment + +Organizations often create separate gateways for different environments: + +* *Production gateway*: Higher rate limits, access to all models, monitoring enabled +* *Staging gateway*: Lower rate limits, restricted models, aggressive cost controls +* *Development gateway*: Minimal limits, all models for experimentation + +Choose the gateway that matches your deployment environment. 
+ +=== By team or project + +Gateways may be organized by team or project for cost tracking and isolation: + +* *team-ml-gateway*: For machine learning team +* *team-product-gateway*: For product team +* *customer-facing-gateway*: For production customer workloads + +Use the gateway designated for your team to ensure proper cost attribution. + +=== By capability + +Different gateways may have different features enabled: + +* *Gateway with MCP tools*: Use if your agent needs to call tools +* *Gateway without MCP*: Use for simple LLM completions +* *Gateway with specific models*: Use if you need access to particular models + +== Example: Complete discovery workflow + +Here's a complete workflow to discover and validate gateway access: + +[source,bash] +---- +#!/bin/bash + +# Set your API token +export REDPANDA_CLOUD_TOKEN="your-token-here" + +# Step 1: List all accessible gateways +echo "=== Discovering gateways ===" +curl -s https://api.redpanda.com/v1/ai-gateway/gateways \ + -H "Authorization: Bearer ${REDPANDA_CLOUD_TOKEN}" \ + | jq '.gateways[] | {name: .name, id: .id, endpoint: .endpoint}' + +# Step 2: Select a gateway (example) +export GATEWAY_ID="gw_abc123" +export GATEWAY_ENDPOINT="https://gw.ai.panda.com" + +# Step 3: List available models +echo -e "\n=== Available models ===" +curl -s ${GATEWAY_ENDPOINT}/v1/models \ + -H "Authorization: Bearer ${REDPANDA_CLOUD_TOKEN}" \ + -H "rp-aigw-id: ${GATEWAY_ID}" \ + | jq '.data[] | .id' + +# Step 4: Test with a simple request +echo -e "\n=== Testing request ===" +curl -s ${GATEWAY_ENDPOINT}/v1/chat/completions \ + -H "Authorization: Bearer ${REDPANDA_CLOUD_TOKEN}" \ + -H "rp-aigw-id: ${GATEWAY_ID}" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "openai/gpt-4o-mini", + "messages": [{"role": "user", "content": "Say hello"}], + "max_tokens": 10 + }' \ + | jq '.choices[0].message.content' + +echo -e "\n=== Gateway validated successfully ===" +---- + +== Next steps + +Now that you've discovered your 
available gateways: + +* xref:ai-gateway/builders/connect-your-agent.adoc[Connect Your Agent] - Integrate your application +// * xref:ai-gateway/builders/available-models.adoc[Available Models] - Learn about model selection and routing +// * xref:ai-gateway/builders/use-mcp-tools.adoc[Use MCP Tools] - Access tools from MCP servers +// * xref:ai-gateway/builders/monitor-your-usage.adoc[Monitor Your Usage] - Track requests and costs diff --git a/modules/ai-agents/pages/ai-gateway/cel-routing-cookbook.adoc b/modules/ai-agents/pages/ai-gateway/cel-routing-cookbook.adoc new file mode 100644 index 000000000..57d997342 --- /dev/null +++ b/modules/ai-agents/pages/ai-gateway/cel-routing-cookbook.adoc @@ -0,0 +1,953 @@ += CEL Routing Cookbook +:description: CEL routing cookbook for Redpanda AI Gateway with common patterns, examples, and best practices. +:page-topic-type: cookbook +:personas: app_developer, platform_admin +:learning-objective-1: Write CEL expressions to route requests based on user tier or custom headers +:learning-objective-2: Test CEL routing logic using the UI editor or test requests +:learning-objective-3: Troubleshoot common CEL errors using safe patterns + +include::ai-agents:partial$ai-gateway-byoc-note.adoc[] + +Redpanda AI Gateway uses CEL (Common Expression Language) for dynamic request routing. CEL expressions evaluate request properties (headers, body, context) and determine which model or provider should handle each request. + +CEL enables: + +* User-based routing (free vs premium tiers) +* Content-based routing (by prompt topic, length, complexity) +* Environment-based routing (staging vs production models) +* Cost controls (reject expensive requests in test environments) +* A/B testing (route percentage of traffic to new models) +* Geographic routing (by region header) +* Custom business logic (any condition you can express) + +== CEL basics + +=== What is CEL? 
+ +CEL (Common Expression Language) is a non-Turing-complete expression language designed for fast, safe evaluation. It's used by Google (Firebase, Cloud IAM), Kubernetes, Envoy, and other systems. + +Key properties: + +* Safe: Cannot loop infinitely or access system resources +* Fast: Evaluates in microseconds +* Readable: Similar to Python/JavaScript expressions +* Type-safe: Errors caught at configuration time, not runtime + +=== CEL syntax primer + +Comparison operators: + +[source,cel] +---- +== // equal +!= // Not equal +< // Less than +> // Greater than +<= // Less than or equal +>= // Greater than or equal +---- + + +Logical operators: + +[source,cel] +---- +&& // AND +|| // OR +! // NOT +---- + + +Ternary operator (most common pattern): + +[source,cel] +---- +condition ? value_if_true : value_if_false +---- + + +Functions: + +[source,cel] +---- +.size() // Length of string or array +.contains("text") // String contains substring +.startsWith("x") // String starts with +.endsWith("x") // String ends with +.matches("regex") // Regex match +has(field) // Check if field exists +---- + + +Examples: + +[source,cel] +---- +// Simple comparison +request.headers["tier"] == "premium" + +// Ternary (if-then-else) +request.headers["tier"] == "premium" ? "openai/gpt-4o" : "openai/gpt-4o-mini" + +// Logical AND +request.headers["tier"] == "premium" && request.headers["region"] == "us" + +// String contains +request.body.messages[0].content.contains("urgent") + +// Size check +request.body.messages.size() > 10 +---- + + +== Request object schema + +CEL expressions evaluate against the `request` object, which contains: + +// PLACEHOLDER: Confirm exact schema + +=== `request.headers` (map) + +All HTTP headers (lowercase keys). 
+ +[source,cel] +---- +request.headers["x-user-tier"] // Custom header +request.headers["x-customer-id"] // Custom header +request.headers["user-agent"] // Standard header +request.headers["x-request-id"] // Standard header +---- + + +NOTE: Header names are case-insensitive in HTTP, but CEL requires lowercase keys. + +=== `request.body` (object) + +The JSON request body (for `/chat/completions`). + +[source,cel] +---- +request.body.model // String: Requested model +request.body.messages // Array: Conversation messages +request.body.messages[0].role // String: "system", "user", "assistant" +request.body.messages[0].content // String: Message content +request.body.messages.size() // Int: Number of messages +request.body.max_tokens // Int: Max completion tokens (if set) +request.body.temperature // Float: Temperature (if set) +request.body.stream // Bool: Streaming enabled (if set) +---- + + +NOTE: Fields are optional. Use `has()` to check existence: + +[source,cel] +---- +has(request.body.max_tokens) ? request.body.max_tokens : 1000 +---- + + +=== `request.path` (string) + +The request path. + +[source,cel] +---- +request.path == "/v1/chat/completions" +request.path.startsWith("/v1/") +---- + + +=== `request.method` (string) + +The HTTP method. + +[source,cel] +---- +request.method == "POST" +---- + + +// PLACEHOLDER: Are there other fields? User context? Gateway context? Timestamp? + +== CEL routing patterns + +Each pattern follows this structure: + +* When to use: Scenario description +* Expression: CEL code +* What happens: Routing behavior +* Verify: How to test +* Cost/performance impact: Implications + +=== Tier-based routing + +When to use: Different user tiers (free, pro, enterprise) should get different model quality + +Expression: + +[source,cel] +---- +request.headers["x-user-tier"] == "enterprise" ? "openai/gpt-4o" : +request.headers["x-user-tier"] == "pro" ? 
"anthropic/claude-sonnet-3.5" : +"openai/gpt-4o-mini" +---- + + +What happens: + +* Enterprise users → GPT-4o (best quality) +* Pro users → Claude Sonnet 3.5 (balanced) +* Free users → GPT-4o-mini (cost-effective) + +Verify: + +[source,python] +---- +# Test enterprise +response = client.chat.completions.create( + model="auto", # PLACEHOLDER: How to trigger CEL routing? + messages=[{"role": "user", "content": "Test"}], + extra_headers={"x-user-tier": "enterprise"} +) +# Check logs: Should route to openai/gpt-4o + +# Test free +response = client.chat.completions.create( + model="auto", + messages=[{"role": "user", "content": "Test"}], + extra_headers={"x-user-tier": "free"} +) +# Check logs: Should route to openai/gpt-4o-mini +---- + + +Cost impact: + +* Enterprise: ~$5.00 per 1K requests +* Pro: ~$3.50 per 1K requests +* Free: ~$0.50 per 1K requests + +Use case: SaaS product with tiered pricing where model quality is a differentiator + +=== Environment-based routing + +When to use: Prevent staging from using expensive models + +Expression: + +[source,cel] +---- +request.headers["x-environment"] == "production" + ? 
"openai/gpt-4o" + : "openai/gpt-4o-mini" +---- + + +What happens: + +* Production → GPT-4o (best quality) +* Staging/dev → GPT-4o-mini (10x cheaper) + +Verify: + +[source,python] +---- +# Set environment header +response = client.chat.completions.create( + model="auto", + messages=[{"role": "user", "content": "Test"}], + extra_headers={"x-environment": "staging"} +) +# Check logs: Should route to gpt-4o-mini +---- + + +Cost impact: + +* Prevents staging from inflating costs +* Example: Staging with 100K test requests/day + * GPT-4o: $500/day ($15K/month) + * GPT-4o-mini: $50/day ($1.5K/month) + * *Savings: $13.5K/month* + +Use case: Protect against runaway staging costs + + +=== Content-length guard rails + +When to use: Block or downgrade long prompts to prevent cost spikes + +Expression (Block): + +[source,cel] +---- +request.body.messages.size() > 10 || request.body.max_tokens > 4000 + ? "reject" + : "openai/gpt-4o" +---- + + +What happens: +* Requests with >10 messages or >4000 max_tokens → Rejected with 400 error +* Normal requests → GPT-4o + +Expression (Downgrade): + +[source,cel] +---- +request.body.messages.size() > 10 || request.body.max_tokens > 4000 + ? 
"openai/gpt-4o-mini" // Cheaper model + : "openai/gpt-4o" // Normal model +---- + + +What happens: + +* Long conversations → Downgraded to cheaper model +* Short conversations → Premium model + +Verify: + +[source,python] +---- +# Test rejection +response = client.chat.completions.create( + model="auto", + messages=[{"role": "user", "content": f"Message {i}"} for i in range(15)], + max_tokens=5000 +) +# Should return 400 error (rejected) + +# Test normal +response = client.chat.completions.create( + model="auto", + messages=[{"role": "user", "content": "Short message"}], + max_tokens=100 +) +# Should route to gpt-4o +---- + + +Cost impact: + +* Prevents unexpected bills from verbose prompts +* Example: Block requests >10K tokens (would cost $0.15 each) + +Use case: Staging cost controls, prevent prompt injection attacks that inflate token usage + +=== Topic-based routing + +When to use: Route different question types to specialized models + +Expression: + +[source,cel] +---- +request.body.messages[0].content.contains("code") || +request.body.messages[0].content.contains("debug") || +request.body.messages[0].content.contains("programming") + ? 
"openai/gpt-4o" // Better at code + : "anthropic/claude-sonnet-3.5" // Better at general writing +---- + + +What happens: + +* Coding questions → GPT-4o (optimized for code) +* General questions → Claude Sonnet (better prose) + +Verify: + +[source,python] +---- +# Test code question +response = client.chat.completions.create( + model="auto", + messages=[{"role": "user", "content": "Debug this Python code: ..."}] +) +# Check logs: Should route to gpt-4o + +# Test general question +response = client.chat.completions.create( + model="auto", + messages=[{"role": "user", "content": "Write a blog post about AI"}] +) +# Check logs: Should route to claude-sonnet-3.5 +---- + + +Cost impact: + +* Optimize model selection for task type +* Could improve quality without increasing costs + +Use case: Multi-purpose chatbot with both coding and general queries + + +=== Geographic/regional routing + +When to use: Route by user region for compliance or latency optimization + +Expression: + +[source,cel] +---- +request.headers["x-user-region"] == "eu" + ? "openai/gpt-4o-eu" // PLACEHOLDER: If regional models exist + : "openai/gpt-4o" +---- + + +What happens: + +* EU users → EU-region model (GDPR compliance) +* Other users → Default region + +Verify: + +[source,python] +---- +response = client.chat.completions.create( + model="auto", + messages=[{"role": "user", "content": "Test"}], + extra_headers={"x-user-region": "eu"} +) +# Check logs: Should route to EU model +---- + + +Cost impact: Neutral (same model, different region) + +Use case: GDPR compliance, data residency requirements + + +=== Customer-specific routing + +When to use: Different customers have different model access (enterprise features) + +Expression: + +[source,cel] +---- +request.headers["x-customer-id"] == "customer_vip_123" + ? 
"anthropic/claude-opus-4" // Most expensive, best quality + : "anthropic/claude-sonnet-3.5" // Standard +---- + + +What happens: + +* VIP customer → Best model +* Standard customers → Normal model + +Verify: + +[source,python] +---- +response = client.chat.completions.create( + model="auto", + messages=[{"role": "user", "content": "Test"}], + extra_headers={"x-customer-id": "customer_vip_123"} +) +# Check logs: Should route to claude-opus-4 +---- + + +Cost impact: + +* VIP: ~$7.50 per 1K requests +* Standard: ~$3.50 per 1K requests + +Use case: Enterprise contracts with premium model access + + +=== A/B testing (percentage-based routing) + +When to use: Test new models with a percentage of traffic + +// PLACEHOLDER: Confirm if CEL can access random functions or if A/B testing requires different mechanism + +Expression (if random is available): + +[source,cel] +---- +// PLACEHOLDER: Verify CEL random function availability +random() < 0.10 + ? "anthropic/claude-opus-4" // 10% traffic to new model + : "openai/gpt-4o" // 90% traffic to existing model +---- + + +Alternative (hash-based): + +[source,cel] +---- +// Use customer ID hash for stable routing +hash(request.headers["x-customer-id"]) % 100 < 10 + ? 
"anthropic/claude-opus-4" + : "openai/gpt-4o" +---- + + +What happens: + +* 10% of requests → New model (Opus 4) +* 90% of requests → Existing model (GPT-4o) + +Verify: + +[source,python] +---- +# Send 100 requests, count which model was used +for i in range(100): + response = client.chat.completions.create( + model="auto", + messages=[{"role": "user", "content": f"Test {i}"}], + extra_headers={"x-customer-id": f"customer_{i}"} + ) +# Check logs: ~10 should use opus-4, ~90 should use gpt-4o +---- + + +Cost impact: + +* Allows safe, incremental rollout of new models +* Monitor quality/cost for new model before full adoption + +Use case: Evaluate new models in production with real traffic + +=== Complexity-based routing + +When to use: Route simple queries to cheap models, complex queries to expensive models + +Expression: + +[source,cel] +---- +request.body.messages.size() == 1 && +request.body.messages[0].content.size() < 100 + ? "openai/gpt-4o-mini" // Simple, short question + : "openai/gpt-4o" // Complex or long conversation +---- + + +What happens: + +* Single short message (<100 chars) → Cheap model +* Multi-turn or long messages → Premium model + +Verify: + +[source,python] +---- +# Test simple +response = client.chat.completions.create( + model="auto", + messages=[{"role": "user", "content": "Hi"}] # 2 chars +) +# Check logs: Should route to gpt-4o-mini + +# Test complex +response = client.chat.completions.create( + model="auto", + messages=[ + {"role": "user", "content": "Long question here..." 
* 10}, + {"role": "assistant", "content": "Response"}, + {"role": "user", "content": "Follow-up"} + ] +) +# Check logs: Should route to gpt-4o +---- + + +Cost impact: + +* Can reduce costs significantly if simple queries are common +* Example: 50% of queries are simple, save 90% on those = 45% total savings + +Use case: FAQ chatbot with mix of simple lookups and complex questions + +=== Time-based routing + +When to use: Use cheaper models during off-peak hours + +// PLACEHOLDER: Confirm if CEL has access to current timestamp + +Expression (if time functions available): + +[source,cel] +---- +// PLACEHOLDER: Verify CEL time function availability +now().hour >= 22 || now().hour < 6 // 10pm - 6am + ? "openai/gpt-4o-mini" // Off-peak: cheaper model + : "openai/gpt-4o" // Peak hours: best model +---- + + +What happens: + +* Off-peak hours (10pm-6am) → Cheap model +* Peak hours (6am-10pm) → Premium model + +Cost impact: + +* Optimize for user experience during peak usage +* Save costs during low-traffic hours + +Use case: Consumer apps with time-zone-specific usage patterns + + +=== Fallback chain (multi-level) + +When to use: Complex fallback logic beyond simple primary/secondary + +Expression: + +[source,cel] +---- +request.headers["x-priority"] == "critical" + ? "openai/gpt-4o" // First choice for critical + : request.headers["x-user-tier"] == "premium" + ? 
"anthropic/claude-sonnet-3.5" // Second choice for premium + : "openai/gpt-4o-mini" // Default for everyone else +---- + + +What happens: + +* Critical requests → Always GPT-4o +* Premium non-critical → Claude Sonnet +* Everyone else → GPT-4o-mini + +Verify: Test with different header combinations + +Cost impact: Ensures SLA for critical requests while optimizing costs elsewhere + +Use case: Production systems with SLA requirements + + +== Advanced CEL patterns + +=== Default values with `has()` + +Problem: Field might not exist in request + +Expression: + +[source,cel] +---- +has(request.body.max_tokens) && request.body.max_tokens > 2000 + ? "openai/gpt-4o" // Long response expected + : "openai/gpt-4o-mini" // Short response +---- + + +What happens: Safely checks if `max_tokens` exists before comparing + +=== Multiple conditions with parentheses + +Expression: + +[source,cel] +---- +(request.headers["x-user-tier"] == "premium" || + request.headers["x-customer-id"] == "vip_123") && +request.headers["x-environment"] == "production" + ? "openai/gpt-4o" + : "openai/gpt-4o-mini" +---- + + +What happens: Premium users OR VIP customer, AND production → GPT-4o + +=== Regex matching + +Expression: + +[source,cel] +---- +request.body.messages[0].content.matches("(?i)(urgent|asap|emergency)") + ? "openai/gpt-4o" // Route urgent requests to best model + : "openai/gpt-4o-mini" +---- + + +What happens: Messages containing "urgent", "ASAP", or "emergency" (case-insensitive) → GPT-4o + +=== String array contains + +Expression: + +[source,cel] +---- +["customer_1", "customer_2", "customer_3"].exists(c, c == request.headers["x-customer-id"]) + ? "openai/gpt-4o" // Whitelist of customers + : "openai/gpt-4o-mini" +---- + + +What happens: Only specific customers get premium model + +=== Reject invalid requests + +Expression: + +[source,cel] +---- +!has(request.body.messages) || request.body.messages.size() == 0 + ? 
"reject" // PLACEHOLDER: Confirm "reject" is supported + : "openai/gpt-4o" +---- + + +What happens: Requests without messages are rejected (400 error) + +== Test CEL expressions + +=== Option 1: CEL editor in UI (if available) + +// PLACEHOLDER: Add screenshot if UI has CEL editor with test mode + +1. Navigate to Gateways → Routing Rules +2. Enter CEL expression +3. Click "Test" +4. Input test headers/body +5. View evaluated result + +=== Option 2: Send test requests + +[source,python] +---- +def test_cel_routing(headers, messages): + """Test CEL routing with specific headers and messages""" + response = client.chat.completions.create( + model="auto", # PLACEHOLDER: Confirm trigger for CEL routing + messages=messages, + extra_headers=headers, + max_tokens=10 # Keep it cheap + ) + + # Check logs to see which model was used + print(f"Headers: {headers}") + print(f"Routed to: {response.model}") # PLACEHOLDER: Does response include actual model? + +# Test tier-based routing +test_cel_routing( + {"x-user-tier": "premium"}, + [{"role": "user", "content": "Test"}] +) +test_cel_routing( + {"x-user-tier": "free"}, + [{"role": "user", "content": "Test"}] +) +---- + + +=== Option 3: CLI test (if available) + +[source,bash] +---- +# PLACEHOLDER: If CLI tool exists for testing CEL +rpk cloud ai-gateway test-cel \ + --gateway-id gw_abc123 \ + --expression 'request.headers["tier"] == "premium" ? 
"openai/gpt-4o" : "openai/gpt-4o-mini"' \ + --header 'tier: premium' \ + --body '{"messages": [{"role": "user", "content": "Test"}]}' + +# Expected output: openai/gpt-4o +---- + + +== Common CEL errors + +=== Error: "unknown field" + +Symptom: + +[source,text] +---- +Error: Unknown field 'request.headers.x-user-tier' +---- + + +Cause: Wrong syntax (dot notation instead of bracket notation for headers) + +Fix: + +[source,cel] +---- +// Wrong +request.headers.x-user-tier + +// Correct +request.headers["x-user-tier"] +---- + + +=== Error: "type mismatch" + +Symptom: + +[source,text] +---- +Error: Type mismatch: expected bool, got string +---- + + +Cause: Forgot comparison operator + +Fix: + +[source,cel] +---- +// Wrong (returns string) +request.headers["tier"] + +// Correct (returns bool) +request.headers["tier"] == "premium" +---- + + +=== Error: "field does not exist" + +Symptom: + +[source,text] +---- +Error: No such key: max_tokens +---- + + +Cause: Accessing field that doesn't exist in request + +Fix: +[source,cel] +---- +// Wrong (crashes if max_tokens not in request) +request.body.max_tokens > 1000 + +// Correct (checks existence first) +has(request.body.max_tokens) && request.body.max_tokens > 1000 +---- + + +=== Error: "index out of bounds" + +Symptom: + +[source,text] +---- +Error: Index 0 out of bounds for array of size 0 +---- + + +Cause: Accessing array element that doesn't exist + +Fix: + +[source,cel] +---- +// Wrong (crashes if messages empty) +request.body.messages[0].content.contains("test") + +// Correct (checks size first) +request.body.messages.size() > 0 && request.body.messages[0].content.contains("test") +---- + + +== CEL performance considerations + +=== Expression complexity + +Fast (<1ms evaluation): + +[source,cel] +---- +request.headers["tier"] == "premium" ? 
"openai/gpt-4o" : "openai/gpt-4o-mini" +---- + + +Slower (~5-10ms evaluation): + +[source,cel] +---- +request.body.messages[0].content.matches("complex.*regex.*pattern") +---- + + +Recommendation: Keep expressions simple. Complex regex can add latency. + +=== Number of evaluations + +Each request evaluates CEL expression once. Total latency impact: +* Simple expression: <1ms +* Complex expression: ~5-10ms + +*Acceptable for most use cases.* + +== CEL function reference + +// PLACEHOLDER: Comprehensive list of available CEL functions in AI Gateway + +=== String functions + +[cols="2,3,3"] +|=== +| Function | Description | Example + +| `size()` +| String length +| `"hello".size() == 5` + +| `contains(s)` +| String contains +| `"hello".contains("ell")` + +| `startsWith(s)` +| String starts with +| `"hello".startsWith("he")` + +| `endsWith(s)` +| String ends with +| `"hello".endsWith("lo")` + +| `matches(regex)` +| Regex match +| `"hello".matches("h.*o")` +|=== + +=== Array functions + +[cols="2,3,3"] +|=== +| Function | Description | Example + +| `size()` +| Array length +| `[1,2,3].size() == 3` + +| `exists(x, cond)` +| Any element matches +| `[1,2,3].exists(x, x > 2)` + +| `all(x, cond)` +| All elements match +| `[1,2,3].all(x, x > 0)` +|=== + +=== Utility functions + +[cols="2,3,3"] +|=== +| Function | Description | Example + +| `has(field)` +| Field exists +| `has(request.body.max_tokens)` +|=== + +// PLACEHOLDER: Other functions like hash(), random(), now()? + +== Next steps + +* *Apply CEL routing*: See the gateway configuration options available in the Redpanda Cloud console. 
diff --git a/modules/ai-agents/pages/ai-gateway/gateway-architecture.adoc b/modules/ai-agents/pages/ai-gateway/gateway-architecture.adoc new file mode 100644 index 000000000..5295f2134 --- /dev/null +++ b/modules/ai-agents/pages/ai-gateway/gateway-architecture.adoc @@ -0,0 +1,149 @@ += AI Gateway Architecture +:description: Technical architecture of Redpanda AI Gateway, including request lifecycle, supported providers, deployment models, and implementation details. +:page-topic-type: concept +:personas: app_developer, platform_admin +:learning-objective-1: Describe the three architectural planes of AI Gateway +:learning-objective-2: Explain the request lifecycle through policy evaluation stages +:learning-objective-3: Identify supported providers, features, and current limitations + +include::ai-agents:partial$ai-gateway-byoc-note.adoc[] + +This page provides technical details about AI Gateway's architecture, request processing, and capabilities. For an introduction to AI Gateway and the problems it solves, see xref:ai-agents:ai-gateway/what-is-ai-gateway.adoc[] + +== Architecture overview + +AI Gateway consists of three planes: a control plane for configuration and management, a data plane for request processing and routing, and an observability plane for monitoring and analytics. + +// PLACEHOLDER: Add architecture diagram showing: +// 1. Control Plane: +// - Workspace management +// - Provider/model configuration +// - Gateway creation and policy definition +// - Admin console +// +// 2. Data Plane: +// - Request ingestion +// - Policy evaluation (rate limits → spend limits → routing → execution) +// - Provider pool selection and failover +// - MCP aggregation layer +// - Response logging and metrics +// +// 3. 
Observability Plane: +// - Request logs storage +// - Metrics aggregation +// - Dashboard UI + +=== Control plane + +The control plane manages gateway configuration and policy definition: + +* **Workspace management**: Multi-tenant isolation with separate namespaces for different teams or environments +* **Provider configuration**: Enable and configure LLM providers (OpenAI, Anthropic, etc.) +* **Gateway creation**: Define gateways with specific routing rules, budgets, and rate limits +* **Policy definition**: Create CEL-based routing policies, spend limits, and rate limits +* **MCP server registration**: Configure which MCP servers are available to agents + +=== Data plane + +The data plane handles all runtime request processing: + +* **Request ingestion**: Accept requests via OpenAI-compatible API endpoints +* **Authentication**: Validate API keys and gateway access +* **Policy evaluation**: Apply rate limits, spend limits, and routing policies +* **Provider pool management**: Select primary or fallback providers based on availability +* **MCP aggregation**: Aggregate tools from multiple MCP servers with deferred loading +* **Response transformation**: Normalize provider-specific responses to OpenAI format +* **Metrics collection**: Record token usage, latency, and cost for every request + +=== Observability plane + +The observability plane provides monitoring and analytics: + +* **Request logs**: Store full request/response history with prompt and completion content +* **Metrics aggregation**: Calculate token usage, costs, latency percentiles, and error rates +* **Dashboard UI**: Display real-time and historical analytics per gateway, model, or provider +* **Cost tracking**: Estimate spend based on provider pricing and token consumption + +== Request lifecycle + +When a request flows through AI Gateway, it passes through several policy and routing stages before reaching the LLM provider. 
Understanding this lifecycle helps you configure policies effectively and troubleshoot issues: + +. Application sends request to gateway endpoint with `rp-aigw-id` header +. Gateway authenticates request +. Rate limit policy evaluates (allow/deny) +. Spend limit policy evaluates (allow/deny) +. Routing policy evaluates (which model/provider to use) +. Provider pool selects backend (primary/fallback) +. Request forwarded to LLM provider +. Response returned to application +. Request logged with tokens, cost, latency, status + +Each policy evaluation happens synchronously in the request path. If rate limits or spend limits reject the request, the gateway returns an error immediately without calling the LLM provider, which helps you control costs. + +=== MCP tool request lifecycle + +For MCP tool requests, the lifecycle differs slightly to support deferred tool loading: + +. Application discovers tools via `/mcp` endpoint +. Gateway aggregates tools from approved MCP servers +. Application receives search + orchestrator tools (deferred loading) +. Application invokes specific tool +. Gateway routes to appropriate MCP server +. Tool execution result returned +. Request logged with execution time, status + +The gateway only loads and exposes specific tools when requested, which dramatically reduces the token overhead compared to loading all tools upfront. + +== Supported features + +=== LLM providers + +* OpenAI +* Anthropic +* // PLACEHOLDER: Google, AWS Bedrock, Azure OpenAI, others? + +=== API compatibility + +* OpenAI-compatible `/v1/chat/completions` endpoint +* // PLACEHOLDER: Streaming support? +* // PLACEHOLDER: Embeddings support? +* // PLACEHOLDER: Other endpoints? + +=== Policy features + +* CEL-based routing expressions +* Rate limiting (// PLACEHOLDER: per-gateway, per-header, per-tenant?) +* Monthly spend limits (// PLACEHOLDER: per-gateway, per-workspace?) +* Provider pools with automatic failover +* // PLACEHOLDER: Caching support? 
+ +=== MCP support + +* MCP server aggregation +* Deferred tool loading (often 80-90% token reduction depending on configuration) +* JavaScript orchestrator for multi-step workflows +* // PLACEHOLDER: Tool execution sandboxing? + +=== Observability + +* Request logs with full prompt/response history +* Token usage tracking +* Estimated cost per request +* Latency metrics +* // PLACEHOLDER: Metrics export? OpenTelemetry support? + +== Current limitations + +* // PLACEHOLDER: List current limitations, for example: +** // - Custom model deployments (Azure OpenAI BYOK, AWS Bedrock custom models) +** // - Response caching +** // - Prompt templates/versioning +** // - Guardrails (PII detection, content moderation) +** // - Multi-region active-active deployment +** // - Metrics export to external systems +** // - Budget alerts/notifications + +== Next steps + +* xref:ai-agents:ai-gateway/gateway-quickstart.adoc[]: Route your first request through AI Gateway +* xref:ai-agents:ai-gateway/mcp-aggregation-guide.adoc[]: Configure MCP server aggregation for AI agents diff --git a/modules/ai-agents/pages/ai-gateway/gateway-quickstart.adoc b/modules/ai-agents/pages/ai-gateway/gateway-quickstart.adoc new file mode 100644 index 000000000..ffcf58aeb --- /dev/null +++ b/modules/ai-agents/pages/ai-gateway/gateway-quickstart.adoc @@ -0,0 +1,569 @@ += AI Gateway Quickstart +:description: Get started with AI Gateway by configuring providers, creating your first gateway, and routing requests through unified LLM endpoints. 
+:page-topic-type: quickstart +:personas: app_developer, platform_admin +:learning-objective-1: Enable an LLM provider and create your first gateway +:learning-objective-2: Route your first request through AI Gateway and verify it works +:learning-objective-3: View request logs and token usage in the observability dashboard + +include::ai-agents:partial$ai-gateway-byoc-note.adoc[] + +Redpanda AI Gateway provides unified access to multiple Large Language Model (LLM) providers and Model Context Protocol (MCP) servers through a single endpoint. This quickstart walks you through configuring your first gateway and routing requests through it. + +== Prerequisites + +Before starting, ensure you have: + +* Access to the AI Gateway UI (provided by your administrator) +* Admin permissions to configure providers and gateways +* API key for at least one LLM provider (OpenAI or Anthropic) +* Python 3.8+, Node.js 18+, or cURL (for testing) + +== Configure a provider + +Providers represent upstream LLM services (OpenAI, Anthropic) and their associated credentials. Providers are disabled by default and must be enabled explicitly. + +. In AI Gateways, navigate to *Providers*. +. Select a provider (for example, OpenAI or Anthropic). +. On the *Configuration* tab, click *Add configuration* and enter your API Key. +. Verify the provider status shows "Active". + +AI Gateway currently supports: + +* OpenAI +* Anthropic + +== Enable models + +After enabling a provider, enable the specific models you want to make available through your gateways. + +. Navigate to *Models*. +. Enable the models you want to use (for example, `gpt-4o`, `gpt-4o-mini`, `claude-3-5-sonnet-20241022`). +. Verify the models appear as "Enabled" in the catalog. + +TIP: Different providers have different reliability and cost characteristics. When choosing models, consider your use case requirements for quality, speed, and cost. 
+ +=== Model naming convention + +Requests through AI Gateway must use the `vendor/model_id` format. For example: + +* OpenAI models: `openai/gpt-4o`, `openai/gpt-4o-mini` +* Anthropic models: `anthropic/claude-3-5-sonnet-20241022`, `anthropic/claude-opus-4-1-20250805` + +This format allows the gateway to route requests to the correct provider. + +== Create a gateway + +A gateway is a logical configuration boundary that defines routing policies, rate limits, spend limits, and observability scope. You can create separate gateways per team, environment (staging/production), or customer. + +. Navigate to *Gateways*. +. Click *Create Gateway*. +. Configure the gateway: ++ +* *Name*: Choose a descriptive name (for example, `my-first-gateway`) +* *Workspace*: Select a workspace (conceptually similar to a resource group) +* *Description*: Optional metadata for documentation + +. After creation, copy the *Gateway Endpoint* and *Gateway ID* from the gateway detail page. You'll need these for sending requests. + +Your gateway endpoint format: +---- +Gateway Endpoint: https://gw.ai.panda.com +Gateway ID: gw_abc123... +---- + +Common gateway patterns: + +* *Environment separation*: Create separate gateways for staging and production +* *Team isolation*: One gateway per team for budget tracking +* *Customer multi-tenancy*: One gateway per customer for isolated policies + +== Send your first request + +Now that you've configured a provider and created a gateway, send a test request to verify everything works. + +[tabs] +==== +Python:: ++ +-- +[source,python] +---- +from openai import OpenAI + +# Configure client to use AI Gateway +client = OpenAI( + base_url="https://gw.ai.panda.com", # Your gateway endpoint + api_key="", # Your Redpanda API key + default_headers={ + "rp-aigw-id": "gw_abc123..." 
# Your gateway ID + } +) + +# Send a request (note the vendor/model_id format) +response = client.chat.completions.create( + model="openai/gpt-4o-mini", # Format: {provider}/{model} + messages=[ + {"role": "user", "content": "Say 'Hello from AI Gateway!'"} + ], + max_tokens=20 +) + +print(response.choices[0].message.content) +# Expected output: Hello from AI Gateway! +---- +-- + +TypeScript/JavaScript:: ++ +-- +[source,typescript] +---- +import OpenAI from 'openai'; + +const client = new OpenAI({ + baseURL: 'https://gw.ai.panda.com', + apiKey: process.env.REDPANDA_API_KEY, + defaultHeaders: { + 'rp-aigw-id': 'gw_abc123...' + } +}); + +const response = await client.chat.completions.create({ + model: 'openai/gpt-4o-mini', + messages: [ + { role: 'user', content: 'Say "Hello from AI Gateway!"' } + ], + max_tokens: 20 +}); + +console.log(response.choices[0].message.content); +// Expected output: Hello from AI Gateway! +---- +-- + +cURL:: ++ +-- +[source,bash] +---- +curl https://gw.ai.panda.com/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer ${REDPANDA_API_KEY}" \ + -H "rp-aigw-id: gw_abc123..." \ + -d '{ + "model": "openai/gpt-4o-mini", + "messages": [ + {"role": "user", "content": "Say \"Hello from AI Gateway!\""} + ], + "max_tokens": 20 + }' +---- + +Expected response: + +[source,json] +---- +{ + "id": "chatcmpl-...", + "object": "chat.completion", + "created": 1704844800, + "model": "openai/gpt-4o-mini", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Hello from AI Gateway!" 
+ }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 8, + "completion_tokens": 5, + "total_tokens": 13 + } +} +---- +-- +==== + +=== Troubleshooting + +If your request fails, check these common issues: + +* *401 Unauthorized*: Verify your API key is valid +* *404 Not Found*: Confirm the base URL matches your gateway endpoint +* *Model not found*: Ensure the model is enabled in Step 2 +* *Missing rp-aigw-id*: Add the gateway ID header to your request + +== Verify in observability dashboard + +Confirm your request appears in the AI Gateway observability dashboard. + +// PLACEHOLDER: Add UI navigation path + +. Navigate to the observability dashboard for your gateway. +. Filter by: ++ +* *Gateway*: `my-first-gateway` +* *Model*: `openai/gpt-4o-mini` +* *Time range*: Last 5 minutes + +. Verify the request log shows: ++ +* *Model*: `openai/gpt-4o-mini` +* *Provider*: OpenAI +* *Status*: 200 (success) +* *Prompt tokens*: ~8 +* *Completion tokens*: ~5 +* *Estimated cost*: Based on provider pricing +* *Latency*: Response time in milliseconds + +. Click the request to expand and view: ++ +* Full prompt and response content +* Request headers +* Routing decision details + +If your request doesn't appear: + +* Wait a few seconds for logs to populate (there may be a brief delay) +* Verify the gateway ID in your request matches the gateway you're viewing +* Check that your client received a successful response + +== Configure LLM routing (optional) + +Configure rate limits, spend limits, and provider pools with failover. + +On the Gateways page, select the *LLM* tab to configure routing policies. The LLM routing pipeline represents the request lifecycle: + +. *Rate Limit*: Control request throughput (for example, 100 requests/second) +. *Spend Limit*: Set monthly budget caps (for example, $15K/month with blocking enforcement) +. 
*Provider Pools*: Define primary and fallback providers + +=== Configure provider pool with fallback + +For high availability, configure a fallback provider that activates when the primary fails: + +. Add a second provider (for example, Anthropic). +. In your gateway's *LLM* routing configuration: ++ +* *Primary pool*: OpenAI (preferred for quality) +* *Fallback pool*: Anthropic (activates on rate limits, timeouts, or errors) + +. Save the configuration. + +The gateway automatically routes to the fallback when it detects: + +* Rate limit exceeded +* Request timeout +* 5xx server errors from primary provider + +Monitor the fallback rate in observability to detect primary provider issues early. + +== Configure MCP tools (optional) + +If you're using AI agents, configure MCP (Model Context Protocol) tool aggregation. + +On the Gateways page, select the *MCP* tab to configure tool discovery and execution. The MCP proxy aggregates multiple MCP servers behind a single endpoint, allowing agents to discover and call tools through the gateway. + +Configure the MCP settings: + +* *Display name*: Descriptive name for the provider pool +* *Model*: Choose which model handles tool execution +* *Load balancing*: If multiple providers are available, select a strategy (for example, round robin) + +=== Available MCP tools + +The gateway provides these built-in MCP tools: + +* *Data catalog API*: Query your data catalog +* *Memory store*: Persistent storage for agent state +* *Vector search*: Semantic search over embeddings +* *MCP orchestrator*: Built-in tool for programmatic multi-tool workflows + +The *MCP orchestrator* enables agents to generate JavaScript code that calls multiple tools in a single orchestrated step, reducing round trips. For example, a workflow requiring 47 file reads can be reduced from 49 round trips to just 1. + +To add external tools (for example, Slack, GitHub), add their MCP server endpoints to your gateway configuration. 
+ +=== Deferred tool loading + +When many tools are aggregated, listing all tools upfront can consume significant tokens. With *deferred tool loading*, the MCP gateway initially returns only: + +* A tool search capability +* The MCP orchestrator + +Agents then search for specific tools they need, retrieving only that subset. This can reduce token usage by 80-90% when you have many tools configured. + +// REVIEWERS: When/how exactly do you use the orchestrator? Also what happens after they create a gateway? Please provide an example of how to validate end-to-end routing against the gateway endpoint! + +// REVIEWERS: How do users connect to the ADP catalog + MCP servers exposed through RPCN? + +== Configure CEL routing rule (optional) + +Use CEL (Common Expression Language) expressions to route requests dynamically based on headers, content, or other request properties. + +The AI Gateway uses CEL for flexible routing without code changes. Use CEL to: + +* Route premium users to better models +* Apply different rate limits based on user tiers +* Enforce policies based on request content + +=== Add a routing rule + +In your gateway's routing configuration: + +. Add a CEL expression to route based on user tier: ++ +[source,cel] +---- +// Route based on user tier header +request.headers["x-user-tier"] == "premium" + ? "openai/gpt-4o" + : "openai/gpt-4o-mini" +---- + +. Save the rule. + +The gateway editor helps you discover available request fields (headers, path, body, and so on).
+ +=== Test the routing rule + +Send requests with different headers to verify routing: + +*Premium user request*: + +[source,python] +---- +response = client.chat.completions.create( + model="openai/gpt-4o", # Will be routed based on CEL rule + messages=[{"role": "user", "content": "Hello"}], + extra_headers={"x-user-tier": "premium"} +) +# Should route to gpt-4o (premium model) +---- + +*Free user request*: + +[source,python] +---- +response = client.chat.completions.create( + model="openai/gpt-4o-mini", + messages=[{"role": "user", "content": "Hello"}], + extra_headers={"x-user-tier": "free"} +) +# Should route to gpt-4o-mini (cost-effective model) +---- + +Check the observability dashboard to verify: + +* The correct model was selected based on the header value +* The routing decision explanation shows which CEL rule matched + +=== Common CEL patterns + +Route based on model family: + +[source,cel] +---- +request.body.model.startsWith("anthropic/") +---- + +Apply a rule to all requests: + +[source,cel] +---- +true +---- + +Guard for field existence: + +[source,cel] +---- +has(request.body.max_tokens) && request.body.max_tokens > 1000 +---- + +For more CEL examples, see xref:ai-agents:ai-gateway/cel-routing-cookbook.adoc[]. + +== Connect AI tools to your gateway + +The AI Gateway provides standardized endpoints that work with various AI development tools. This section shows how to configure popular tools. + +=== MCP endpoint + +If you've configured MCP tools in your gateway, AI agents can connect to the aggregated MCP endpoint: + +* *MCP endpoint URL*: `https://gw.ai.panda.com/mcp` +* *Required headers*: +** `Authorization: Bearer ` +** `rp-aigw-id: ` + +This endpoint aggregates all MCP servers configured in your gateway. 
+ +=== Environment variables + +For consistent configuration, set these environment variables: + +[source,bash] +---- +export REDPANDA_GATEWAY_URL="https://gw.ai.panda.com" +export REDPANDA_GATEWAY_ID="" +export REDPANDA_API_KEY="" +---- + +=== Claude Code + +Configure Claude Code using HTTP transport for the MCP connection: + +[source,bash] +---- +claude mcp add --transport http redpanda-aigateway https://gw.ai.panda.com/mcp \ + --header "Authorization: Bearer " \ + --header "rp-aigw-id: " +---- + +Alternatively, edit `~/.claude/config.json`: + +[source,json] +---- +{ + "mcpServers": { + "redpanda-ai-gateway": { + "transport": "http", + "url": "https://gw.ai.panda.com/mcp", + "headers": { + "Authorization": "Bearer ", + "rp-aigw-id": "" + } + } + }, + "apiProviders": { + "redpanda": { + "baseURL": "https://gw.ai.panda.com", + "headers": { + "rp-aigw-id": "" + } + } + } +} +---- + +For detailed Claude Code setup, see xref:ai-agents:ai-gateway/integrations/claude-code-user.adoc[]. + +=== Continue.dev + +Edit your Continue config file (`~/.continue/config.json`): + +[source,json] +---- +{ + "models": [ + { + "title": "Redpanda AI Gateway - GPT-4", + "provider": "openai", + "model": "openai/gpt-4", + "apiBase": "https://gw.ai.panda.com", + "apiKey": "", + "requestOptions": { + "headers": { + "rp-aigw-id": "" + } + } + }, + { + "title": "Redpanda AI Gateway - Claude", + "provider": "anthropic", + "model": "anthropic/claude-3-5-sonnet-20241022", + "apiBase": "https://gw.ai.panda.com", + "apiKey": "", + "requestOptions": { + "headers": { + "rp-aigw-id": "" + } + } + } + ] +} +---- + +For detailed Continue setup, see xref:ai-agents:ai-gateway/integrations/continue-user.adoc[]. 
+ +=== Cursor IDE + +Configure Cursor in Settings (*Cursor* → *Settings* or `Cmd+,`): + +[source,json] +---- +{ + "cursor.ai.providers.openai.apiBase": "https://gw.ai.panda.com", + "cursor.ai.providers.openai.defaultHeaders": { + "rp-aigw-id": "" + } +} +---- + +For detailed Cursor setup, see xref:ai-agents:ai-gateway/integrations/cursor-user.adoc[]. + +=== Custom applications + +For custom applications using OpenAI or Anthropic SDKs: + +*Python with OpenAI SDK*: + +[source,python] +---- +from openai import OpenAI + +client = OpenAI( + base_url="https://gw.ai.panda.com", + api_key="", + default_headers={ + "rp-aigw-id": "" + } +) +---- + +*Python with Anthropic SDK*: + +[source,python] +---- +from anthropic import Anthropic + +client = Anthropic( + base_url="https://gw.ai.panda.com", + api_key="", + default_headers={ + "rp-aigw-id": "" + } +) +---- + +*Node.js with OpenAI SDK*: + +[source,javascript] +---- +import OpenAI from 'openai'; + +const openai = new OpenAI({ + baseURL: 'https://gw.ai.panda.com', + apiKey: process.env.REDPANDA_API_KEY, + defaultHeaders: { + 'rp-aigw-id': '' + } +}); +---- + +== Next steps + +Explore advanced AI Gateway features: + +* xref:ai-agents:ai-gateway/cel-routing-cookbook.adoc[]: Advanced CEL routing patterns for traffic distribution and cost optimization +* xref:ai-agents:ai-gateway/mcp-aggregation-guide.adoc[]: Configure MCP server aggregation and deferred tool loading +* xref:ai-agents:ai-gateway/integrations/index.adoc[]: Connect more AI development tools + +Learn about the architecture: + +* xref:ai-agents:ai-gateway/gateway-architecture.adoc[]: Technical architecture, request lifecycle, and deployment models +* xref:ai-agents:ai-gateway/what-is-ai-gateway.adoc[]: Problems AI Gateway solves and common use cases diff --git a/modules/ai-agents/pages/ai-gateway/index.adoc b/modules/ai-agents/pages/ai-gateway/index.adoc new file mode 100644 index 000000000..d8be560a2 --- /dev/null +++ b/modules/ai-agents/pages/ai-gateway/index.adoc 
@@ -0,0 +1,8 @@ += AI Gateway +:description: Unified access layer for LLM providers and AI tools with centralized routing, policy enforcement, cost management, and observability. +:page-layout: index +:personas: platform_admin, app_developer, evaluator + +include::ai-agents:partial$ai-gateway-byoc-note.adoc[] + +Redpanda AI Gateway provides a unified access layer for LLM providers and AI tools that sits between your applications and the AI services they use. It delivers centralized routing, policy enforcement, cost management, and observability for all your AI traffic. \ No newline at end of file diff --git a/modules/ai-agents/pages/ai-gateway/integrations/claude-code-admin.adoc b/modules/ai-agents/pages/ai-gateway/integrations/claude-code-admin.adoc new file mode 100644 index 000000000..08ab8c01d --- /dev/null +++ b/modules/ai-agents/pages/ai-gateway/integrations/claude-code-admin.adoc @@ -0,0 +1,501 @@ += Configure AI Gateway for Claude Code +:description: Configure Redpanda AI Gateway to support Claude Code clients. +:page-topic-type: how-to +:personas: platform_admin +:learning-objective-1: Configure AI Gateway endpoints for Claude Code connectivity +:learning-objective-2: Set up authentication and access control for Claude Code clients +:learning-objective-3: Deploy MCP tool aggregation for Claude Code tool discovery + +include::ai-agents:partial$ai-gateway-byoc-note.adoc[] + +Configure Redpanda AI Gateway to support Claude Code clients accessing LLM providers and MCP tools through a unified endpoint. + +After reading this page, you will be able to: + +* [ ] Configure AI Gateway endpoints for Claude Code connectivity. +* [ ] Set up authentication and access control for Claude Code clients. +* [ ] Deploy MCP tool aggregation for Claude Code tool discovery. 
+ +== Prerequisites + +* AI Gateway deployed on a BYOC cluster running Redpanda version 25.3 or later +* Administrator access to the AI Gateway UI +* At least one LLM provider API key (OpenAI or Anthropic) +* Understanding of xref:ai-agents:ai-gateway/gateway-architecture.adoc[AI Gateway concepts] + +== Architecture overview + +Claude Code connects to AI Gateway through two primary endpoints: + +* LLM endpoint: `https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1` for chat completions +* MCP endpoint: `https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/mcp` for tool discovery and execution + +The gateway handles: + +. Authentication via bearer tokens in the `Authorization` header +. Gateway selection via the `rp-aigw-id` header +. Model routing using the `vendor/model_id` format +. MCP server aggregation for multi-tool workflows +. Request logging and cost tracking per gateway + +== Enable LLM providers + +Claude Code requires access to LLM providers through the gateway. Enable at least one provider. + +=== Configure Anthropic + +Claude Code uses Anthropic models by default. To enable Anthropic: + +. Navigate to *AI Gateway* > *Providers* in the Redpanda Cloud console +. Select *Anthropic* from the provider list +. Click *Add configuration* +. Enter your Anthropic API key +. Click *Save* + +The gateway can now route requests to Anthropic models. + +=== Configure OpenAI + +To enable OpenAI as a provider: + +. Navigate to *AI Gateway* > *Providers* +. Select *OpenAI* from the provider list +. Click *Add configuration* +. Enter your OpenAI API key +. Click *Save* + +=== Enable models in the catalog + +After enabling providers, enable specific models: + +. Navigate to *AI Gateway* > *Models* +. Enable the models you want Claude Code clients to access ++ +Common models for Claude Code: ++ +* `anthropic/claude-opus-4-5` +* `anthropic/claude-sonnet-4-5` +* `openai/gpt-4o` +* `openai/o1-mini` + +. 
Click *Save* + +Models appear in the catalog with the `vendor/model_id` format that Claude Code uses in requests. + +== Create a gateway for Claude Code clients + +Create a dedicated gateway to isolate Claude Code traffic and apply specific policies. + +=== Gateway configuration + +. Navigate to *AI Gateway* > *Gateways* +. Click *Create Gateway* +. Enter gateway details: ++ +[cols="1,2"] +|=== +|Field |Value + +|Name +|`claude-code-gateway` (or your preferred name) + +|Workspace +|Select the workspace for access control grouping + +|Description +|Gateway for Claude Code IDE clients +|=== + +. Click *Create* +. Copy the gateway ID from the gateway details page + +The gateway ID is required in the `rp-aigw-id` header for all requests. + +=== Configure LLM routing + +Set up routing policies for Claude Code requests. + +==== Basic routing with failover + +Configure a primary provider with automatic failover: + +. Navigate to the gateway's *LLM* tab +. Under *Routing*, click *Add route* +. Configure the route: ++ +[source,cel] +---- +true # Matches all requests +---- + +. Add a *Primary provider pool*: ++ +* Provider: Anthropic +* Model: All enabled Anthropic models +* Load balancing: Round robin (if multiple Anthropic configurations exist) + +. Add a *Fallback provider pool*: ++ +* Provider: OpenAI +* Model: All enabled OpenAI models +* Failover conditions: Rate limits, timeouts, 5xx errors + +. Click *Save* + +Claude Code requests route to Anthropic by default and fail over to OpenAI if Anthropic is unavailable. + +==== User-based routing + +Route requests based on user identity (if Claude Code passes user identifiers): + +[source,cel] +---- +request.headers["x-user-tier"][0] == "premium" +---- + +Create separate routes: + +* Premium route: Claude Opus 4.5 (highest quality) +* Standard route: Claude Sonnet 4.5 (balanced cost and quality) + +=== Apply rate limits + +Prevent runaway usage from Claude Code clients: + +. Navigate to the gateway's *LLM* tab +. 
Under *Rate Limit*, configure: ++ +[cols="1,2"] +|=== +|Setting |Recommended Value + +|Global rate limit +|100 requests per minute + +|Per-user rate limit +|10 requests per minute (if using user headers) +|=== + +. Click *Save* + +The gateway blocks requests exceeding these limits and returns HTTP 429 errors. + +=== Set spending limits + +Control LLM costs: + +. Under *Spend Limit*, configure: ++ +[cols="1,2"] +|=== +|Setting |Value + +|Monthly budget +|$5,000 (adjust based on expected usage) + +|Enforcement +|Block requests after budget exceeded +|=== + +. Click *Save* + +The gateway tracks estimated costs per request and blocks traffic when the monthly budget is exhausted. + +== Configure MCP tool aggregation + +Enable Claude Code to discover and use tools from multiple MCP servers through a single endpoint. + +=== Add MCP servers + +. Navigate to the gateway's *MCP* tab +. Click *Add MCP Server* +. Enter server details: ++ +[cols="1,2"] +|=== +|Field |Value + +|Display name +|Descriptive name (for example, `redpanda-data-catalog`) + +|Endpoint URL +|MCP server endpoint (for example, xref:ai-agents:mcp/remote/overview.adoc[Remote MCP server] URL) + +|Authentication +|Bearer token or other authentication mechanism +|=== + +. Click *Save* + +Repeat for each MCP server you want to aggregate. + +=== Enable deferred tool loading + +Reduce token costs by deferring tool discovery: + +. Under *MCP Settings*, enable *Deferred tool loading* +. Click *Save* + +When enabled: + +* Claude Code initially receives only a search tool and orchestrator tool +* Claude Code queries for specific tools by name when needed +* Token usage decreases by 80-90% for agents with many tools configured + +=== Add the MCP orchestrator + +The MCP orchestrator reduces multi-step workflows to single calls: + +. Under *MCP Settings*, enable *MCP Orchestrator* +. 
Configure: ++ +[cols="1,2"] +|=== +|Setting |Value + +|Orchestrator model +|Select a model with strong code generation capabilities (for example, `anthropic/claude-sonnet-4-5`) + +|Execution timeout +|30 seconds +|=== + +. Click *Save* + +Claude Code can now invoke the orchestrator tool to execute complex, multi-step operations in a single request. + +== Configure authentication + +Claude Code clients authenticate using bearer tokens. + +=== Generate API tokens + +. Navigate to *Security* > *API Tokens* in the Redpanda Cloud console +. Click *Create Token* +. Enter token details: ++ +[cols="1,2"] +|=== +|Field |Value + +|Name +|`claude-code-access` + +|Scopes +|`ai-gateway:read`, `ai-gateway:write` + +|Expiration +|Set appropriate expiration based on security policies +|=== + +. Click *Create* +. Copy the token (it appears only once) + +Distribute this token to Claude Code users through secure channels. + +=== Token rotation + +Implement token rotation for security: + +. Create a new token before the existing token expires +. Distribute the new token to users +. Monitor usage of the old token in the observability dashboard +. Revoke the old token after all users have migrated + +== Configure Claude Code clients + +Provide these instructions to users configuring Claude Code.
+ +=== CLI configuration + +Users can configure Claude Code using the CLI: + +[source,bash] +---- +claude mcp add \ + --transport http \ + redpanda-aigateway \ + https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/mcp \ + --header "Authorization: Bearer YOUR_API_TOKEN" \ + --header "rp-aigw-id: GATEWAY_ID" +---- + +Replace: + +* `{CLUSTER_ID}`: Your Redpanda cluster ID +* `YOUR_API_TOKEN`: The API token generated earlier +* `GATEWAY_ID`: The gateway ID from gateway creation + +=== Configuration file + +Alternatively, users can edit `~/.claude.json` (user-level) or `.mcp.json` (project-level): + +[source,json] +---- +{ + "mcpServers": { + "redpanda-ai-gateway": { + "type": "http", + "url": "https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/mcp", + "headers": { + "Authorization": "Bearer YOUR_API_TOKEN", + "rp-aigw-id": "GATEWAY_ID" + } + } + } +} +---- + +This configuration: + +* Connects Claude Code to the aggregated MCP endpoint +* Includes authentication and gateway identification headers + +== Monitor Claude Code usage + +Track Claude Code activity through gateway observability features. + +=== View request logs + +. Navigate to *AI Gateway* > *Observability* > *Logs* +. Filter by gateway ID: `claude-code-gateway` +. Review: ++ +* Request timestamps and duration +* Model used per request +* Token usage (prompt and completion tokens) +* Estimated cost per request +* HTTP status codes and errors + +=== Analyze metrics + +. Navigate to *AI Gateway* > *Observability* > *Metrics* +. Select the Claude Code gateway +. 
Review: ++ +[cols="1,2"] +|=== +|Metric |Purpose + +|Request volume +|Identify usage patterns and peak times + +|Token usage +|Track consumption trends + +|Estimated spend +|Monitor costs against budget + +|Latency (p50, p95, p99) +|Detect performance issues + +|Error rate +|Identify failing requests or misconfigured clients +|=== + + +=== Query logs via API + +Programmatically access logs for integration with monitoring systems: + +[source,bash] +---- +curl https://{CLUSTER_ID}.cloud.redpanda.com/api/ai-gateway/logs \ + -H "Authorization: Bearer YOUR_API_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "gateway_id": "GATEWAY_ID", + "start_time": "2026-01-01T00:00:00Z", + "end_time": "2026-01-14T23:59:59Z", + "limit": 100 + }' +---- + +== Security considerations + +Apply these security best practices for Claude Code deployments. + +=== Limit token scope + +Create tokens with minimal required scopes: + +* `ai-gateway:read`: Required for MCP tool discovery +* `ai-gateway:write`: Required for LLM requests and tool execution + +Avoid granting broader scopes like `admin` or `cluster:write`. + +=== Implement network restrictions + +If Claude Code clients connect from known IP ranges, configure network policies: + +. Use cloud provider security groups to restrict access to AI Gateway endpoints +. Allowlist only the IP ranges where Claude Code clients operate +. Monitor for unauthorized access attempts in request logs + +=== Enforce token expiration + +Set short token lifetimes for high-security environments: + +* Development environments: 90 days +* Production environments: 30 days + +Automate token rotation to reduce manual overhead. + +=== Audit tool access + +Review which MCP tools Claude Code clients can access: + +. Periodically audit the MCP servers configured in the gateway +. Remove unused or deprecated MCP servers +. 
Monitor tool execution logs for unexpected behavior + +== Troubleshooting + +Common issues and solutions when configuring AI Gateway for Claude Code. + +=== Claude Code cannot connect to gateway + +Symptom: Connection errors when Claude Code tries to discover tools or send LLM requests. + +Causes and solutions: + +* **Invalid gateway ID**: Verify the `rp-aigw-id` header matches the gateway ID from the console +* **Expired token**: Generate a new API token and update the Claude Code configuration +* **Network connectivity**: Verify the cluster endpoint is accessible from the client network +* **Provider not enabled**: Ensure at least one LLM provider is enabled and has models in the catalog + +=== Tools not appearing in Claude Code + +Symptom: Claude Code does not discover MCP tools. + +Causes and solutions: + +* **MCP servers not configured**: Add MCP server endpoints in the gateway's MCP tab +* **Deferred loading enabled but search failing**: Check that the search tool is correctly configured +* **MCP server authentication failing**: Verify MCP server authentication credentials in the gateway configuration + +=== High costs or token usage + +Symptom: Token usage and costs exceed expectations. + +Causes and solutions: + +* **Deferred tool loading disabled**: Enable deferred tool loading to reduce tokens by 80-90% +* **No rate limits**: Apply per-minute rate limits to prevent runaway usage +* **Missing spending limits**: Set monthly budget limits with blocking enforcement +* **Expensive models**: Route to cost-effective models (for example, Claude Sonnet instead of Opus) for non-critical requests + +=== Requests failing with 429 errors + +Symptom: Claude Code receives HTTP 429 Too Many Requests errors. 
+ +Causes and solutions: + +* **Rate limit exceeded**: Review and increase rate limits if usage is legitimate +* **Upstream provider rate limits**: Check if the upstream LLM provider is rate-limiting; configure failover pools +* **Budget exhausted**: Verify monthly spending limit has not been reached + +== Next steps + +* xref:ai-agents:ai-gateway/cel-routing-cookbook.adoc[]: Implement advanced routing rules +* xref:ai-agents:mcp/remote/overview.adoc[]: Deploy Remote MCP servers for custom tools diff --git a/modules/ai-agents/pages/ai-gateway/integrations/claude-code-user.adoc b/modules/ai-agents/pages/ai-gateway/integrations/claude-code-user.adoc new file mode 100644 index 000000000..3cc98eae8 --- /dev/null +++ b/modules/ai-agents/pages/ai-gateway/integrations/claude-code-user.adoc @@ -0,0 +1,421 @@ += Configure Claude Code with AI Gateway +:description: Configure Claude Code to use Redpanda AI Gateway for unified LLM access and MCP tool aggregation. +:page-topic-type: how-to +:personas: ai_agent_developer, app_developer +:learning-objective-1: Configure Claude Code to connect to AI Gateway endpoints +:learning-objective-2: Set up MCP server integration through AI Gateway +:learning-objective-3: Verify Claude Code is routing requests through the gateway + +include::ai-agents:partial$ai-gateway-byoc-note.adoc[] + +After xref:ai-agents:ai-gateway/gateway-quickstart.adoc[configuring your AI Gateway], set up Claude Code to route LLM requests and access MCP tools through the gateway's unified endpoints. + +After reading this page, you will be able to: + +* [ ] Configure Claude Code to connect to AI Gateway endpoints. +* [ ] Set up MCP server integration through AI Gateway. +* [ ] Verify Claude Code is routing requests through the gateway. 
+ +== Prerequisites + +Before configuring Claude Code, ensure you have: + +* Claude Code CLI installed (download from https://github.com/anthropics/claude-code[Anthropic's GitHub^]) +* An active Redpanda AI Gateway with: +** At least one LLM provider enabled (see xref:ai-agents:ai-gateway/gateway-quickstart.adoc#step-1-enable-a-provider[Enable a provider]) +** A gateway created and configured (see xref:ai-agents:ai-gateway/gateway-quickstart.adoc#step-3-create-a-gateway[Create a gateway]) +* Your AI Gateway credentials: +** Gateway endpoint URL (for example, `https://gw.ai.panda.com`) +** Gateway ID (for example, `gateway-abc123`) +** API key with access to the gateway + +== Configuration methods + +Claude Code supports two configuration approaches for connecting to AI Gateway: + +[cols="1,2,2"] +|=== +|Method |Best for |Trade-offs + +|CLI command +|Quick setup, single gateway +|Must re-run if configuration changes + +|Configuration file +|Multiple gateways, complex setups, version control +|Manual file editing required +|=== + +Choose the method that matches your workflow. The CLI command is faster for getting started, while the configuration file provides more flexibility for production use. + +== Configure using CLI + +The `claude mcp add` command configures Claude Code to connect to your AI Gateway's MCP endpoint. + +=== Add MCP server connection + +[,bash] +---- +claude mcp add \ + --transport http \ + redpanda-aigateway \ + https://gw.ai.panda.com/mcp \ + --header "Authorization: Bearer YOUR_API_KEY" \ + --header "rp-aigw-id: GATEWAY_ID" +---- + +Replace the following values: + +* `https://gw.ai.panda.com/mcp` - Your gateway's MCP endpoint +* `YOUR_API_KEY` - Your Redpanda API key +* `GATEWAY_ID` - Your gateway ID from the AI Gateway UI + +This command configures the HTTP transport for MCP, which allows Claude Code to discover and invoke tools from all MCP servers configured in your gateway. 
+ +=== Configure LLM routing through gateway + +To route Claude Code's LLM requests through the gateway instead of directly to Anthropic: + +[,bash] +---- +claude config set \ + --api-provider redpanda \ + --base-url https://gw.ai.panda.com \ + --header "rp-aigw-id: GATEWAY_ID" +---- + +This routes all Claude model requests through your gateway, giving you centralized observability and policy enforcement. + +== Configure using configuration file + +For more complex configurations or when managing multiple gateways, edit the Claude Code configuration file directly. + +=== Locate configuration file + +Claude Code stores configuration in: + +* macOS/Linux: `~/.claude.json` (user-level) or `.mcp.json` (project-level) +* Windows: `%USERPROFILE%\.claude.json` + +=== Basic configuration + +Create or edit `~/.claude.json` with the following structure: + +[,json] +---- +{ + "mcpServers": { + "redpanda-ai-gateway": { + "type": "http", + "url": "https://gw.ai.panda.com/mcp", + "headers": { + "Authorization": "Bearer YOUR_API_KEY", + "rp-aigw-id": "GATEWAY_ID" + } + } + } +} +---- + +Replace placeholder values: + +* `YOUR_API_KEY` - Your Redpanda API key +* `GATEWAY_ID` - Your gateway ID + +=== Multiple gateway configuration + +To configure different gateways for development and production: + +[,json] +---- +{ + "mcpServers": { + "redpanda-staging": { + "type": "http", + "url": "https://gw.staging.ai.panda.com/mcp", + "headers": { + "Authorization": "Bearer STAGING_API_KEY", + "rp-aigw-id": "staging-gateway-123" + } + }, + "redpanda-production": { + "type": "http", + "url": "https://gw.ai.panda.com/mcp", + "headers": { + "Authorization": "Bearer PROD_API_KEY", + "rp-aigw-id": "prod-gateway-456" + } + } + } +} +---- + +Switch between gateways by selecting the appropriate MCP server when using Claude Code. 
+ +=== Configuration with environment variables + +For sensitive credentials, use environment variables instead of hardcoding values: + +[,json] +---- +{ + "mcpServers": { + "redpanda-ai-gateway": { + "type": "http", + "url": "${REDPANDA_GATEWAY_URL}/mcp", + "headers": { + "Authorization": "Bearer ${REDPANDA_API_KEY}", + "rp-aigw-id": "${REDPANDA_GATEWAY_ID}" + } + } + } +} +---- + +NOTE: Claude Code supports `${VAR}` interpolation syntax in the `mcpServers` section. The variables `REDPANDA_GATEWAY_URL`, `REDPANDA_GATEWAY_ID`, and `REDPANDA_API_KEY` will be resolved from environment variables at runtime. + +Set environment variables before launching Claude Code: + +[,bash] +---- +export REDPANDA_GATEWAY_URL="https://gw.ai.panda.com" +export REDPANDA_GATEWAY_ID="gateway-abc123" +export REDPANDA_API_KEY="your-api-key" +---- + +On Windows (PowerShell): + +[,powershell] +---- +$env:REDPANDA_GATEWAY_URL = "https://gw.ai.panda.com" +$env:REDPANDA_GATEWAY_ID = "gateway-abc123" +$env:REDPANDA_API_KEY = "your-api-key" +---- + +== Verify configuration + +After configuring Claude Code, verify it connects correctly to your AI Gateway. + +=== Test MCP tool discovery + +List available MCP tools to confirm Claude Code can reach your gateway's MCP endpoint: + +[,bash] +---- +claude mcp list +---- + +Expected output should show: + +* The `redpanda-ai-gateway` server connection +* Status: Connected +* Available tools from your configured MCP servers + +If deferred tool loading is enabled in your gateway, you'll see a search tool and the MCP orchestrator tool instead of all tools upfront. + +=== Verify gateway routing + +Check that requests route through the gateway by monitoring the AI Gateway dashboard: + +. Open the Redpanda Cloud Console +. Navigate to your gateway's observability dashboard +. Send a test request from Claude Code: ++ +[,bash] +---- +echo "Write a simple Python hello world function" | claude +---- + +. 
Refresh the dashboard and verify: +** Request appears in the logs +** Model shows as `anthropic/claude-sonnet-4-5` (or your configured model) +** Request succeeded (status 200) +** Token usage and estimated cost are recorded + +If the request doesn't appear in the dashboard, see <<troubleshooting>>. + +== Advanced configuration + +=== Custom request timeout + +Configure timeout for MCP requests in the configuration file: + +[,json] +---- +{ + "mcpServers": { + "redpanda-ai-gateway": { + "type": "http", + "url": "https://gw.ai.panda.com/mcp", + "headers": { + "Authorization": "Bearer YOUR_API_KEY", + "rp-aigw-id": "GATEWAY_ID" + }, + "timeout": 30000 + } + } +} +---- + +The `timeout` value is in milliseconds. Default is 10000 (10 seconds). Increase this for MCP tools that perform long-running operations. + +=== Debug mode + +Enable debug logging to troubleshoot connection issues: + +[,bash] +---- +export CLAUDE_DEBUG=1 +claude +---- + +Debug mode shows: + +* HTTP request and response headers +* MCP tool discovery messages +* Gateway routing decisions (if exposed in response headers) +* Error details + +[[troubleshooting]] +== Troubleshooting + +=== MCP server not connecting + +**Symptom**: `claude mcp list` shows "Connection failed" or no tools available. + +**Causes and solutions**: + +. **Incorrect endpoint URL** ++ +Verify your MCP endpoint is correct. It should be `{gateway-url}/mcp`, not just `{gateway-url}`. ++ +[,bash] +---- +# Correct +https://gw.ai.panda.com/mcp + +# Incorrect +https://gw.ai.panda.com +---- + +. **Authentication failure** ++ +Check that your API key is valid and has access to the gateway: ++ +[,bash] +---- +curl -H "Authorization: Bearer YOUR_API_KEY" \ + -H "rp-aigw-id: GATEWAY_ID" \ + https://gw.ai.panda.com/mcp +---- ++ +You should receive a valid MCP protocol response. If you get `401 Unauthorized`, regenerate your API key in the Redpanda Cloud Console. + +. **Gateway ID mismatch** ++ +Verify your gateway ID matches exactly (case-sensitive).
Copy it directly from the AI Gateway UI rather than typing it manually. + +. **Network connectivity issues** ++ +Test basic connectivity to the gateway endpoint: ++ +[,bash] +---- +curl -I https://gw.ai.panda.com/mcp +---- ++ +If this times out, check your network configuration, firewall rules, or VPN connection. + +=== Requests not appearing in gateway dashboard + +**Symptom**: Claude Code works, but you don't see requests in the AI Gateway observability dashboard. + +**Causes and solutions**: + +. **Wrong gateway configured** ++ +Verify that the `rp-aigw-id` header in your configuration matches the gateway you're viewing in the dashboard. + +. **Log ingestion delay** ++ +Gateway logs can take 5-10 seconds to appear in the dashboard. Wait briefly and refresh. + +. **Model name format error** ++ +Ensure requests use the `vendor/model_id` format (for example, `anthropic/claude-sonnet-4-5`), not just the model name (for example, `claude-sonnet-4-5`). + +=== High latency after gateway integration + +**Symptom**: Requests are slower after routing through the gateway. + +**Causes and solutions**: + +. **Gateway geographic distance** ++ +If your gateway is in a different region than you or the upstream provider, this adds network latency. Check gateway region in the Redpanda Cloud Console. + +. **Provider pool failover** ++ +If your gateway is configured with fallback providers, check the logs to see if requests are failing over. Failover adds latency. + +. **MCP tool aggregation overhead** ++ +Aggregating tools from multiple MCP servers adds processing time. Use deferred tool loading to reduce this overhead (see xref:ai-agents:ai-gateway/mcp-aggregation-guide.adoc[]). + +. **Rate limiting** ++ +If you're hitting rate limits, the gateway may be queuing requests. Check the observability dashboard for rate limit metrics. + +=== Configuration file not loading + +**Symptom**: Changes to `.claude.json` don't take effect. + +**Solutions**: + +. 
**Restart Claude Code** ++ +Configuration changes require restarting Claude Code: ++ +[,bash] +---- +# Kill any running Claude Code processes +pkill claude + +# Start Claude Code again +claude +---- + +. **Validate JSON syntax** ++ +Ensure your `.claude.json` is valid JSON. Use a JSON validator: ++ +[,bash] +---- +python3 -m json.tool ~/.claude.json +---- + +. **Check file permissions** ++ +Verify Claude Code can read the configuration file: ++ +[,bash] +---- +ls -la ~/.claude.json +---- ++ +The file should be readable by your user. If not, fix permissions: ++ +[,bash] +---- +chmod 600 ~/.claude.json +---- + +== Next steps + +* xref:ai-agents:ai-gateway/mcp-aggregation-guide.adoc[]: Configure deferred tool loading to reduce token costs +* xref:ai-agents:ai-gateway/cel-routing-cookbook.adoc[]: Use CEL expressions to route Claude Code requests based on context + +== Related pages + +* xref:ai-agents:ai-gateway/gateway-quickstart.adoc[]: Create and configure your AI Gateway +* xref:ai-agents:ai-gateway/gateway-architecture.adoc[]: Learn about AI Gateway architecture and benefits diff --git a/modules/ai-agents/pages/ai-gateway/integrations/cline-admin.adoc b/modules/ai-agents/pages/ai-gateway/integrations/cline-admin.adoc new file mode 100644 index 000000000..3092e84e2 --- /dev/null +++ b/modules/ai-agents/pages/ai-gateway/integrations/cline-admin.adoc @@ -0,0 +1,587 @@ += Configure AI Gateway for Cline +:description: Configure Redpanda AI Gateway to support Cline clients. +:page-topic-type: how-to +:personas: platform_admin +:learning-objective-1: Configure AI Gateway endpoints for Cline connectivity +:learning-objective-2: Set up authentication and access control for Cline clients +:learning-objective-3: Deploy MCP tool aggregation for Cline tool discovery + +include::ai-agents:partial$ai-gateway-byoc-note.adoc[] + +Configure Redpanda AI Gateway to support Cline (formerly Claude Dev) clients accessing LLM providers and MCP tools through a unified endpoint. 
+ +After reading this page, you will be able to: + +* [ ] Configure AI Gateway endpoints for Cline connectivity. +* [ ] Set up authentication and access control for Cline clients. +* [ ] Deploy MCP tool aggregation for Cline tool discovery. + +== Prerequisites + +* AI Gateway deployed on a BYOC cluster running Redpanda version 25.3 or later +* Administrator access to the AI Gateway UI +* At least one LLM provider API key (Anthropic or OpenAI) +* Understanding of xref:ai-agents:ai-gateway/gateway-architecture.adoc[AI Gateway concepts] + +== About Cline + +Cline is a VS Code extension designed for autonomous AI development workflows. It connects to Claude models through the native Anthropic API format, sending requests to `/v1/messages` endpoints. Cline supports long-running tasks, browser integration, and autonomous operations, with full MCP support for tool discovery and execution. + +Key characteristics: + +* Uses native Anthropic format (compatible with OpenAI-compatible endpoints) +* Designed for autonomous, multi-step workflows +* Supports MCP protocol for external tool integration +* Operates as a VS Code extension with persistent context +* Requires configuration similar to Claude Code + +== Architecture overview + +Cline connects to AI Gateway through two primary endpoints: + +* LLM endpoint: `https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1` for chat completions +* MCP endpoint: `https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/mcp` for tool discovery and execution + +The gateway handles: + +. Authentication via bearer tokens in the `Authorization` header +. Gateway selection via the `rp-aigw-id` header +. Model routing using the `vendor/model_id` format +. MCP server aggregation for multi-tool workflows +. Request logging and cost tracking per gateway + +== Enable LLM providers + +Cline requires access to LLM providers through the gateway. Enable at least one provider. + +=== Configure Anthropic + +Cline uses Anthropic models by default. 
To enable Anthropic: + +. Navigate to *AI Gateway* > *Providers* in the Redpanda Cloud console +. Select *Anthropic* from the provider list +. Click *Add configuration* +. Enter your Anthropic API key +. Click *Save* + +The gateway can now route requests to Anthropic models. + +=== Configure OpenAI + +To enable OpenAI as a provider: + +. Navigate to *AI Gateway* > *Providers* +. Select *OpenAI* from the provider list +. Click *Add configuration* +. Enter your OpenAI API key +. Click *Save* + +=== Enable models in the catalog + +After enabling providers, enable specific models: + +. Navigate to *AI Gateway* > *Models* +. Enable the models you want Cline clients to access ++ +Common models for Cline: ++ +* `anthropic/claude-opus-4-5` +* `anthropic/claude-sonnet-4-5` +* `openai/gpt-4o` +* `openai/o1-mini` + +. Click *Save* + +Models appear in the catalog with the `vendor/model_id` format that Cline uses in requests. + +== Create a gateway for Cline clients + +Create a dedicated gateway to isolate Cline traffic and apply specific policies. + +=== Gateway configuration + +. Navigate to *AI Gateway* > *Gateways* +. Click *Create Gateway* +. Enter gateway details: ++ +[cols="1,2"] +|=== +|Field |Value + +|Name +|`cline-gateway` (or your preferred name) + +|Workspace +|Select the workspace for access control grouping + +|Description +|Gateway for Cline VS Code extension clients +|=== + +. Click *Create* +. Copy the gateway ID from the gateway details page + +The gateway ID is required in the `rp-aigw-id` header for all requests. + +=== Configure LLM routing + +Set up routing policies for Cline requests. + +==== Basic routing with failover + +Configure a primary provider with automatic failover: + +. Navigate to the gateway's *LLM* tab +. Under *Routing*, click *Add route* +. Configure the route: ++ +[source,cel] +---- +true # Matches all requests +---- + +. 
Add a *Primary provider pool*: ++ +* Provider: Anthropic +* Model: All enabled Anthropic models +* Load balancing: Round robin (if multiple Anthropic configurations exist) + +. Add a *Fallback provider pool*: ++ +* Provider: OpenAI +* Model: All enabled OpenAI models +* Failover conditions: Rate limits, timeouts, 5xx errors + +. Click *Save* + +Cline requests route to Anthropic by default and fail over to OpenAI if Anthropic is unavailable. + +==== Workspace-based routing + +Route requests based on VS Code workspace or project context (if Cline passes workspace identifiers): + +[source,cel] +---- +request.headers["x-workspace-type"][0] == "production" +---- + +Create separate routes: + +* Production route: Claude Opus 4.5 (highest quality, critical code) +* Development route: Claude Sonnet 4.5 (balanced cost and quality) +* Experimental route: OpenAI GPT-4o (cost-effective testing) + +=== Apply rate limits + +Prevent runaway usage from autonomous Cline sessions: + +. Navigate to the gateway's *LLM* tab +. Under *Rate Limit*, configure: ++ +[cols="1,2"] +|=== +|Setting |Recommended Value + +|Global rate limit +|120 requests per minute + +|Per-user rate limit +|15 requests per minute (if using user headers) +|=== ++ +Cline can generate multiple requests during autonomous operations. Higher limits than typical interactive clients may be necessary. + +. Click *Save* + +The gateway blocks requests exceeding these limits and returns HTTP 429 errors. + +=== Set spending limits + +Control LLM costs during autonomous operations: + +. Under *Spend Limit*, configure: ++ +[cols="1,2"] +|=== +|Setting |Value + +|Monthly budget +|$8,000 (adjust based on expected autonomous usage) + +|Enforcement +|Block requests after budget exceeded +|=== ++ +Autonomous operations can consume significant tokens. Monitor spending patterns after deployment. + +. Click *Save* + +The gateway tracks estimated costs per request and blocks traffic when the monthly budget is exhausted. 
+ +== Configure MCP tool aggregation + +Enable Cline to discover and use tools from multiple MCP servers through a single endpoint. + +=== Add MCP servers + +. Navigate to the gateway's *MCP* tab +. Click *Add MCP Server* +. Enter server details: ++ +[cols="1,2"] +|=== +|Field |Value + +|Display name +|Descriptive name (for example, `filesystem-tools`, `code-analysis-tools`) + +|Endpoint URL +|MCP server endpoint (for example, xref:ai-agents:mcp/remote/overview.adoc[Remote MCP server] URL) + +|Authentication +|Bearer token or other authentication mechanism +|=== + +. Click *Save* + +Repeat for each MCP server you want to aggregate. + +=== Enable deferred tool loading + +Reduce token costs for Cline sessions with many available tools: + +. Under *MCP Settings*, enable *Deferred tool loading* +. Click *Save* + +When enabled: + +* Cline initially receives only a search tool and orchestrator tool +* Cline queries for specific tools by name when needed +* Token usage decreases by 80-90% for configurations with many tools + +This is particularly important for Cline because autonomous operations can make many tool discovery calls. + +=== Add the MCP orchestrator + +The MCP orchestrator reduces multi-step autonomous workflows to single calls: + +. Under *MCP Settings*, enable *MCP Orchestrator* +. Configure: ++ +[cols="1,2"] +|=== +|Setting |Value + +|Orchestrator model +|Select a model with strong code generation capabilities (for example, `anthropic/claude-sonnet-4-5`) + +|Execution timeout +|45 seconds +|=== ++ +Longer timeout than typical interactive clients allows complex autonomous operations to complete. + +. Click *Save* + +Cline can now invoke the orchestrator tool to execute complex, multi-step operations in a single request, which is ideal for autonomous development workflows. + +== Configure authentication + +Cline clients authenticate using bearer tokens. + +=== Generate API tokens + +. Navigate to *Security* > *API Tokens* in the Redpanda Cloud console +. 
Click *Create Token* +. Enter token details: ++ +[cols="1,2"] +|=== +|Field |Value + +|Name +|`cline-access` + +|Scopes +|`ai-gateway:read`, `ai-gateway:write` + +|Expiration +|Set appropriate expiration based on security policies +|=== + +. Click *Create* +. Copy the token (it appears only once) + +Distribute this token to Cline users through secure channels. + +=== Token rotation + +Implement token rotation for security: + +. Create a new token before the existing token expires +. Distribute the new token to users +. Monitor usage of the old token in the observability dashboard +. Revoke the old token after all users have migrated + +== Configure Cline clients + +Provide these instructions to users configuring Cline in VS Code. + +=== API provider configuration + +Users configure Cline's API provider and credentials through the Cline extension interface. + +IMPORTANT: API provider configuration (API keys, base URLs, custom headers) is managed via Cline's extension global state, not VS Code `settings.json`. These settings are stored in the extension's internal state and must be configured through the Cline UI. + +==== Configure via Cline UI + +. Open the Cline extension panel in VS Code +. Click the settings icon or gear menu +. Configure the API connection: ++ +* *API Provider*: Select "Custom" or "Anthropic" +* *API Base URL*: `https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1` +* *API Key*: `YOUR_API_TOKEN` +* *Custom Headers*: Add `rp-aigw-id` with value `GATEWAY_ID` + +Replace: + +* `{CLUSTER_ID}`: Your Redpanda cluster ID +* `YOUR_API_TOKEN`: The API token generated earlier +* `GATEWAY_ID`: The gateway ID from gateway creation + +=== MCP server configuration + +Configure Cline to connect to the aggregated MCP endpoint through the Cline UI or by editing `cline_mcp_settings.json`. + +==== Enable MCP mode + +. Open VS Code Settings (Cmd/Ctrl + ,) +. Search for "Cline > Mcp: Mode" +. 
Enable the MCP mode toggle + +==== Configure MCP server via Cline UI + +. Open the Cline extension panel in VS Code +. Navigate to MCP server settings +. Add the Redpanda AI Gateway MCP server with the connection details + +==== Configure via cline_mcp_settings.json + +Alternatively, edit `cline_mcp_settings.json` (located in the Cline extension storage directory): + +[source,json] +---- +{ + "mcpServers": { + "redpanda-ai-gateway": { + "type": "streamableHttp", + "url": "https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/mcp", + "headers": { + "Authorization": "Bearer YOUR_API_TOKEN", + "rp-aigw-id": "GATEWAY_ID" + } + } + } +} +---- + +Replace: + +* `{CLUSTER_ID}`: Your Redpanda cluster ID +* `YOUR_API_TOKEN`: The API token generated earlier +* `GATEWAY_ID`: The gateway ID from gateway creation + +This configuration connects Cline to the aggregated MCP endpoint with authentication and gateway identification headers. + +=== Configuration scope + +Cline stores configuration in the extension's global state: + +* *API Provider settings*: Stored globally per VS Code instance, applies to all workspaces +* *MCP server settings*: Can be configured per workspace using `cline_mcp_settings.json` + +For project-specific MCP server configurations (for example, development vs production gateways), place `cline_mcp_settings.json` in the workspace directory and configure different MCP servers per project. + +== Monitor Cline usage + +Track Cline activity through gateway observability features. + +=== View request logs + +. Navigate to *AI Gateway* > *Observability* > *Logs* +. Filter by gateway ID: `cline-gateway` +. Review: ++ +* Request timestamps and duration +* Model used per request +* Token usage (prompt and completion tokens) +* Estimated cost per request +* HTTP status codes and errors + +Cline autonomous operations may generate request sequences. Look for patterns to identify long-running sessions. + +=== Analyze metrics + +. 
Navigate to *AI Gateway* > *Observability* > *Metrics* +. Select the Cline gateway +. Review: ++ +[cols="1,2"] +|=== +|Metric |Purpose + +|Request volume +|Identify autonomous session patterns and peak times + +|Token usage +|Track consumption trends from multi-step operations + +|Estimated spend +|Monitor costs against budget (autonomous operations can be expensive) + +|Latency (p50, p95, p99) +|Detect performance issues in autonomous workflows + +|Error rate +|Identify failing requests or misconfigured clients +|=== + + +=== Query logs via API + +Programmatically access logs for integration with monitoring systems: + +[source,bash] +---- +# Set REDPANDA_API_TOKEN environment variable before running +curl https://{CLUSTER_ID}.cloud.redpanda.com/api/ai-gateway/logs \ + -H "Authorization: Bearer ${REDPANDA_API_TOKEN}" \ + -H "Content-Type: application/json" \ + -d '{ + "gateway_id": "GATEWAY_ID", + "start_time": "2026-01-01T00:00:00Z", + "end_time": "2026-01-14T23:59:59Z", + "limit": 100 + }' +---- + +NOTE: Set the `REDPANDA_API_TOKEN` environment variable to your API token before running this command. + +== Security considerations + +Apply these security best practices for Cline deployments. + +=== Limit token scope + +Create tokens with minimal required scopes: + +* `ai-gateway:read`: Required for MCP tool discovery +* `ai-gateway:write`: Required for LLM requests and tool execution + +Avoid granting broader scopes like `admin` or `cluster:write`. + +Because Cline performs autonomous operations, limit what tools it can access through MCP server selection. + +=== Implement network restrictions + +If Cline clients connect from known networks (corporate VPN, office IP ranges), configure network policies: + +. Use cloud provider security groups to restrict access to AI Gateway endpoints +. Allowlist only the IP ranges where Cline clients operate +. 
Monitor for unauthorized access attempts in request logs + +=== Enforce token expiration + +Set short token lifetimes for high-security environments: + +* Development environments: 90 days +* Production environments: 30 days + +Automate token rotation to reduce manual overhead. + +=== Audit tool access + +Review which MCP tools Cline clients can access: + +. Periodically audit the MCP servers configured in the gateway +. Remove unused or deprecated MCP servers +. Monitor tool execution logs for unexpected autonomous behavior +. Consider creating separate gateways for different trust levels + +Because Cline operates autonomously, carefully control which tools it can invoke. + +=== Monitor autonomous operations + +Set up alerts for unusual patterns: + +* Request rate spikes (may indicate runaway autonomous loops) +* High error rates (may indicate tool compatibility issues) +* Unexpected tool invocations (may indicate misconfigured autonomous behavior) +* Budget consumption spikes (autonomous operations can be expensive) + +== Troubleshooting + +Common issues and solutions when configuring AI Gateway for Cline. + +=== Cline cannot connect to gateway + +Symptom: Connection errors when Cline tries to discover tools or send LLM requests. + +Causes and solutions: + +* **Invalid gateway ID**: Verify the `rp-aigw-id` header matches the gateway ID from the console +* **Expired token**: Generate a new API token and update the Cline settings +* **Network connectivity**: Verify the cluster endpoint is accessible from the client network +* **Provider not enabled**: Ensure at least one LLM provider is enabled and has models in the catalog +* **VS Code settings not applied**: Reload VS Code window after changing settings (Cmd/Ctrl + Shift + P > "Reload Window") + +=== Tools not appearing in Cline + +Symptom: Cline does not discover MCP tools. 
 + +Causes and solutions: + +* **MCP servers not configured**: Add MCP server endpoints in the gateway's MCP tab +* **Deferred loading enabled but search failing**: Check that the search tool is correctly configured +* **MCP server authentication failing**: Verify MCP server authentication credentials in the gateway configuration +* **Cline MCP configuration missing**: Ensure the MCP server is configured in `cline_mcp_settings.json` (or through the Cline UI) + +=== High costs or token usage + +Symptom: Token usage and costs exceed expectations. + +Causes and solutions: + +* **Deferred tool loading disabled**: Enable deferred tool loading to reduce tokens by 80-90% +* **Autonomous loops**: Monitor for repeated similar requests (may indicate autonomous operation stuck in a loop) +* **No rate limits**: Apply per-minute rate limits to prevent runaway autonomous usage +* **Missing spending limits**: Set monthly budget limits with blocking enforcement +* **Expensive models for autonomous work**: Route autonomous operations to cost-effective models (for example, Claude Sonnet instead of Opus) +* **Too many tools in context**: Reduce the number of aggregated MCP servers or enable deferred loading + +=== Requests failing with 429 errors + +Symptom: Cline receives HTTP 429 Too Many Requests errors. + +Causes and solutions: + +* **Rate limit exceeded**: Review and increase rate limits if autonomous usage is legitimate +* **Upstream provider rate limits**: Check if the upstream LLM provider is rate-limiting; configure failover pools +* **Budget exhausted**: Verify monthly spending limit has not been reached +* **Autonomous operation too aggressive**: Configure Cline to slow down request rate + +=== Autonomous operations timing out + +Symptom: Cline operations fail with timeout errors. 
+ +Causes and solutions: + +* **MCP orchestrator timeout too short**: Increase orchestrator execution timeout to 60 seconds +* **Complex multi-step operations**: Break down tasks or use the orchestrator tool for better efficiency +* **Slow MCP server responses**: Check MCP server performance and consider caching + +== Next steps + +* xref:ai-agents:ai-gateway/cel-routing-cookbook.adoc[]: Implement advanced routing rules +* xref:ai-agents:mcp/remote/overview.adoc[]: Deploy Remote MCP servers for custom tools diff --git a/modules/ai-agents/pages/ai-gateway/integrations/cline-user.adoc b/modules/ai-agents/pages/ai-gateway/integrations/cline-user.adoc new file mode 100644 index 000000000..795a638eb --- /dev/null +++ b/modules/ai-agents/pages/ai-gateway/integrations/cline-user.adoc @@ -0,0 +1,761 @@ += Configure Cline with AI Gateway +:description: Configure Cline to use Redpanda AI Gateway for unified LLM access, MCP tool integration, and autonomous coding workflows. +:page-topic-type: how-to +:personas: ai_agent_developer, app_developer +:learning-objective-1: Configure Cline to connect to AI Gateway for LLM requests and MCP tools +:learning-objective-2: Set up autonomous mode with custom instructions and browser integration +:learning-objective-3: Verify Cline routes requests through the gateway and optimize for cost + +include::ai-agents:partial$ai-gateway-byoc-note.adoc[] + +After xref:ai-agents:ai-gateway/gateway-quickstart.adoc[configuring your AI Gateway], set up Cline (formerly Claude Dev) to route LLM requests and access MCP tools through the gateway's unified endpoints. + +After reading this page, you will be able to: + +* [ ] Configure Cline to connect to AI Gateway for LLM requests and MCP tools. +* [ ] Set up autonomous mode with custom instructions and browser integration. +* [ ] Verify Cline routes requests through the gateway and optimize for cost. 
+ +== Prerequisites + +Before configuring Cline, ensure you have: + +* Cline VS Code extension installed (search for "Cline" in VS Code Extensions) +* An active Redpanda AI Gateway with: +** At least one LLM provider enabled (see xref:ai-agents:ai-gateway/gateway-quickstart.adoc#step-1-enable-a-provider[Enable a provider]) +** A gateway created and configured (see xref:ai-agents:ai-gateway/gateway-quickstart.adoc#step-3-create-a-gateway[Create a gateway]) +* Your AI Gateway credentials: +** Gateway endpoint URL (for example, `https://gw.ai.panda.com`) +** Gateway ID (for example, `gateway-abc123`) +** API key with access to the gateway + +== About Cline + +Cline is an autonomous AI coding agent for VS Code that can: + +* Read and edit files in your workspace +* Execute terminal commands +* Browse the web for documentation and research +* Create and manage complex multi-file changes +* Work autonomously with approval checkpoints + +By routing Cline through AI Gateway, you gain centralized observability, cost controls, and the ability to aggregate multiple MCP servers into a single interface. + +== Configuration overview + +Cline supports two connection types for AI Gateway: + +[cols="1,2,2"] +|=== +|Connection type |Use for |Configuration location + +|OpenAI-compatible API +|LLM requests (chat, code generation) +|Cline Settings → API Configuration + +|MCP servers +|Tool discovery and execution +|Cline Settings → MCP Servers +|=== + +Both can route through AI Gateway independently or together, depending on your needs. + +== Configure LLM routing through gateway + +Set up Cline to route all LLM requests through your AI Gateway instead of directly to providers. + +=== Open Cline settings + +. Open VS Code +. Open Command Palette (Cmd+Shift+P or Ctrl+Shift+P) +. Search for `Cline: Open Settings` +. Select `Cline: Open Settings` + +Alternatively, click the gear icon in the Cline sidebar panel. + +=== Configure API provider + +In the Cline settings interface: + +. 
Navigate to *API Configuration* section +. Select *API Provider*: `OpenAI Compatible` +. Set *Base URL*: `https://gw.ai.panda.com` +. Set *API Key*: Your Redpanda API key +. Expand *Advanced Settings* +. Add custom headers: ++ +[,json] +---- +{ + "rp-aigw-id": "gateway-abc123" +} +---- + +Replace the following values: + +* `https://gw.ai.panda.com` - Your gateway's base URL (without `/v1` suffix) +* `gateway-abc123` - Your gateway ID from the AI Gateway UI + +=== Select model + +In the *Model* dropdown, enter the model using the `vendor/model_id` format: + +* For Anthropic Claude: `anthropic/claude-sonnet-4-5` +* For OpenAI: `openai/gpt-4o` +* For other providers: `{provider}/{model-name}` + +The gateway routes the request based on this format. If you use a non-prefixed model name (for example, `claude-sonnet-4-5`), the gateway may not route correctly. + +=== Verify configuration + +. Click *Test Connection* in Cline settings +. Verify status shows "Connected" +. Send a test message in the Cline chat panel + +If the connection fails, see <>. + +== Configure MCP server integration + +Connect Cline to your AI Gateway's MCP endpoint to aggregate tools from multiple MCP servers. + +=== Add MCP server connection + +In the Cline settings interface: + +. Navigate to *MCP Servers* section +. Click *Add MCP Server* +. Configure the connection: ++ +[,json] +---- +{ + "name": "redpanda-ai-gateway", + "transport": "http", + "url": "https://gw.ai.panda.com/mcp", + "headers": { + "Authorization": "Bearer YOUR_API_KEY", + "rp-aigw-id": "GATEWAY_ID" + } +} +---- + +Replace placeholder values: + +* `YOUR_API_KEY` - Your Redpanda API key +* `GATEWAY_ID` - Your gateway ID + +=== Enable tool discovery + +After adding the MCP server: + +. Click *Refresh Tools* to discover available tools +. Verify that tools from your configured MCP servers appear in the tool list +. 
If using deferred tool loading, you'll see a search tool and MCP orchestrator tool instead of all tools upfront + +Tools are now available for Cline to use autonomously during coding sessions. + +=== Alternative: Manual configuration file + +For more control, edit the VS Code settings directly: + +. Open VS Code settings (Cmd+, or Ctrl+,) +. Search for `cline.mcpServers` +. Click *Edit in settings.json* +. Add the MCP server configuration: ++ +[,json] +---- +{ + "cline.mcpServers": [ + { + "name": "redpanda-ai-gateway", + "transport": "http", + "url": "https://gw.ai.panda.com/mcp", + "headers": { + "Authorization": "Bearer YOUR_API_KEY", + "rp-aigw-id": "GATEWAY_ID" + } + } + ] +} +---- + +Restart VS Code for changes to take effect. + +== Configure autonomous mode settings + +Optimize Cline's autonomous behavior when using AI Gateway. + +=== Set approval mode + +Control how often Cline requires your approval during autonomous tasks: + +[cols="1,2,2"] +|=== +|Mode |Behavior |Best for + +|*Always ask* +|Request approval for every action +|Testing, sensitive codebases, cost control + +|*Ask before terminal commands* +|Auto-approve file edits, ask for commands +|Trusted environments, faster iteration + +|*Autonomous* +|Complete tasks without interruption +|Well-scoped tasks, batch processing +|=== + +To set approval mode: + +. Open Cline settings +. Navigate to *Autonomous Mode* +. Select your preferred mode + +When using AI Gateway with spend limits, autonomous mode is safer because the gateway enforces budget controls even if Cline makes many requests. + +=== Configure custom instructions + +Add custom instructions to guide Cline's behavior and reduce token costs: + +. Open Cline settings +. Navigate to *Custom Instructions* +. 
Add instructions that reduce unnecessary requests: ++ +[,text] +---- +- Before making changes, analyze the codebase structure first +- Use existing code patterns instead of creating new ones +- Ask for clarification before large refactors +- Prefer small, incremental changes over complete rewrites +- Use MCP tools for research instead of multiple LLM calls +---- + +These instructions help Cline work more efficiently and reduce token usage. + +=== Enable browser integration + +Cline can use a browser to research documentation, which reduces the need for large context windows: + +. Open Cline settings +. Navigate to *Browser Integration* +. Enable *Allow Browser Access* +. Configure browser mode: +** *Headless* - Faster, lower resource usage +** *Visible* - See what Cline is browsing (useful for debugging) + +Browser integration is particularly useful with AI Gateway because: + +* Cline can look up current documentation instead of relying on outdated training data +* Reduces prompt token costs from pasting documentation into context +* Works with MCP tools that fetch web content + +== Verify configuration + +After configuring Cline, verify it connects correctly to your AI Gateway. + +=== Test LLM routing + +Send a test message in the Cline chat panel: + +. Open the Cline sidebar in VS Code +. Type a simple request: "Explain this file" (with a file open) +. Wait for response + +Then verify in the AI Gateway dashboard: + +. Open the Redpanda Cloud Console +. Navigate to your gateway's observability dashboard +. Filter by gateway ID +. Verify: +** Request appears in logs +** Model shows correct format (for example, `anthropic/claude-sonnet-4-5`) +** Token usage and cost are recorded + +If the request doesn't appear, see <>. + +=== Test MCP tool usage + +Verify Cline can discover and invoke MCP tools: + +. In the Cline chat, request a task that requires a tool +. For example: "Use the weather tool to check the forecast" +. 
Cline should: +** Discover the tool from the MCP server +** Invoke it with correct parameters +** Return the result + +Check the gateway dashboard for MCP tool invocation logs. + +=== Monitor token costs + +Track Cline's token usage to identify optimization opportunities: + +. Open the AI Gateway observability dashboard +. Filter by your gateway +. View metrics: +** Requests per hour +** Token usage per request (prompt + completion) +** Estimated cost per request + +High token costs may indicate: + +* Context windows that are too large (Cline includes many files unnecessarily) +* Repeated requests for the same information (use custom instructions to prevent this) +* Missing MCP tools that could replace multi-turn conversations + +== Advanced configuration + +=== Model selection strategies + +Different models have different cost and performance characteristics. Configure Cline to use the right model for each task: + +==== Strategy 1: Single high-quality model + +Use one premium model for all tasks. + +Configuration: + +* Model: `anthropic/claude-sonnet-4-5` +* Best for: Complex codebases, high-quality output requirements +* Cost: Higher, but consistent + +==== Strategy 2: Task-based model switching + +Use the gateway's CEL routing to automatically select models based on task complexity. + +Gateway configuration (set in AI Gateway UI): + +[,cel] +---- +// Route simple edits to cost-effective model +request.messages[0].content.contains("fix typo") || +request.messages[0].content.contains("rename") ? + "anthropic/claude-haiku" : + "anthropic/claude-sonnet-4-5" +---- + +This approach requires no changes to Cline configuration. The gateway makes routing decisions transparently. 
+ +==== Strategy 3: Multiple Cline profiles + +Create separate VS Code workspace settings for different projects: + +.Project A (high complexity) +[,json] +---- +{ + "cline.apiProvider": "OpenAI Compatible", + "cline.baseURL": "https://gw.ai.panda.com", + "cline.model": "anthropic/claude-opus-4-5" +} +---- + +.Project B (simple tasks) +[,json] +---- +{ + "cline.apiProvider": "OpenAI Compatible", + "cline.baseURL": "https://gw.ai.panda.com", + "cline.model": "anthropic/claude-haiku" +} +---- + +=== Request timeout configuration + +For long-running tool executions or complex code generation: + +. Open VS Code settings +. Search for `cline.requestTimeout` +. Set timeout in milliseconds (default: 60000) ++ +[,json] +---- +{ + "cline.requestTimeout": 120000 +} +---- + +Increase this value if Cline times out during large refactoring tasks or when using slow MCP tools. + +=== Debug mode + +Enable debug logging to troubleshoot connection issues: + +. Open VS Code settings +. Search for `cline.debug` +. Enable debug mode: ++ +[,json] +---- +{ + "cline.debug": true +} +---- + +Debug logs appear in the VS Code Output panel: + +. Open Output panel (View → Output) +. Select "Cline" from the dropdown +. View HTTP request and response details + +Debug mode shows: + +* Full request and response payloads +* Gateway routing headers +* MCP tool discovery messages +* Error details + +=== Environment-based configuration + +Use different gateways for different environments without changing settings manually. + +IMPORTANT: VS Code's `.vscode/settings.json` does not natively support environment variable substitution with the `${VAR}` syntax shown below. You must either install an extension that provides variable substitution, replace the placeholders manually with actual values, or set environment variables before launching VS Code. 
+ +Create workspace-specific configurations: + +.Development workspace (.vscode/settings.json) +[,json] +---- +{ + "cline.apiProvider": "OpenAI Compatible", + "cline.baseURL": "${GATEWAY_DEV_URL}", + "cline.customHeaders": { + "rp-aigw-id": "${GATEWAY_DEV_ID}" + } +} +---- + +.Production workspace (.vscode/settings.json) +[,json] +---- +{ + "cline.apiProvider": "OpenAI Compatible", + "cline.baseURL": "${GATEWAY_PROD_URL}", + "cline.customHeaders": { + "rp-aigw-id": "${GATEWAY_PROD_ID}" + } +} +---- + +Set environment variables before launching VS Code: + +[,bash] +---- +export GATEWAY_DEV_URL="https://gw.staging.ai.panda.com" +export GATEWAY_DEV_ID="staging-gateway-123" +export GATEWAY_PROD_URL="https://gw.ai.panda.com" +export GATEWAY_PROD_ID="prod-gateway-456" +---- + +On Windows (PowerShell): + +[,powershell] +---- +$env:GATEWAY_DEV_URL = "https://gw.staging.ai.panda.com" +$env:GATEWAY_DEV_ID = "staging-gateway-123" +---- + +[[troubleshooting]] +== Troubleshooting + +=== Cline shows "Connection failed" + +**Symptom**: Cline settings show connection failed, or requests return errors. + +**Causes and solutions**: + +. **Incorrect base URL** ++ +Verify your base URL does NOT include `/v1` or `/chat/completions`: ++ +[,text] +---- +# Correct +https://gw.ai.panda.com + +# Incorrect +https://gw.ai.panda.com/v1 +https://gw.ai.panda.com/chat/completions +---- ++ +Cline appends the correct path automatically. + +. **Authentication failure** ++ +Verify your API key is valid: ++ +[,bash] +---- +curl -H "Authorization: Bearer YOUR_API_KEY" \ + -H "rp-aigw-id: GATEWAY_ID" \ + https://gw.ai.panda.com/v1/models +---- ++ +You should receive a list of available models. If you get `401 Unauthorized`, regenerate your API key in the Redpanda Cloud Console. + +. **Gateway ID mismatch** ++ +Check that the `rp-aigw-id` header matches your gateway ID exactly (case-sensitive). Copy it directly from the AI Gateway UI. + +. 
**Network connectivity issues** ++ +Test basic connectivity: ++ +[,bash] +---- +curl -I https://gw.ai.panda.com +---- ++ +If this times out, check your network configuration, firewall rules, or VPN connection. + +=== MCP tools not appearing + +**Symptom**: Cline doesn't see tools from the MCP server, or tool discovery fails. + +**Causes and solutions**: + +. **MCP endpoint incorrect** ++ +Verify the MCP endpoint is correct. It should be `{gateway-url}/mcp`, not just `{gateway-url}`: ++ +[,text] +---- +# Correct +https://gw.ai.panda.com/mcp + +# Incorrect +https://gw.ai.panda.com +---- + +. **No MCP servers configured in gateway** ++ +Verify your gateway has at least one MCP server enabled in the AI Gateway UI. + +. **Deferred tool loading enabled** ++ +If deferred tool loading is enabled, you'll see only a search tool initially. This is expected behavior. Tools load on-demand when Cline needs them. + +. **MCP server unreachable** ++ +Test the MCP endpoint directly: ++ +[,bash] +---- +curl -H "Authorization: Bearer YOUR_API_KEY" \ + -H "rp-aigw-id: GATEWAY_ID" \ + https://gw.ai.panda.com/mcp +---- ++ +You should receive a valid MCP protocol response listing available tools. + +=== Requests not appearing in gateway dashboard + +**Symptom**: Cline works, but you don't see requests in the AI Gateway observability dashboard. + +**Causes and solutions**: + +. **Wrong gateway configured** ++ +Verify that the `rp-aigw-id` header in your Cline configuration matches the gateway you're viewing in the dashboard. + +. **Using direct provider connection** ++ +If you configured Cline with a provider's API directly (not the gateway URL), requests won't route through the gateway. Verify the base URL is your gateway endpoint. + +. **Log ingestion delay** ++ +Gateway logs can take 5-10 seconds to appear in the dashboard. Wait briefly and refresh. + +. 
**Model name format error**
++
+Ensure requests use the `vendor/model_id` format (for example, `anthropic/claude-sonnet-4-5`), not just the model name (for example, `claude-sonnet-4-5`). Check the model field in Cline settings.
+
+=== High token costs
+
+**Symptom**: Cline uses more tokens than expected, resulting in high costs.
+
+**Causes and solutions**:
+
+. **Large context windows**
++
+Cline may be including too many files in the context. Solutions:
++
+* Use custom instructions to limit file inclusion
+* Create a `.clineignore` file to exclude unnecessary files
+* Break large tasks into smaller, focused subtasks
+
+. **Repeated requests**
++
+Cline may be making redundant requests for the same information. Solutions:
++
+* Add custom instructions to prevent repeated analysis
+* Use MCP tools to fetch external information instead of asking the LLM
+* Enable caching in the gateway (if available)
+
+. **Wrong model selected**
++
+You may be using a premium model for simple tasks. Solutions:
++
+* Switch to a cost-effective model (for example, `anthropic/claude-haiku`)
+* Use gateway CEL routing to automatically select models based on task complexity
+
+. **MCP tool overhead**
++
+If not using deferred tool loading, all tools load with every request. Solution:
++
+* Enable deferred tool loading in your AI Gateway configuration (see xref:ai-agents:ai-gateway/mcp-aggregation-guide.adoc[])
+
+=== Cline hangs or times out
+
+**Symptom**: Cline stops responding or shows timeout errors.
+
+**Causes and solutions**:
+
+. **Request timeout too low**
++
+Increase the timeout in VS Code settings:
++
+[,json]
+----
+{
+  "cline.requestTimeout": 120000
+}
+----
+
+. **Long-running MCP tool**
++
+Some MCP tools take time to execute. Check the gateway observability dashboard to see if tool execution is slow.
+
+. **Gateway rate limiting**
++
+You may be hitting rate limits. Check the dashboard for rate limit metrics and increase limits if needed.
+
+. 
**Provider outage** ++ +Check the AI Gateway dashboard for provider status. If the primary provider is down, configure failover (see xref:ai-agents:ai-gateway/gateway-quickstart.adoc#configure-provider-pool-with-fallback[Configure failover]). + +=== Settings changes not taking effect + +**Symptom**: Changes to Cline settings or VS Code configuration don't apply. + +**Solutions**: + +. **Reload VS Code** ++ +Some settings require reloading: ++ +* Open Command Palette (Cmd+Shift+P or Ctrl+Shift+P) +* Search for `Developer: Reload Window` +* Select and confirm + +. **Workspace settings override** ++ +Check if workspace settings (`.vscode/settings.json`) override user settings. Workspace settings take precedence. + +. **Invalid JSON syntax** ++ +If editing `settings.json` manually, validate JSON syntax. VS Code shows syntax errors in the editor. + +== Cost optimization tips + +=== Use the right model for each task + +Match model selection to task complexity: + +[cols="1,2,1"] +|=== +|Task type |Recommended model |Reason + +|Simple edits (typos, renames) +|`anthropic/claude-haiku` +|Low cost, fast + +|Code review, analysis +|`anthropic/claude-sonnet-3.5` +|Balanced quality and cost + +|Complex refactors, architecture +|`anthropic/claude-sonnet-4-5` or `anthropic/claude-opus-4-5` +|High quality for critical work +|=== + +Configure CEL routing in the gateway to automate model selection. + +=== Reduce context window size + +Limit the number of files Cline includes in requests: + +. Create a `.clineignore` file in your workspace root: ++ +[,text] +---- +# Exclude build artifacts +dist/ +build/ +node_modules/ + +# Exclude test files when not testing +**/*.test.js +**/*.spec.ts + +# Exclude documentation +docs/ +*.md +---- + +. 
Use custom instructions to guide file selection: ++ +[,text] +---- +- Only include files directly related to the task +- Ask which files to include if unsure +- Exclude test files unless specifically working on tests +---- + +=== Use MCP tools instead of large prompts + +Replace long documentation pastes with MCP tools: + +Before (high token cost): + +* User pastes API documentation into Cline chat +* Cline uses documentation to write integration code +* Thousands of tokens used for documentation + +After (low token cost): + +* Configure an MCP tool that searches API documentation +* Cline queries the tool for specific information as needed +* Only relevant sections included in context + +See xref:ai-agents:ai-gateway/mcp-aggregation-guide.adoc[] for MCP tool configuration. + +=== Enable deferred tool loading + +If using multiple MCP servers, enable deferred tool loading in your gateway configuration to reduce token costs by 80-90%. + +This loads only essential tools initially. Cline queries for additional tools on-demand. + +=== Monitor and set spend limits + +Use AI Gateway spend limits to prevent runaway costs: + +. Navigate to your gateway in the Redpanda Cloud Console +. Set monthly spend limit (for example, $500/month) +. Configure alerts before reaching limit + +The gateway automatically blocks requests that would exceed the limit. 
+ +== Next steps + +* xref:ai-agents:ai-gateway/mcp-aggregation-guide.adoc[]: Configure deferred tool loading to reduce token costs +* xref:ai-agents:ai-gateway/cel-routing-cookbook.adoc[]: Use CEL expressions to route Cline requests based on task complexity + +== Related pages + +* xref:ai-agents:ai-gateway/gateway-quickstart.adoc[]: Create and configure your AI Gateway +* xref:ai-agents:ai-gateway/gateway-architecture.adoc[]: Learn about AI Gateway architecture and benefits +* xref:ai-agents:ai-gateway/integrations/claude-code-user.adoc[]: Configure Claude Code with AI Gateway diff --git a/modules/ai-agents/pages/ai-gateway/integrations/continue-admin.adoc b/modules/ai-agents/pages/ai-gateway/integrations/continue-admin.adoc new file mode 100644 index 000000000..6e160c87c --- /dev/null +++ b/modules/ai-agents/pages/ai-gateway/integrations/continue-admin.adoc @@ -0,0 +1,760 @@ += Configure AI Gateway for Continue.dev +:description: Configure Redpanda AI Gateway to support Continue.dev clients. +:page-topic-type: how-to +:personas: platform_admin +:learning-objective-1: Configure AI Gateway endpoints for Continue.dev connectivity +:learning-objective-2: Set up multi-provider backends with native format routing +:learning-objective-3: Deploy MCP tool aggregation for Continue.dev tool discovery + +include::ai-agents:partial$ai-gateway-byoc-note.adoc[] + +Configure Redpanda AI Gateway to support Continue.dev clients accessing multiple LLM providers and MCP tools through flexible, native-format endpoints. + +After reading this page, you will be able to: + +* [ ] Configure AI Gateway endpoints for Continue.dev connectivity. +* [ ] Set up multi-provider backends with native format routing. +* [ ] Deploy MCP tool aggregation for Continue.dev tool discovery. 
+ +== Prerequisites + +* AI Gateway deployed on a BYOC cluster running Redpanda version 25.3 or later +* Administrator access to the AI Gateway UI +* API keys for at least one LLM provider (Anthropic, OpenAI, or others) +* Understanding of xref:ai-agents:ai-gateway/gateway-architecture.adoc[AI Gateway concepts] + +== About Continue.dev + +Continue.dev is a highly configurable open-source AI coding assistant that integrates with VS Code and JetBrains IDEs. Unlike other AI assistants, Continue.dev uses native provider API formats rather than requiring transforms to a unified format. This architectural choice provides maximum flexibility but requires specific gateway configuration. + +Key characteristics: + +* Uses native provider formats (Anthropic format for Anthropic, OpenAI format for OpenAI) +* Supports multiple LLM providers simultaneously with per-provider configuration +* Custom API endpoints via `apiBase` configuration +* Custom headers via `requestOptions.headers` +* Built-in MCP support for tool discovery and execution +* Autocomplete, chat, and inline edit modes + +== Architecture overview + +Continue.dev connects to AI Gateway differently than unified-format clients: + +* Each provider requires a separate backend configured without format transforms +* LLM endpoint: `https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1/{provider}` (provider-specific paths) +* MCP endpoint: `https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/mcp` for tool discovery and execution + +The gateway handles: + +. Authentication via bearer tokens in the `Authorization` header +. Gateway selection via the `rp-aigw-id` header +. Provider-specific request formats without transformation +. Model routing using provider-native model identifiers +. MCP server aggregation for multi-tool workflows +. Request logging and cost tracking per gateway + +== Enable LLM providers + +Continue.dev works with multiple providers. Enable the providers your users will access. 
+ +=== Configure Anthropic + +To enable Anthropic with native format support: + +. Navigate to *AI Gateway* > *Providers* in the Redpanda Cloud console +. Select *Anthropic* from the provider list +. Click *Add configuration* +. Enter your Anthropic API key +. Under *Format*, select *Native Anthropic* (not OpenAI-compatible) +. Click *Save* + +The gateway now accepts Anthropic's native `/v1/messages` format. + +=== Configure OpenAI + +To enable OpenAI: + +. Navigate to *AI Gateway* > *Providers* +. Select *OpenAI* from the provider list +. Click *Add configuration* +. Enter your OpenAI API key +. Under *Format*, select *Native OpenAI* +. Click *Save* + +=== Configure additional providers + +Continue.dev supports many providers. For each provider: + +. Add the provider configuration in the gateway +. Ensure the format is set to the provider's native format +. Do not enable format transforms (Continue.dev handles format differences in its client code) + +Common additional providers: + +* Google Gemini (native Google format) +* Mistral AI (OpenAI-compatible format) +* Together AI (OpenAI-compatible format) +* Ollama (OpenAI-compatible format for local models) + +=== Enable models in the catalog + +After enabling providers, enable specific models: + +. Navigate to *AI Gateway* > *Models* +. Enable the models you want Continue.dev clients to access ++ +Common models for Continue.dev: ++ +* `claude-opus-4-5` (Anthropic, high quality) +* `claude-sonnet-4-5` (Anthropic, balanced) +* `gpt-4o` (OpenAI, high quality) +* `gpt-4o-mini` (OpenAI, fast autocomplete) +* `o1-mini` (OpenAI, reasoning) + +. Click *Save* + +Continue.dev uses provider-native model identifiers (for example, `claude-sonnet-4-5` not `anthropic/claude-sonnet-4-5`). + +== Create a gateway for Continue.dev clients + +Create a dedicated gateway to isolate Continue.dev traffic and apply specific policies. + +=== Gateway configuration + +. Navigate to *AI Gateway* > *Gateways* +. Click *Create Gateway* +. 
Enter gateway details: ++ +[cols="1,2"] +|=== +|Field |Value + +|Name +|`continue-gateway` (or your preferred name) + +|Workspace +|Select the workspace for access control grouping + +|Description +|Gateway for Continue.dev IDE clients +|=== + +. Click *Create* +. Copy the gateway ID from the gateway details page + +The gateway ID is required in the `rp-aigw-id` header for all requests. + +=== Configure provider-specific backends + +Continue.dev requires separate backend configurations for each provider because it uses native formats. + +==== Anthropic backend + +. Navigate to the gateway's *Backends* tab +. Click *Add Backend* +. Configure: ++ +[cols="1,2"] +|=== +|Field |Value + +|Backend name +|`anthropic-native` + +|Provider +|Anthropic + +|Format +|Native Anthropic (no transform) + +|Path +|`/v1/anthropic` + +|Enabled models +|All Anthropic models you enabled in the catalog +|=== + +. Click *Save* + +Continue.dev will send requests to `https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1/anthropic` using Anthropic's native format. + +==== OpenAI backend + +. Click *Add Backend* +. Configure: ++ +[cols="1,2"] +|=== +|Field |Value + +|Backend name +|`openai-native` + +|Provider +|OpenAI + +|Format +|Native OpenAI (no transform) + +|Path +|`/v1/openai` + +|Enabled models +|All OpenAI models you enabled in the catalog +|=== + +. Click *Save* + +Continue.dev will send requests to `https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1/openai` using OpenAI's native format. + +==== Additional provider backends + +Repeat the backend configuration process for each provider: + +* Google Gemini: `/v1/google`, native Google format +* Mistral: `/v1/mistral`, OpenAI-compatible format +* Ollama (if proxying local models): `/v1/ollama`, OpenAI-compatible format + +=== Configure LLM routing + +Set up routing policies for Continue.dev requests. + +==== Per-provider routing + +Configure routing rules that apply to each backend: + +. Navigate to the gateway's *Routing* tab +. 
For each backend, click *Add Route* +. Configure basic routing: ++ +[source,cel] +---- +true # Matches all requests to this backend +---- + +. Add a primary provider configuration with your Anthropic API key +. (Optional) Add a fallback configuration for redundancy if you have multiple API keys +. Click *Save* + +==== Provider failover + +For providers with multiple API keys, configure failover: + +. In the backend's routing configuration, add multiple provider configurations +. Set failover conditions: ++ +* Rate limits (HTTP 429) +* Timeouts (no response within 30 seconds) +* 5xx errors (provider unavailable) + +. Configure load balancing: Round robin across available keys +. Click *Save* + +Continue.dev requests automatically fail over to healthy API keys when the primary key experiences issues. + +=== Apply rate limits + +Prevent runaway usage from Continue.dev clients: + +. Navigate to the gateway's *Rate Limits* tab +. Configure global limits: ++ +[cols="1,2"] +|=== +|Setting |Recommended Value + +|Global rate limit +|200 requests per minute (Continue.dev autocomplete can generate many requests) + +|Per-user rate limit +|20 requests per minute (if using user identification headers) + +|Per-backend limits +|Vary by provider (autocomplete backends need higher limits) +|=== + +. Click *Save* + +The gateway blocks requests exceeding these limits and returns HTTP 429 errors. + +==== Rate limit considerations for autocomplete + +Continue.dev's autocomplete feature generates frequent, short requests. Configure higher rate limits for autocomplete-specific backends: + +* Autocomplete models (for example, `gpt-4o-mini`): 100 requests per minute per user +* Chat models (for example, `claude-sonnet-4-5`): 20 requests per minute per user + +=== Set spending limits + +Control LLM costs across all providers: + +. Navigate to the gateway's *Spend Limits* tab +. 
Configure: ++ +[cols="1,2"] +|=== +|Setting |Value + +|Monthly budget +|$10,000 (adjust based on expected usage) + +|Enforcement +|Block requests after budget exceeded + +|Alert threshold +|80% of budget (sends notification) +|=== + +. Click *Save* + +The gateway tracks estimated costs per request across all providers and blocks traffic when the monthly budget is exhausted. + +== Configure MCP tool aggregation + +Enable Continue.dev to discover and use tools from multiple MCP servers through a single endpoint. + +=== Add MCP servers + +. Navigate to the gateway's *MCP* tab +. Click *Add MCP Server* +. Enter server details: ++ +[cols="1,2"] +|=== +|Field |Value + +|Display name +|Descriptive name (for example, `redpanda-data-catalog`, `code-search-tools`) + +|Endpoint URL +|MCP server endpoint (for example, xref:ai-agents:mcp/remote/overview.adoc[Remote MCP server] URL) + +|Authentication +|Bearer token or other authentication mechanism +|=== + +. Click *Save* + +Repeat for each MCP server you want to aggregate. + +=== Enable deferred tool loading + +Reduce token costs for Continue.dev sessions with many available tools: + +. Under *MCP Settings*, enable *Deferred tool loading* +. Click *Save* + +When enabled: + +* Continue.dev initially receives only a search tool and orchestrator tool +* Continue.dev queries for specific tools by name when needed +* Token usage decreases by 80-90% for configurations with many tools + +This is particularly important for Continue.dev because autocomplete and chat modes both use tool discovery. + +=== Add the MCP orchestrator + +The MCP orchestrator reduces multi-step workflows to single calls: + +. Under *MCP Settings*, enable *MCP Orchestrator* +. 
Configure:
++
+[cols="1,2"]
+|===
+|Setting |Value
+
+|Orchestrator model
+|Select a model with strong code generation capabilities (for example, `claude-sonnet-4-5`)
+
+|Execution timeout
+|30 seconds
+
+|Backend
+|Select the Anthropic backend (orchestrator works best with Claude models)
+|===
+
+. Click *Save*
+
+Continue.dev can now invoke the orchestrator tool to execute complex, multi-step operations in a single request.
+
+== Configure authentication
+
+Continue.dev clients authenticate using bearer tokens.
+
+=== Generate API tokens
+
+. Navigate to *Security* > *API Tokens* in the Redpanda Cloud console
+. Click *Create Token*
+. Enter token details:
++
+[cols="1,2"]
+|===
+|Field |Value
+
+|Name
+|`continue-access`
+
+|Scopes
+|`ai-gateway:read`, `ai-gateway:write`
+
+|Expiration
+|Set appropriate expiration based on security policies
+|===
+
+. Click *Create*
+. Copy the token (it appears only once)
+
+Distribute this token to Continue.dev users through secure channels.
+
+=== Token rotation
+
+Implement token rotation for security:
+
+. Create a new token before the existing token expires
+. Distribute the new token to users
+. Monitor usage of the old token in the observability dashboard
+. Revoke the old token after all users have migrated
+
+== Configure Continue.dev clients
+
+Provide these instructions to users configuring Continue.dev in their IDE.
+
+=== Configuration file location
+
+Continue.dev supports both JSON and YAML configuration formats. This guide uses YAML (`config.yaml`) because it supports MCP server configuration and environment variable interpolation:
+
+* VS Code: `~/.continue/config.yaml`
+* JetBrains: `~/.continue/config.yaml`
+
+NOTE: While `config.json` is still supported for basic LLM configuration, `config.yaml` is required for MCP server integration. 
+ +=== Multi-provider configuration + +Users configure Continue.dev with separate provider entries for each backend: + +[source,yaml] +---- +models: + - title: Claude Sonnet (Redpanda) + provider: anthropic + model: claude-sonnet-4-5 + apiBase: https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1/anthropic + apiKey: YOUR_API_TOKEN + requestOptions: + headers: + rp-aigw-id: GATEWAY_ID + + - title: GPT-4o (Redpanda) + provider: openai + model: gpt-4o + apiBase: https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1/openai + apiKey: YOUR_API_TOKEN + requestOptions: + headers: + rp-aigw-id: GATEWAY_ID + + - title: GPT-4o-mini (Autocomplete) + provider: openai + model: gpt-4o-mini + apiBase: https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1/openai + apiKey: YOUR_API_TOKEN + requestOptions: + headers: + rp-aigw-id: GATEWAY_ID + +tabAutocompleteModel: + title: GPT-4o-mini (Autocomplete) + provider: openai + model: gpt-4o-mini + apiBase: https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1/openai + apiKey: YOUR_API_TOKEN + requestOptions: + headers: + rp-aigw-id: GATEWAY_ID +---- + +Replace: + +* `{CLUSTER_ID}`: Your Redpanda cluster ID +* `YOUR_API_TOKEN`: The API token generated earlier +* `GATEWAY_ID`: The gateway ID from gateway creation + +=== MCP server configuration + +Configure Continue.dev to connect to the aggregated MCP endpoint. + +==== Recommended: Directory-based configuration + +The preferred method is to create MCP server configuration files in the `~/.continue/mcpServers/` directory: + +. Create the directory: `mkdir -p ~/.continue/mcpServers` +. Create `~/.continue/mcpServers/redpanda-ai-gateway.yaml`: ++ +[source,yaml] +---- +transport: + type: streamable-http + url: https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/mcp + headers: + Authorization: Bearer YOUR_API_TOKEN + rp-aigw-id: GATEWAY_ID +---- ++ +IMPORTANT: For production deployments, use environment variable interpolation with `${{ secrets.VARIABLE }}` syntax instead of hardcoding tokens. 
See xref:ai-agents:ai-gateway/integrations/continue-user.adoc#configure-env-vars[Configure with environment variables] in the user guide for details. + +Continue.dev automatically discovers MCP server configurations in this directory. + +==== Alternative: Inline configuration + +Alternatively, embed MCP server configuration in `~/.continue/config.yaml`: + +[source,yaml] +---- +mcpServers: + - transport: + type: streamable-http + url: https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/mcp + headers: + Authorization: Bearer YOUR_API_TOKEN + rp-aigw-id: GATEWAY_ID +---- + +Replace: + +* `{CLUSTER_ID}`: Your Redpanda cluster ID +* `YOUR_API_TOKEN`: The API token generated earlier +* `GATEWAY_ID`: The gateway ID from gateway creation + +This configuration connects Continue.dev to the aggregated MCP endpoint with authentication and gateway identification headers. + +=== Model selection strategy + +Configure different models for different Continue.dev modes: + +[cols="1,2,1"] +|=== +|Mode |Recommended Model |Reason + +|Chat +|`claude-sonnet-4-5` or `gpt-4o` +|High quality for complex questions + +|Autocomplete +|`gpt-4o-mini` +|Fast, cost-effective for frequent requests + +|Inline edit +|`claude-sonnet-4-5` +|Balanced quality and speed for code modifications + +|Embeddings +|`text-embedding-3-small` +|Cost-effective for code search +|=== + +== Monitor Continue.dev usage + +Track Continue.dev activity through gateway observability features. + +=== View request logs + +. Navigate to *AI Gateway* > *Observability* > *Logs* +. Filter by gateway ID: `continue-gateway` +. 
Review: ++ +* Request timestamps and duration +* Backend and model used per request +* Token usage (prompt and completion tokens) +* Estimated cost per request +* HTTP status codes and errors + +Continue.dev generates different request patterns: + +* Autocomplete: Many short requests with low token counts +* Chat: Longer requests with context and multi-turn conversations +* Inline edit: Medium-length requests with code context + +=== Analyze metrics + +. Navigate to *AI Gateway* > *Observability* > *Metrics* +. Select the Continue.dev gateway +. Review: ++ +[cols="1,2"] +|=== +|Metric |Purpose + +|Request volume by backend +|Identify which providers are most used + +|Token usage by model +|Track consumption patterns (autocomplete vs chat) + +|Estimated spend by backend +|Monitor costs across providers + +|Latency (p50, p95, p99) by backend +|Detect provider-specific performance issues + +|Error rate by backend +|Identify failing providers or misconfigured backends +|=== + + +=== Query logs via API + +Programmatically access logs for integration with monitoring systems: + +[source,bash] +---- +curl https://{CLUSTER_ID}.cloud.redpanda.com/api/ai-gateway/logs \ + -H "Authorization: Bearer YOUR_API_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "gateway_id": "GATEWAY_ID", + "start_time": "2026-01-01T00:00:00Z", + "end_time": "2026-01-14T23:59:59Z", + "limit": 100 + }' +---- + +== Security considerations + +Apply these security best practices for Continue.dev deployments. + +=== Limit token scope + +Create tokens with minimal required scopes: + +* `ai-gateway:read`: Required for MCP tool discovery +* `ai-gateway:write`: Required for LLM requests and tool execution + +Avoid granting broader scopes like `admin` or `cluster:write`. + +=== Implement network restrictions + +If Continue.dev clients connect from known networks, configure network policies: + +. Use cloud provider security groups to restrict access to AI Gateway endpoints +. 
Allowlist only the IP ranges where Continue.dev clients operate +. Monitor for unauthorized access attempts in request logs + +=== Enforce token expiration + +Set short token lifetimes for high-security environments: + +* Development environments: 90 days +* Production environments: 30 days + +Automate token rotation to reduce manual overhead. + +=== Audit tool access + +Review which MCP tools Continue.dev clients can access: + +. Periodically audit the MCP servers configured in the gateway +. Remove unused or deprecated MCP servers +. Monitor tool execution logs for unexpected behavior + +=== Protect API keys in configuration + +Continue.dev stores the API token in plain text in `config.yaml`. Remind users to: + +* Never commit `config.yaml` to version control +* Use file system permissions to restrict access (for example, `chmod 600 ~/.continue/config.yaml`) +* Rotate tokens if they suspect compromise + +== Troubleshooting + +Common issues and solutions when configuring AI Gateway for Continue.dev. + +=== Continue.dev cannot connect to gateway + +Symptom: Connection errors when Continue.dev tries to discover tools or send LLM requests. + +Causes and solutions: + +* **Invalid gateway ID**: Verify the `rp-aigw-id` header matches the gateway ID from the console +* **Expired token**: Generate a new API token and update the Continue.dev configuration +* **Wrong backend path**: Verify `apiBase` matches the backend path (for example, `/v1/anthropic` not `/v1`) +* **Network connectivity**: Verify the cluster endpoint is accessible from the client network +* **Provider not enabled**: Ensure at least one backend is configured with models enabled + +=== Model not found errors + +Symptom: Continue.dev shows "model not found" or similar errors. 
+ +Causes and solutions: + +* **Model not enabled in catalog**: Enable the model in the gateway's model catalog +* **Model identifier mismatch**: Use provider-native names (for example, `claude-sonnet-4-5` not `anthropic/claude-sonnet-4-5`) +* **Wrong backend for model**: Verify the model is associated with the correct backend (Anthropic models with Anthropic backend) + +=== Format errors or unexpected responses + +Symptom: Responses are malformed or Continue.dev reports format errors. + +Causes and solutions: + +* **Transform enabled on backend**: Ensure backend format is set to native (no OpenAI-compatible transform for Anthropic) +* **Wrong provider for apiBase**: Verify Continue.dev's `provider` field matches the backend's provider +* **Headers not passed**: Confirm `requestOptions.headers` includes `rp-aigw-id` + +=== Autocomplete not working or slow + +Symptom: Autocomplete suggestions don't appear or are delayed. + +Causes and solutions: + +* **Wrong model for autocomplete**: Use a fast model like `gpt-4o-mini` in `tabAutocompleteModel` +* **Rate limits too restrictive**: Increase rate limits for autocomplete backend +* **High backend latency**: Check backend metrics and consider provider failover +* **Token exhaustion**: Verify spending limits haven't been reached + +=== Tools not appearing in Continue.dev + +Symptom: Continue.dev does not discover MCP tools. + +Causes and solutions: + +* **MCP configuration missing**: Ensure `mcpServers` is configured +* **MCP servers not configured in gateway**: Add MCP server endpoints in the gateway's MCP tab +* **Deferred loading enabled but search failing**: Check that the search tool is correctly configured +* **MCP server authentication failing**: Verify MCP server authentication credentials in the gateway configuration + +=== High costs or token usage + +Symptom: Token usage and costs exceed expectations. 
+ +Causes and solutions: + +* **Autocomplete using expensive model**: Configure `tabAutocompleteModel` to use `gpt-4o-mini` instead of larger models +* **Deferred tool loading disabled**: Enable deferred tool loading to reduce tokens by 80-90% +* **No rate limits**: Apply per-minute rate limits to prevent runaway usage +* **Missing spending limits**: Set monthly budget limits with blocking enforcement +* **Chat using wrong model**: Route chat requests to cost-effective models (for example, `claude-sonnet-4-5` instead of `claude-opus-4-5`) + +=== Requests failing with 429 errors + +Symptom: Continue.dev receives HTTP 429 Too Many Requests errors. + +Causes and solutions: + +* **Rate limit exceeded**: Review and increase rate limits if usage is legitimate (autocomplete needs higher limits) +* **Upstream provider rate limits**: Check if the upstream LLM provider is rate-limiting; configure failover to alternate API keys +* **Budget exhausted**: Verify monthly spending limit has not been reached + +=== Different results from different providers + +Symptom: Same prompt produces different results when switching providers. 
+ +This is expected behavior, not a configuration issue: + +* Different models have different capabilities and response styles +* Continue.dev uses native formats, which may include provider-specific parameters +* Users should select the appropriate model for their task (quality vs speed vs cost) + +== Next steps + +* xref:ai-agents:ai-gateway/cel-routing-cookbook.adoc[]: Implement advanced routing rules +* xref:ai-agents:mcp/remote/overview.adoc[]: Deploy Remote MCP servers for custom tools diff --git a/modules/ai-agents/pages/ai-gateway/integrations/continue-user.adoc b/modules/ai-agents/pages/ai-gateway/integrations/continue-user.adoc new file mode 100644 index 000000000..b8f282021 --- /dev/null +++ b/modules/ai-agents/pages/ai-gateway/integrations/continue-user.adoc @@ -0,0 +1,942 @@ += Configure Continue.dev with AI Gateway +:description: Configure Continue.dev to use Redpanda AI Gateway for unified LLM access, MCP tool integration, and AI-assisted coding. +:page-topic-type: how-to +:personas: ai_agent_developer, app_developer +:learning-objective-1: Configure Continue.dev to connect to AI Gateway for chat and autocomplete +:learning-objective-2: Set up MCP server integration through AI Gateway +:learning-objective-3: Optimize Continue.dev settings for cost and performance + +include::ai-agents:partial$ai-gateway-byoc-note.adoc[] + +After xref:ai-agents:ai-gateway/gateway-quickstart.adoc[configuring your AI Gateway], set up Continue.dev to route LLM requests and access MCP tools through the gateway's unified endpoints. + +After reading this page, you will be able to: + +* [ ] Configure Continue.dev to connect to AI Gateway for chat and autocomplete. +* [ ] Set up MCP server integration through AI Gateway. +* [ ] Optimize Continue.dev settings for cost and performance. 
+ +== Prerequisites + +Before configuring Continue.dev, ensure you have: + +* Continue.dev extension installed in your code editor: +** VS Code: Search for "Continue" in Extensions +** JetBrains IDEs: Install from the JetBrains Marketplace +* An active Redpanda AI Gateway with: +** At least one LLM provider enabled (see xref:ai-agents:ai-gateway/gateway-quickstart.adoc#step-1-enable-a-provider[Enable a provider]) +** A gateway created and configured (see xref:ai-agents:ai-gateway/gateway-quickstart.adoc#step-3-create-a-gateway[Create a gateway]) +* Your AI Gateway credentials: +** Gateway endpoint URL (for example, `https://gw.ai.panda.com`) +** Gateway ID (for example, `gateway-abc123`) +** API key with access to the gateway + +== About Continue.dev + +Continue.dev is an open-source AI coding assistant that integrates with VS Code and JetBrains IDEs. It provides: + +* Chat interface for code questions and generation +* Tab autocomplete powered by LLMs +* Codebase indexing for context-aware suggestions +* Slash commands for common workflows +* Extensible architecture with custom context providers + +By routing Continue.dev through AI Gateway, you gain centralized observability, cost controls, and the ability to aggregate multiple MCP servers into a single interface. 
+ +== Configuration files + +Continue.dev supports two configuration file formats: + +* `config.json` (legacy format) +* `config.yaml` (recommended format) + +Both files are stored in the same location: + +* VS Code: `~/.continue/` +* JetBrains: `~/.continue/` + +Create the directory if it doesn't exist: + +[,bash] +---- +mkdir -p ~/.continue +---- + +=== Choose a configuration format + +[cols="1,2,2"] +|=== +|Format |Use when |Limitations + +|`config.json` +|You need basic LLM configuration without MCP servers +|Does not support MCP server configuration or environment variable interpolation + +|`config.yaml` +|You need MCP server integration or environment variable interpolation +|Requires Continue.dev version that supports YAML (recent versions) +|=== + +TIP: Use `config.yaml` for new setups to take advantage of MCP server integration and the `${{ secrets.* }}` environment variable syntax. + +== Basic configuration + +Create or edit `~/.continue/config.json` with the following structure to connect to AI Gateway: + +[,json] +---- +{ + "models": [ + { + "title": "Redpanda AI Gateway - Claude", + "provider": "anthropic", + "model": "claude-sonnet-4-5", + "apiKey": "YOUR_REDPANDA_API_KEY", + "apiBase": "https://gw.ai.panda.com", + "requestOptions": { + "headers": { + "rp-aigw-id": "GATEWAY_ID" + } + } + } + ] +} +---- + +Replace placeholder values: + +* `YOUR_REDPANDA_API_KEY` - Your Redpanda API key +* `GATEWAY_ID` - Your gateway ID from the AI Gateway UI + +The `provider` field tells Continue.dev which SDK to use (Anthropic format), while `apiBase` routes the request through your gateway. The gateway then forwards the request to the appropriate provider based on the model name. + +== Configure multiple models + +Continue.dev can switch between different models for different tasks. 
Configure multiple models to optimize for quality and cost: + +[,json] +---- +{ + "models": [ + { + "title": "Gateway - Claude Sonnet (default)", + "provider": "anthropic", + "model": "claude-sonnet-4-5", + "apiKey": "YOUR_REDPANDA_API_KEY", + "apiBase": "https://gw.ai.panda.com", + "requestOptions": { + "headers": { + "rp-aigw-id": "GATEWAY_ID" + } + } + }, + { + "title": "Gateway - Claude Opus (complex tasks)", + "provider": "anthropic", + "model": "claude-opus-4-5", + "apiKey": "YOUR_REDPANDA_API_KEY", + "apiBase": "https://gw.ai.panda.com", + "requestOptions": { + "headers": { + "rp-aigw-id": "GATEWAY_ID" + } + } + }, + { + "title": "Gateway - GPT-4o", + "provider": "openai", + "model": "gpt-4o", + "apiKey": "YOUR_REDPANDA_API_KEY", + "apiBase": "https://gw.ai.panda.com", + "requestOptions": { + "headers": { + "rp-aigw-id": "GATEWAY_ID" + } + } + } + ] +} +---- + +Switch between models in Continue.dev's chat interface by clicking the model selector dropdown. + +== Configure tab autocomplete + +Continue.dev supports a separate model for tab autocomplete, which generates code suggestions as you type. Use a faster, cost-effective model for autocomplete: + +[,json] +---- +{ + "models": [ + { + "title": "Gateway - Claude Sonnet", + "provider": "anthropic", + "model": "claude-sonnet-4-5", + "apiKey": "YOUR_REDPANDA_API_KEY", + "apiBase": "https://gw.ai.panda.com", + "requestOptions": { + "headers": { + "rp-aigw-id": "GATEWAY_ID" + } + } + } + ], + "tabAutocompleteModel": { + "title": "Gateway - Claude Haiku (autocomplete)", + "provider": "anthropic", + "model": "claude-haiku", + "apiKey": "YOUR_REDPANDA_API_KEY", + "apiBase": "https://gw.ai.panda.com", + "requestOptions": { + "headers": { + "rp-aigw-id": "GATEWAY_ID" + } + } + } +} +---- + +This configuration uses Claude Sonnet for chat interactions and Claude Haiku for autocomplete. Haiku provides faster responses at lower cost, which is ideal for autocomplete where speed matters more than reasoning depth. 
+ +== Configure with OpenAI provider format + +AI Gateway supports both native provider formats and OpenAI-compatible format. If you prefer using the OpenAI format for all models, configure Continue.dev with the `openai` provider: + +[,json] +---- +{ + "models": [ + { + "title": "Gateway - Claude Sonnet (OpenAI format)", + "provider": "openai", + "model": "anthropic/claude-sonnet-4-5", + "apiKey": "YOUR_REDPANDA_API_KEY", + "apiBase": "https://gw.ai.panda.com/v1", + "requestOptions": { + "headers": { + "rp-aigw-id": "GATEWAY_ID" + } + } + }, + { + "title": "Gateway - GPT-4o (OpenAI format)", + "provider": "openai", + "model": "openai/gpt-4o", + "apiKey": "YOUR_REDPANDA_API_KEY", + "apiBase": "https://gw.ai.panda.com/v1", + "requestOptions": { + "headers": { + "rp-aigw-id": "GATEWAY_ID" + } + } + } + ] +} +---- + +When using OpenAI provider format: + +* Set `provider` to `"openai"` +* Add `/v1` to the `apiBase` URL +* Use the `vendor/model_id` format for model names (for example, `anthropic/claude-sonnet-4-5`) + +== Configure MCP server integration + +Connect Continue.dev to your AI Gateway's MCP endpoint to aggregate tools from multiple MCP servers. + +Add the MCP configuration to `config.yaml`: + +[,yaml] +---- +models: + - title: Gateway - Claude Sonnet + provider: anthropic + model: claude-sonnet-4-5 + apiKey: YOUR_REDPANDA_API_KEY + apiBase: https://gw.ai.panda.com + requestOptions: + headers: + rp-aigw-id: GATEWAY_ID + +mcpServers: + - transport: + type: streamable-http + url: https://gw.ai.panda.com/mcp + headers: + Authorization: Bearer YOUR_REDPANDA_API_KEY + rp-aigw-id: GATEWAY_ID +---- + +After adding this configuration: + +. Restart Continue.dev (reload your editor window) +. Click the tools icon in the Continue.dev sidebar +. Verify that tools from your configured MCP servers appear + +If using deferred tool loading in your gateway, you'll see a search tool and MCP orchestrator tool instead of all tools upfront. 
+ +[[configure-env-vars]] +== Configure with environment variables + +For sensitive credentials or multi-environment setups, use Continue.dev's secrets interpolation in `config.yaml`. + +IMPORTANT: Environment variable interpolation is only supported in `config.yaml` files. The `config.json` format does not support any form of variable substitution - all values must be hardcoded. + +[,yaml] +---- +models: + - title: Gateway - Claude Sonnet + provider: anthropic + model: claude-sonnet-4-5 + apiKey: ${{ secrets.REDPANDA_API_KEY }} + apiBase: ${{ secrets.REDPANDA_GATEWAY_URL }} + requestOptions: + headers: + rp-aigw-id: ${{ secrets.REDPANDA_GATEWAY_ID }} + +mcpServers: + - transport: + type: streamable-http + url: ${{ secrets.REDPANDA_GATEWAY_URL }}/mcp + headers: + Authorization: Bearer ${{ secrets.REDPANDA_API_KEY }} + rp-aigw-id: ${{ secrets.REDPANDA_GATEWAY_ID }} +---- + +IMPORTANT: Continue.dev uses the `${{ secrets.* }}` syntax for interpolation in `config.yaml`. Do not use the `${VAR}` shell syntax - Continue.dev treats it as a literal string rather than performing substitution. + +Set secrets in Continue.dev settings: + +. Open Continue.dev settings in your IDE +. Navigate to the "Secrets" section +. Add the following secrets: ++ +* `REDPANDA_GATEWAY_URL`: `https://gw.ai.panda.com` +* `REDPANDA_GATEWAY_ID`: `gateway-abc123` +* `REDPANDA_API_KEY`: `your-api-key` + +== Project-level configuration + +Override global settings for specific projects by creating `.continuerc.json` in your project root: + +[,json] +---- +{ + "models": [ + { + "title": "Project Gateway - Claude Haiku", + "provider": "anthropic", + "model": "claude-haiku", + "apiKey": "your_project_api_key_here", + "apiBase": "https://gw.project.ai.panda.com", + "requestOptions": { + "headers": { + "rp-aigw-id": "your_project_gateway_id_here" + } + } + } + ] +} +---- + +IMPORTANT: `.continuerc.json` does not support environment variable interpolation. You must hardcode values in this file. 
For dynamic configuration, use `~/.continue/config.yaml` with `${{ secrets.* }}` syntax (see <<configure-env-vars>>) or create a `~/.continue/config.ts` file for programmatic environment access. 
+ +Project-level configuration takes precedence over global configuration. Use this to: + +* Route different projects through different gateways +* Use cost-effective models for internal projects +* Use premium models for customer-facing projects +* Separate billing between projects + +== Verify configuration + +After configuring Continue.dev, verify it connects correctly to your AI Gateway. + +=== Test chat interface + +. Open Continue.dev sidebar in your editor +. Type a simple question: "What does this function do?" (with a file open) +. Wait for response + +Then verify in the AI Gateway dashboard: + +. Open the Redpanda Cloud Console +. Navigate to your gateway's observability dashboard +. Filter by gateway ID +. Verify: +** Request appears in logs +** Model shows correct format (for example, `claude-sonnet-4-5` for Anthropic native or `anthropic/claude-sonnet-4-5` for OpenAI format) +** Token usage and cost are recorded + +If the request doesn't appear, see <<troubleshooting>>. + +=== Test tab autocomplete + +. Open a code file in your editor +. Start typing a function or class definition +. Wait for autocomplete suggestions to appear + +Autocomplete requests also appear in the gateway dashboard, typically with: + +* Lower token counts than chat requests +* Higher request frequency +* The autocomplete model you configured + +=== Test MCP tool integration + +If you configured MCP servers: + +. Open Continue.dev chat +. Ask a question that requires a tool: "What's the weather forecast?" +. Continue.dev should: +** Discover the tool from the MCP server +** Invoke it with correct parameters +** Return the result + +Check the gateway dashboard for MCP tool invocation logs. 
+ +== Advanced configuration + +=== Custom request headers + +Add custom headers for request tracking or routing: + +[,json] +---- +{ + "models": [ + { + "title": "Gateway - Claude Sonnet", + "provider": "anthropic", + "model": "claude-sonnet-4-5", + "apiKey": "YOUR_REDPANDA_API_KEY", + "apiBase": "https://gw.ai.panda.com", + "requestOptions": { + "headers": { + "rp-aigw-id": "GATEWAY_ID", + "x-user-id": "developer-123", + "x-project": "main-app" + } + } + } + ] +} +---- + +Use these headers with gateway CEL routing to: + +* Track costs per developer +* Route based on project type +* Apply different rate limits per user + +=== Temperature and max tokens + +Configure model parameters for different behaviors: + +[,json] +---- +{ + "models": [ + { + "title": "Gateway - Precise (low temperature)", + "provider": "anthropic", + "model": "claude-sonnet-4-5", + "apiKey": "YOUR_REDPANDA_API_KEY", + "apiBase": "https://gw.ai.panda.com", + "requestOptions": { + "headers": { + "rp-aigw-id": "GATEWAY_ID" + } + }, + "completionOptions": { + "temperature": 0.2, + "maxTokens": 2048 + } + }, + { + "title": "Gateway - Creative (high temperature)", + "provider": "anthropic", + "model": "claude-sonnet-4-5", + "apiKey": "YOUR_REDPANDA_API_KEY", + "apiBase": "https://gw.ai.panda.com", + "requestOptions": { + "headers": { + "rp-aigw-id": "GATEWAY_ID" + } + }, + "completionOptions": { + "temperature": 0.8, + "maxTokens": 4096 + } + } + ] +} +---- + +* Lower temperature (0.0-0.3): More deterministic, better for code generation +* Higher temperature (0.7-1.0): More creative, better for brainstorming +* `maxTokens`: Limit response length to control costs + +=== Context providers + +Configure which code context Continue.dev includes in requests: + +[,json] +---- +{ + "models": [ + { + "title": "Gateway - Claude Sonnet", + "provider": "anthropic", + "model": "claude-sonnet-4-5", + "apiKey": "YOUR_REDPANDA_API_KEY", + "apiBase": "https://gw.ai.panda.com", + "requestOptions": { + "headers": { + 
"rp-aigw-id": "GATEWAY_ID" + } + } + } + ], + "contextProviders": [ + { + "name": "code", + "params": { + "maxFiles": 5 + } + }, + { + "name": "diff" + }, + { + "name": "terminal" + } + ] +} +---- + +Available context providers: + +* `code`: Includes open files and highlighted code +* `diff`: Includes git diff of current changes +* `terminal`: Includes recent terminal output +* `problems`: Includes editor warnings and errors +* `folder`: Includes file tree structure + +Limiting context providers reduces token usage and costs. + +=== Slash commands + +Configure custom slash commands for common workflows: + +[,json] +---- +{ + "models": [ + { + "title": "Gateway - Claude Sonnet", + "provider": "anthropic", + "model": "claude-sonnet-4-5", + "apiKey": "YOUR_REDPANDA_API_KEY", + "apiBase": "https://gw.ai.panda.com", + "requestOptions": { + "headers": { + "rp-aigw-id": "GATEWAY_ID" + } + } + } + ], + "slashCommands": [ + { + "name": "review", + "description": "Review code for bugs and improvements", + "prompt": "Review this code for potential bugs, performance issues, and suggest improvements. Focus on:\n- Error handling\n- Edge cases\n- Code clarity\n\n{{{ input }}}" + }, + { + "name": "test", + "description": "Generate unit tests", + "prompt": "Generate comprehensive unit tests for this code. Include:\n- Happy path tests\n- Edge case tests\n- Error handling tests\n\n{{{ input }}}" + } + ] +} +---- + +Use slash commands in Continue.dev chat: + +* `/review` - Triggers code review prompt +* `/test` - Generates tests + +Custom commands help standardize prompts across teams and reduce token costs by avoiding repetitive instruction typing. + +[[troubleshooting]] +== Troubleshooting + +=== Continue.dev shows connection error + +**Symptom**: Continue.dev displays "Failed to connect" or requests return errors. + +**Causes and solutions**: + +. 
**Incorrect apiBase URL** ++ +Verify the URL format matches your provider choice: ++ +[,text] +---- +# Anthropic/native format (no /v1) +"apiBase": "https://gw.ai.panda.com" + +# OpenAI format (with /v1) +"apiBase": "https://gw.ai.panda.com/v1" +---- + +. **Provider mismatch** ++ +Ensure the `provider` field matches the API format you're using: ++ +* Native Anthropic: `"provider": "anthropic"` with no `/v1` in URL +* Native OpenAI: `"provider": "openai"` with `/v1` in URL +* OpenAI-compatible: `"provider": "openai"` with `/v1` in URL + +. **Authentication failure** ++ +Verify your API key is valid: ++ +[,bash] +---- +curl -H "Authorization: Bearer YOUR_API_KEY" \ + -H "rp-aigw-id: GATEWAY_ID" \ + https://gw.ai.panda.com/v1/models +---- ++ +You should receive a list of available models. If you get `401 Unauthorized`, regenerate your API key in the Redpanda Cloud Console. + +. **Gateway ID mismatch** ++ +Check that the `rp-aigw-id` header matches your gateway ID exactly (case-sensitive). Copy it directly from the AI Gateway UI. + +. **Invalid JSON syntax** ++ +Validate your `config.json` file: ++ +[,bash] +---- +python3 -m json.tool ~/.continue/config.json +---- ++ +Fix any syntax errors reported. + +=== Autocomplete not working + +**Symptom**: Tab autocomplete suggestions don't appear or are very slow. + +**Causes and solutions**: + +. **No autocomplete model configured** ++ +Verify `tabAutocompleteModel` is set in `config.json`. If missing, Continue.dev may fall back to chat model, which is slower and more expensive. + +. **Model too slow** ++ +Use a faster model for autocomplete: ++ +[,json] +---- +{ + "tabAutocompleteModel": { + "title": "Gateway - Claude Haiku", + "provider": "anthropic", + "model": "claude-haiku", + "apiKey": "YOUR_API_KEY", + "apiBase": "https://gw.ai.panda.com", + "requestOptions": { + "headers": { + "rp-aigw-id": "GATEWAY_ID" + } + } + } +} +---- + +. **Network latency** ++ +Check gateway latency in the observability dashboard. 
If p95 latency is over 500ms, autocomplete will feel slow. Consider: ++ +* Using a gateway in a closer geographic region +* Switching to a faster model (Haiku over Sonnet) + +. **Autocomplete disabled** ++ +Check Continue.dev settings in your editor: ++ +* VS Code: Settings → Continue → Enable Tab Autocomplete +* JetBrains: Settings → Tools → Continue → Enable Autocomplete + +=== MCP tools not appearing + +**Symptom**: Continue.dev doesn't show tools from the MCP server. + +**Causes and solutions**: + +. **MCP configuration missing** ++ +Verify the `mcpServers` section exists in `config.yaml`. + +. **Incorrect MCP endpoint** ++ +The MCP URL should be `{gateway-url}/mcp`: ++ +[,text] +---- +# Correct +"url": "https://gw.ai.panda.com/mcp" + +# Incorrect +"url": "https://gw.ai.panda.com" +---- + +. **No MCP servers in gateway** ++ +Verify your gateway has at least one MCP server configured in the AI Gateway UI. + +. **Deferred tool loading enabled** ++ +If deferred tool loading is enabled, you'll see only a search tool initially. This is expected behavior. + +. **Editor restart needed** ++ +MCP configuration changes require reloading the editor window: ++ +* VS Code: Command Palette → Developer: Reload Window +* JetBrains: File → Invalidate Caches / Restart + +=== Requests not appearing in gateway dashboard + +**Symptom**: Continue.dev works, but requests don't appear in the AI Gateway observability dashboard. + +**Causes and solutions**: + +. **Wrong gateway ID** ++ +Verify that the `rp-aigw-id` header matches the gateway you're viewing in the dashboard. + +. **Missing header** ++ +Ensure the `rp-aigw-id` header is in the `requestOptions.headers` section, not at the top level. + +. **Using direct provider connection** ++ +If `apiBase` points directly to a provider (for example, `https://api.anthropic.com`), requests won't route through the gateway. Verify it points to your gateway endpoint. + +. 
**Log ingestion delay** ++ +Gateway logs can take 5-10 seconds to appear in the dashboard. Wait briefly and refresh. + +=== High token costs + +**Symptom**: Continue.dev uses more tokens than expected, resulting in high costs. + +**Causes and solutions**: + +. **Too much context included** ++ +Continue.dev may be including too many files. Solutions: ++ +* Limit `maxFiles` in context providers +* Use `.continueignore` file to exclude unnecessary directories +* Close unused editor tabs before using Continue.dev + +. **Autocomplete using expensive model** ++ +Verify you're using a cost-effective model for autocomplete: ++ +[,json] +---- +{ + "tabAutocompleteModel": { + "provider": "anthropic", + "model": "claude-haiku" + } +} +---- + +. **Model parameters too high** ++ +Reduce `maxTokens` in `completionOptions` to limit response length: ++ +[,json] +---- +{ + "completionOptions": { + "maxTokens": 2048 + } +} +---- + +. **MCP overhead** ++ +If not using deferred tool loading, all tools load with every request. Enable deferred tool loading in your AI Gateway configuration (see xref:ai-agents:ai-gateway/mcp-aggregation-guide.adoc[]). + +=== Configuration changes not taking effect + +**Symptom**: Changes to `config.json` don't apply. + +**Solutions**: + +. **Reload editor window** ++ +Configuration changes require reloading: ++ +* VS Code: Command Palette → Developer: Reload Window +* JetBrains: File → Invalidate Caches / Restart + +. **Invalid JSON syntax** ++ +Validate JSON syntax: ++ +[,bash] +---- +python3 -m json.tool ~/.continue/config.json +---- + +. **Project config overriding** ++ +Check if `.continuerc.json` in your project root overrides global settings. + +. 
**File permissions** ++ +Verify Continue.dev can read the config file: ++ +[,bash] +---- +ls -la ~/.continue/config.json +---- ++ +Fix permissions if needed: ++ +[,bash] +---- +chmod 600 ~/.continue/config.json +---- + +== Cost optimization tips + +=== Use different models for chat and autocomplete + +Chat interactions benefit from reasoning depth, while autocomplete needs speed: + +[,json] +---- +{ + "models": [ + { + "title": "Gateway - Claude Sonnet", + "provider": "anthropic", + "model": "claude-sonnet-4-5" + } + ], + "tabAutocompleteModel": { + "title": "Gateway - Claude Haiku", + "provider": "anthropic", + "model": "claude-haiku" + } +} +---- + +This can reduce costs by 5-10x for autocomplete while maintaining quality for chat. + +=== Limit context window size + +Reduce the amount of code included in requests: + +Create `.continueignore` in your project root: + +[,text] +---- +# Exclude build artifacts +dist/ +build/ +node_modules/ + +# Exclude tests when not working on tests +**/*.test.* +**/*.spec.* + +# Exclude documentation +docs/ +*.md + +# Exclude large data files +*.json +*.csv +---- + +Then limit files in `config.json`: + +[,json] +---- +{ + "contextProviders": [ + { + "name": "code", + "params": { + "maxFiles": 3 + } + } + ] +} +---- + +=== Use MCP tools for documentation + +Instead of pasting documentation into chat, create MCP tools that fetch relevant sections on-demand. This reduces token costs by including only needed information. + +=== Monitor usage patterns + +Use the AI Gateway dashboard to identify optimization opportunities: + +. Navigate to your gateway's observability dashboard +. Filter by Continue.dev requests (use custom header if configured) +. 
Analyze: +** Token usage per request type (chat vs autocomplete) +** Most expensive queries +** High-frequency low-value requests + +=== Set model-specific limits + +Prevent runaway costs by configuring `maxTokens`: + +[,json] +---- +{ + "models": [ + { + "title": "Gateway - Claude Sonnet", + "provider": "anthropic", + "model": "claude-sonnet-4-5", + "completionOptions": { + "maxTokens": 2048 + } + } + ], + "tabAutocompleteModel": { + "completionOptions": { + "maxTokens": 256 + } + } +} +---- + +Autocomplete rarely needs more than 256 tokens, while chat responses can vary. + +== Next steps + +* xref:ai-agents:ai-gateway/mcp-aggregation-guide.adoc[]: Configure deferred tool loading to reduce token costs +* xref:ai-agents:ai-gateway/cel-routing-cookbook.adoc[]: Use CEL expressions to route Continue.dev requests based on context + +== Related pages + +* xref:ai-agents:ai-gateway/gateway-quickstart.adoc[]: Create and configure your AI Gateway +* xref:ai-agents:ai-gateway/gateway-architecture.adoc[]: Learn about AI Gateway architecture and benefits +* xref:ai-agents:ai-gateway/integrations/claude-code-user.adoc[]: Configure Claude Code with AI Gateway +* xref:ai-agents:ai-gateway/integrations/cline-user.adoc[]: Configure Cline with AI Gateway diff --git a/modules/ai-agents/pages/ai-gateway/integrations/cursor-admin.adoc b/modules/ai-agents/pages/ai-gateway/integrations/cursor-admin.adoc new file mode 100644 index 000000000..cfaa68595 --- /dev/null +++ b/modules/ai-agents/pages/ai-gateway/integrations/cursor-admin.adoc @@ -0,0 +1,814 @@ += Configure AI Gateway for Cursor IDE +:description: Configure Redpanda AI Gateway to support Cursor IDE clients. 
+:page-topic-type: how-to +:personas: platform_admin +:learning-objective-1: Configure AI Gateway endpoints for Cursor IDE connectivity +:learning-objective-2: Set up OpenAI-compatible transforms for multi-provider routing +:learning-objective-3: Deploy multi-tenant authentication strategies for Cursor clients + +include::ai-agents:partial$ai-gateway-byoc-note.adoc[] + +Configure Redpanda AI Gateway to support Cursor IDE clients accessing multiple LLM providers and MCP tools through OpenAI-compatible endpoints. + +After reading this page, you will be able to: + +* [ ] Configure AI Gateway endpoints for Cursor IDE connectivity. +* [ ] Set up OpenAI-compatible transforms for multi-provider routing. +* [ ] Deploy multi-tenant authentication strategies for Cursor clients. + +== Prerequisites + +* AI Gateway deployed on a BYOC cluster running Redpanda version 25.3 or later +* Administrator access to the AI Gateway UI +* API keys for at least one LLM provider (Anthropic, OpenAI, or others) +* Understanding of xref:ai-agents:ai-gateway/gateway-architecture.adoc[AI Gateway concepts] + +== About Cursor IDE + +Cursor is an AI-powered code editor built on VS Code that integrates multiple LLM providers for code completion, chat, and inline editing. Unlike other AI assistants, Cursor uses OpenAI's API format for all providers and routes to different models using a `vendor/model` prefix notation. 
+ +Key characteristics: + +* Sends all requests in OpenAI-compatible format to `/v1/chat/completions` +* Routes using model prefixes (for example, `openai/gpt-4o`, `anthropic/claude-sonnet-4-5`) +* Limited support for custom headers (makes multi-tenant deployments challenging) +* Supports MCP protocol with a 40-tool limit +* Built-in code completion and chat modes +* Configuration via settings file (`~/.cursor/config.json`) + +== Architecture overview + +Cursor IDE connects to AI Gateway through standardized endpoints: + +* LLM endpoint: `https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1/chat/completions` for all providers +* MCP endpoint: `https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/mcp` for tool discovery and execution + +The gateway handles: + +. Authentication via bearer tokens in the `Authorization` header +. Gateway selection via the `rp-aigw-id` header (requires workarounds for multi-tenant scenarios) +. Model routing using vendor prefixes (for example, `anthropic/claude-sonnet-4-5`) +. Format transforms from OpenAI format to provider-native formats (for Anthropic, Google, etc.) +. MCP server aggregation for multi-tool workflows +. Request logging and cost tracking per gateway + +== Enable LLM providers + +Cursor IDE works with multiple providers through OpenAI-compatible transforms. Enable the providers your users will access. + +=== Configure Anthropic with OpenAI-compatible format + +Cursor sends OpenAI-formatted requests but can route to Anthropic models. Configure the gateway to transform these requests: + +. Navigate to *AI Gateway* > *Providers* in the Redpanda Cloud console +. Select *Anthropic* from the provider list +. Click *Add configuration* +. Enter your Anthropic API key +. Under *Format*, select *OpenAI-compatible* (enables automatic transform) +. Click *Save* + +The gateway now transforms OpenAI-format requests to Anthropic's native `/v1/messages` format. + +=== Configure OpenAI + +To enable OpenAI as a provider: + +. 
Navigate to *AI Gateway* > *Providers* +. Select *OpenAI* from the provider list +. Click *Add configuration* +. Enter your OpenAI API key +. Under *Format*, select *Native OpenAI* +. Click *Save* + +=== Configure additional providers + +Cursor supports many providers through OpenAI-compatible transforms. For each provider: + +. Add the provider configuration in the gateway +. Set the format to *OpenAI-compatible* (the gateway handles format transformation) +. Enable the transform layer to convert OpenAI request format to the provider's native format + +Common additional providers: + +* Google Gemini (requires OpenAI-compatible transform) +* Mistral AI (already OpenAI-compatible format) +* Together AI (already OpenAI-compatible format) + +=== Enable models in the catalog + +After enabling providers, enable specific models: + +. Navigate to *AI Gateway* > *Models* +. Enable the models you want Cursor clients to access ++ +Common models for Cursor: ++ +* `anthropic/claude-opus-4-5` +* `anthropic/claude-sonnet-4-5` +* `openai/gpt-4o` +* `openai/gpt-4o-mini` +* `openai/o1-mini` + +. Click *Save* + +Cursor uses the `vendor/model_id` format in requests. The gateway maps these to provider endpoints and applies the appropriate format transforms. + +== Create a gateway for Cursor clients + +Create a dedicated gateway to isolate Cursor traffic and apply specific policies. + +=== Gateway configuration + +. Navigate to *AI Gateway* > *Gateways* +. Click *Create Gateway* +. Enter gateway details: ++ +[cols="1,2"] +|=== +|Field |Value + +|Name +|`cursor-gateway` (or your preferred name) + +|Workspace +|Select the workspace for access control grouping + +|Description +|Gateway for Cursor IDE clients +|=== + +. Click *Create* +. Copy the gateway ID from the gateway details page + +The gateway ID is required in the `rp-aigw-id` header for all requests. 
+ +=== Configure unified LLM routing + +Cursor sends all requests to a single endpoint (`/v1/chat/completions`) and uses model prefixes for routing. Configure the gateway to route based on the requested model prefix. + +==== Model prefix routing + +Configure routing that inspects the model field to determine the target provider: + +. Navigate to the gateway's *LLM* tab +. Under *Routing*, click *Add route* +. Configure Anthropic routing: ++ +[source,cel] +---- +request.body.model.startsWith("anthropic/") +---- + +. Add a *Primary provider pool*: ++ +* Provider: Anthropic +* Model: All enabled Anthropic models +* Transform: OpenAI to Anthropic +* Load balancing: Round robin (if multiple Anthropic configurations exist) + +. Click *Save* +. Add another route for OpenAI: ++ +[source,cel] +---- +request.body.model.startsWith("openai/") +---- + +. Add a *Primary provider pool*: ++ +* Provider: OpenAI +* Model: All enabled OpenAI models +* Transform: None (already OpenAI format) + +. Click *Save* + +Cursor requests route to the appropriate provider based on the model prefix. + +==== Default routing with fallback + +Configure a catch-all route for requests without vendor prefixes: + +[source,cel] +---- +true # Matches all requests not matched by previous routes +---- + +Add a primary provider (for example, OpenAI) with fallback to Anthropic: + +* Primary: OpenAI (for requests with no prefix) +* Fallback: Anthropic (if OpenAI is unavailable) +* Failover conditions: Rate limits, timeouts, 5xx errors + +=== Apply rate limits + +Prevent runaway usage from Cursor clients: + +. Navigate to the gateway's *LLM* tab +. Under *Rate Limit*, configure: ++ +[cols="1,2"] +|=== +|Setting |Recommended Value + +|Global rate limit +|150 requests per minute + +|Per-user rate limit +|15 requests per minute (if using user identification workarounds) +|=== + +. Click *Save* + +The gateway blocks requests exceeding these limits and returns HTTP 429 errors. 
+ +==== Rate limit considerations for code completion + +Cursor's code completion feature generates frequent requests. Consider separate rate limits for completion vs chat: + +* Completion models (for example, `openai/gpt-4o-mini`): Higher rate limits +* Chat models (for example, `anthropic/claude-sonnet-4-5`): Standard rate limits + +Configure routing rules that apply different rate limits based on model selection. + +=== Set spending limits + +Control LLM costs across all providers: + +. Under *Spend Limit*, configure: ++ +[cols="1,2"] +|=== +|Setting |Value + +|Monthly budget +|$7,000 (adjust based on expected usage) + +|Enforcement +|Block requests after budget exceeded + +|Alert threshold +|80% of budget (sends notification) +|=== + +. Click *Save* + +The gateway tracks estimated costs per request across all providers and blocks traffic when the monthly budget is exhausted. + +== Configure MCP tool aggregation + +Enable Cursor to discover and use tools from multiple MCP servers through a single endpoint. Note that Cursor has a 40-tool limit, so carefully select which MCP servers to aggregate. + +=== Add MCP servers + +. Navigate to the gateway's *MCP* tab +. Click *Add MCP Server* +. Enter server details: ++ +[cols="1,2"] +|=== +|Field |Value + +|Display name +|Descriptive name (for example, `redpanda-data-tools`, `code-search-tools`) + +|Endpoint URL +|MCP server endpoint (for example, xref:ai-agents:mcp/remote/overview.adoc[Remote MCP server] URL) + +|Authentication +|Bearer token or other authentication mechanism +|=== + +. Click *Save* + +Repeat for each MCP server you want to aggregate, keeping in mind the 40-tool limit. + +=== Work within the 40-tool limit + +Cursor imposes a 40-tool limit on MCP integrations. 
To stay within this limit: + +* Aggregate only essential MCP servers +* Use deferred tool loading (see next section) +* Prioritize high-value tools over comprehensive tool sets +* Consider creating multiple gateways with different tool sets for different use cases + +Monitor the total tool count across all aggregated MCP servers: + +. Navigate to the gateway's *MCP* tab +. Review the *Total Tools* count displayed at the top +. If the count exceeds 40, remove low-priority MCP servers + +=== Enable deferred tool loading + +Reduce the effective tool count by deferring tool discovery: + +. Under *MCP Settings*, enable *Deferred tool loading* +. Click *Save* + +When enabled: + +* Cursor initially receives only a search tool and orchestrator tool (2 tools total) +* Cursor queries for specific tools by name when needed +* The underlying MCP servers can provide more than 40 tools, but only the search and orchestrator tools count against the limit +* Token usage decreases by 80-90% for configurations with many tools + +Deferred tool loading is the recommended approach for Cursor deployments with multiple MCP servers. + +=== Add the MCP orchestrator + +The MCP orchestrator reduces multi-step workflows to single calls: + +. Under *MCP Settings*, enable *MCP Orchestrator* +. Configure: ++ +[cols="1,2"] +|=== +|Setting |Value + +|Orchestrator model +|Select a model with strong code generation capabilities (for example, `anthropic/claude-sonnet-4-5`) + +|Execution timeout +|30 seconds + +|Backend +|Select the Anthropic backend (orchestrator works best with Claude models) +|=== + +. Click *Save* + +Cursor can now invoke the orchestrator tool to execute complex, multi-step operations in a single request. + +== Configure authentication + +Cursor clients authenticate using bearer tokens in the `Authorization` header. + +=== Generate API tokens + +. Navigate to *Security* > *API Tokens* in the Redpanda Cloud console +. Click *Create Token* +. 
Enter token details: ++ +[cols="1,2"] +|=== +|Field |Value + +|Name +|`cursor-access` + +|Scopes +|`ai-gateway:read`, `ai-gateway:write` + +|Expiration +|Set appropriate expiration based on security policies +|=== + +. Click *Create* +. Copy the token (it appears only once) + +Distribute this token to Cursor users through secure channels. + +=== Token rotation + +Implement token rotation for security: + +. Create a new token before the existing token expires +. Distribute the new token to users +. Monitor usage of the old token in the observability dashboard +. Revoke the old token after all users have migrated + +== Multi-tenant deployment strategies + +Cursor has limited support for custom headers, making traditional `rp-aigw-id` header-based multi-tenancy challenging. Use one of these alternative strategies. + +=== Strategy 1: Tenant-specific subdomains + +Configure different subdomains for each tenant or team: + +. Set up DNS records pointing to your AI Gateway cluster: ++ +* `team-alpha.aigateway.example.com` → Gateway ID: `alpha-cursor-gateway` +* `team-beta.aigateway.example.com` → Gateway ID: `beta-cursor-gateway` + +. Configure the gateway to extract tenant identity from the `Host` header: ++ +[source,cel] +---- +request.headers["host"][0].startsWith("team-alpha") +---- + +. Distribute tenant-specific URLs to each team +. Each team configures Cursor with their specific subdomain + +This approach works with standard Cursor configuration without requiring custom headers. + +**Configuration example for Team Alpha:** + +[source,json] +---- +{ + "apiProvider": "openai", + "apiBaseUrl": "https://team-alpha.aigateway.example.com/ai-gateway/v1", + "apiKey": "TEAM_ALPHA_TOKEN" +} +---- + +=== Strategy 2: Path-based routing + +Use URL path prefixes to identify tenants: + +. Configure gateway routing to extract tenant from the request path: ++ +[source,cel] +---- +request.path.startsWith("/ai-gateway/alpha/") +---- + +. 
Create routing rules that map path prefixes to specific gateways or policies +. Distribute tenant-specific base URLs + +**Configuration example for Team Alpha:** + +[source,json] +---- +{ + "apiProvider": "openai", + "apiBaseUrl": "https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/alpha/v1", + "apiKey": "TEAM_ALPHA_TOKEN" +} +---- + +This approach requires gateway-level path rewriting to remove the tenant prefix before forwarding to LLM providers. + +=== Strategy 3: Query parameter routing + +Embed tenant identity in query parameters: + +. Configure Cursor to append query parameters to the base URL: ++ +[source,json] +---- +{ + "apiProvider": "openai", + "apiBaseUrl": "https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1?tenant=alpha", + "apiKey": "TEAM_ALPHA_TOKEN" +} +---- + +. Configure gateway routing to extract tenant from query parameters: ++ +[source,cel] +---- +request.url.query["tenant"][0] == "alpha" +---- + +. Create routing rules and rate limits based on the tenant parameter + +This approach works with standard Cursor configuration but exposes tenant identity in URLs. + +=== Strategy 4: API token-based routing + +Use different API tokens to identify tenants: + +. Generate separate API tokens for each tenant +. Tag tokens with metadata indicating the tenant +. Configure gateway routing based on token identity: ++ +[source,cel] +---- +request.auth.metadata["tenant"] == "alpha" +---- + +. Apply tenant-specific routing, rate limits, and spending limits + +This approach is most transparent to users but requires gateway support for token metadata inspection. 
+ +=== Choosing a multi-tenant strategy + +[cols="1,2,2,1"] +|=== +|Strategy |Pros |Cons |Best For + +|Subdomains +|Clean, standards-based, no URL modifications +|Requires DNS configuration, certificate management +|Organizations with infrastructure control + +|Path-based +|No DNS required, flexible routing +|Requires path rewriting, tenant exposed in logs +|Simpler deployments, testing environments + +|Query parameters +|No infrastructure changes +|Tenant exposed in URLs and logs, less clean +|Quick deployments, temporary solutions + +|Token-based +|Transparent to users, centralized control +|Requires advanced gateway features +|Large deployments, strong security requirements +|=== + +== Configure Cursor IDE clients + +Provide these instructions to users configuring Cursor IDE. + +=== Configuration file location + +Cursor uses a JSON configuration file: + +* macOS: `~/.cursor/config.json` +* Linux: `~/.cursor/config.json` +* Windows: `%USERPROFILE%\.cursor\config.json` + +=== Basic configuration + +Users configure Cursor with the AI Gateway endpoint: + +[source,json] +---- +{ + "apiProvider": "openai", + "apiBaseUrl": "https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1", + "apiKey": "YOUR_API_TOKEN", + "models": { + "chat": "anthropic/claude-sonnet-4-5", + "completion": "openai/gpt-4o-mini" + } +} +---- + +Replace: + +* `{CLUSTER_ID}`: Your Redpanda cluster ID +* `YOUR_API_TOKEN`: The API token generated earlier + +If using a multi-tenant strategy, adjust the `apiBaseUrl` according to your chosen approach (subdomain, path prefix, or query parameter). 
+ +=== Model selection + +Configure different models for different Cursor modes: + +[cols="1,2,1"] +|=== +|Mode |Recommended Model |Reason + +|Chat +|`anthropic/claude-sonnet-4-5` or `openai/gpt-4o` +|High quality for complex questions + +|Code completion +|`openai/gpt-4o-mini` +|Fast, cost-effective for frequent requests + +|Inline edit +|`anthropic/claude-sonnet-4-5` +|Balanced quality and speed for code modifications +|=== + +=== MCP server configuration + +Configure Cursor to connect to the aggregated MCP endpoint: + +[source,json] +---- +{ + "experimental": { + "mcpServers": { + "redpanda-ai-gateway": { + "transport": "http", + "url": "https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/mcp", + "headers": { + "Authorization": "Bearer YOUR_API_TOKEN" + } + } + } + } +} +---- + +If using a multi-tenant strategy, ensure the MCP URL matches the tenant configuration. + +This configuration: + +* Connects Cursor to the aggregated MCP endpoint +* Routes LLM requests through the AI Gateway with OpenAI-compatible transforms +* Includes authentication headers + +== Monitor Cursor usage + +Track Cursor activity through gateway observability features. + +=== View request logs + +. Navigate to *AI Gateway* > *Observability* > *Logs* +. Filter by gateway ID: `cursor-gateway` +. Review: ++ +* Request timestamps and duration +* Model used per request (with vendor prefix) +* Token usage (prompt and completion tokens) +* Estimated cost per request +* HTTP status codes and errors +* Transform operations (OpenAI to provider-native format) + +Cursor generates different request patterns: + +* Code completion: Many short requests with low token counts +* Chat: Longer requests with context and multi-turn conversations +* Inline edit: Medium-length requests with code context + +=== Analyze metrics + +. Navigate to *AI Gateway* > *Observability* > *Metrics* +. Select the Cursor gateway +. 
Review: ++ +[cols="1,2"] +|=== +|Metric |Purpose + +|Request volume by provider +|Identify which providers are most used via model prefix routing + +|Token usage by model +|Track consumption patterns (completion vs chat) + +|Estimated spend by provider +|Monitor costs across providers with transforms + +|Latency (p50, p95, p99) +|Detect transform overhead and provider-specific performance issues + +|Error rate by provider +|Identify failing providers or transform issues + +|Transform success rate +|Monitor OpenAI-to-provider format conversion success +|=== + + +=== Query logs via API + +Programmatically access logs for integration with monitoring systems: + +[source,bash] +---- +curl https://{CLUSTER_ID}.cloud.redpanda.com/api/ai-gateway/logs \ + -H "Authorization: Bearer YOUR_API_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "gateway_id": "GATEWAY_ID", + "start_time": "2026-01-01T00:00:00Z", + "end_time": "2026-01-14T23:59:59Z", + "limit": 100 + }' +---- + +== Security considerations + +Apply these security best practices for Cursor deployments. + +=== Limit token scope + +Create tokens with minimal required scopes: + +* `ai-gateway:read`: Required for MCP tool discovery +* `ai-gateway:write`: Required for LLM requests and tool execution + +Avoid granting broader scopes like `admin` or `cluster:write`. + +=== Implement network restrictions + +If Cursor clients connect from known networks, configure network policies: + +. Use cloud provider security groups to restrict access to AI Gateway endpoints +. Allowlist only the IP ranges where Cursor clients operate +. Monitor for unauthorized access attempts in request logs + +=== Enforce token expiration + +Set short token lifetimes for high-security environments: + +* Development environments: 90 days +* Production environments: 30 days + +Automate token rotation to reduce manual overhead. + +=== Audit tool access + +Review which MCP tools Cursor clients can access: + +. 
Periodically audit the MCP servers configured in the gateway +. Remove unused or deprecated MCP servers +. Monitor tool execution logs for unexpected behavior +. Ensure total tool count stays within Cursor's 40-tool limit + +=== Protect API keys in configuration + +Cursor stores the API token in plain text in `config.json`. Remind users to: + +* Never commit `config.json` to version control +* Use file system permissions to restrict access (for example, `chmod 600 ~/.cursor/config.json` on Unix-like systems) +* Rotate tokens if they suspect compromise +* Consider using environment variables for API keys (if Cursor supports this) + +=== Monitor transform operations + +Because Cursor requires OpenAI-compatible transforms for non-OpenAI providers: + +. Review transform success rates in metrics +. Monitor for transform failures that may leak request details +. Test transforms thoroughly before production deployment +. Keep transform logic updated as provider APIs evolve + +== Troubleshooting + +Common issues and solutions when configuring AI Gateway for Cursor. + +=== Cursor cannot connect to gateway + +Symptom: Connection errors when Cursor tries to discover tools or send LLM requests. + +Causes and solutions: + +* **Invalid base URL**: Verify `apiBaseUrl` matches the gateway endpoint (including multi-tenant prefix if applicable) +* **Expired token**: Generate a new API token and update the Cursor configuration +* **Network connectivity**: Verify the cluster endpoint is accessible from the client network +* **Provider not enabled**: Ensure at least one provider is enabled and has models in the catalog +* **Missing gateway ID**: If using header-based routing, verify the `rp-aigw-id` header is configured (or use alternative multi-tenant strategy) + +=== Model not found errors + +Symptom: Cursor shows "model not found" or similar errors. 
+ +Causes and solutions: + +* **Model not enabled in catalog**: Enable the model in the gateway's model catalog +* **Incorrect model prefix**: Use the correct vendor prefix (for example, `anthropic/claude-sonnet-4-5` not just `claude-sonnet-4-5`) +* **Transform not configured**: Verify OpenAI-compatible transform is enabled for non-OpenAI providers +* **Routing rule mismatch**: Check that routing rules correctly match the model prefix + +=== Transform errors or unexpected responses + +Symptom: Responses are malformed or Cursor reports format errors. + +Causes and solutions: + +* **Transform disabled**: Ensure OpenAI-compatible transform is enabled for Anthropic and other non-OpenAI providers +* **Transform version mismatch**: Verify the transform is compatible with the current provider API version +* **Model-specific transform issues**: Some models may require specific transform configurations +* **Check transform logs**: Review logs for transform errors and stack traces + +=== Tools not appearing in Cursor + +Symptom: Cursor does not discover MCP tools. + +Causes and solutions: + +* **MCP configuration missing**: Ensure `experimental.mcpServers` is configured in Cursor settings +* **MCP servers not configured in gateway**: Add MCP server endpoints in the gateway's MCP tab +* **Exceeds 40-tool limit**: Reduce the number of aggregated tools or enable deferred tool loading +* **Deferred loading enabled but search failing**: Check that the search tool is correctly configured +* **MCP server authentication failing**: Verify MCP server authentication credentials in the gateway configuration + +=== High costs or token usage + +Symptom: Token usage and costs exceed expectations. 
+ +Causes and solutions: + +* **Code completion using expensive model**: Configure completion mode to use `openai/gpt-4o-mini` instead of larger models +* **Deferred tool loading disabled**: Enable deferred tool loading to reduce tokens by 80-90% +* **No rate limits**: Apply per-minute rate limits to prevent runaway usage +* **Missing spending limits**: Set monthly budget limits with blocking enforcement +* **Chat using wrong model**: Route chat requests to cost-effective models (for example, `anthropic/claude-sonnet-4-5` instead of `anthropic/claude-opus-4-5`) +* **Transform overhead**: Monitor if transforms add significant token overhead + +=== Requests failing with 429 errors + +Symptom: Cursor receives HTTP 429 Too Many Requests errors. + +Causes and solutions: + +* **Rate limit exceeded**: Review and increase rate limits if usage is legitimate (code completion needs higher limits) +* **Upstream provider rate limits**: Check if the upstream LLM provider is rate-limiting; configure failover to alternate providers +* **Budget exhausted**: Verify monthly spending limit has not been reached +* **Per-user limits too restrictive**: Adjust per-user rate limits if using multi-tenant strategies + +=== Multi-tenant routing failures + +Symptom: Requests route to wrong gateway or fail authorization. 
+ +Causes and solutions: + +* **Subdomain not configured**: Verify DNS records and SSL certificates for tenant-specific subdomains +* **Path prefix mismatch**: Check that path-based routing rules correctly extract tenant identity +* **Query parameter missing**: Ensure query parameter is appended to all requests +* **Token metadata incorrect**: Verify token is tagged with correct tenant metadata +* **Routing rule conflicts**: Check for overlapping routing rules that may cause unexpected routing + +== Next steps + +* xref:ai-agents:ai-gateway/cel-routing-cookbook.adoc[]: Implement advanced routing rules for model prefix routing +* xref:ai-agents:mcp/remote/overview.adoc[]: Deploy Remote MCP servers for custom tools diff --git a/modules/ai-agents/pages/ai-gateway/integrations/cursor-user.adoc b/modules/ai-agents/pages/ai-gateway/integrations/cursor-user.adoc new file mode 100644 index 000000000..cd7dda2b1 --- /dev/null +++ b/modules/ai-agents/pages/ai-gateway/integrations/cursor-user.adoc @@ -0,0 +1,880 @@ += Configure Cursor IDE with AI Gateway +:description: Configure Cursor IDE to use Redpanda AI Gateway for unified LLM access, MCP tool integration, and AI-assisted coding. +:page-topic-type: how-to +:personas: ai_agent_developer, app_developer +:learning-objective-1: Configure Cursor IDE to route LLM requests through AI Gateway +:learning-objective-2: Set up MCP server integration for tool access through the gateway +:learning-objective-3: Optimize Cursor settings for multi-tenancy and cost control + +include::ai-agents:partial$ai-gateway-byoc-note.adoc[] + +After xref:ai-agents:ai-gateway/gateway-quickstart.adoc[configuring your AI Gateway], set up Cursor IDE to route LLM requests and access MCP tools through the gateway's unified endpoints. + +After reading this page, you will be able to: + +* [ ] Configure Cursor IDE to route LLM requests through AI Gateway. +* [ ] Set up MCP server integration for tool access through the gateway. 
+* [ ] Optimize Cursor settings for multi-tenancy and cost control. + +== Prerequisites + +Before configuring Cursor IDE, ensure you have: + +* Cursor IDE installed (download from https://cursor.sh[cursor.sh^]) +* An active Redpanda AI Gateway with: +** At least one LLM provider enabled (see xref:ai-agents:ai-gateway/gateway-quickstart.adoc#step-1-enable-a-provider[Enable a provider]) +** A gateway created and configured (see xref:ai-agents:ai-gateway/gateway-quickstart.adoc#step-3-create-a-gateway[Create a gateway]) +* Your AI Gateway credentials: +** Gateway endpoint URL (for example, `https://gw.ai.panda.com`) +** Gateway ID (for example, `gateway-abc123`) +** API key with access to the gateway + +== About Cursor IDE + +Cursor IDE is an AI-powered code editor built on VS Code that provides: + +* Chat interface for code questions and generation +* AI-powered autocomplete with context awareness +* Codebase indexing for semantic search +* Inline code editing with AI assistance +* Terminal integration for command suggestions +* Native integration with multiple LLM providers + +By routing Cursor through AI Gateway, you gain centralized observability, cost controls, provider flexibility, and the ability to aggregate multiple MCP servers into a single interface. + +== Configuration methods + +Cursor IDE supports two configuration approaches for connecting to AI Gateway: + +[cols="1,2,2"] +|=== +|Method |Best for |Trade-offs + +|Settings UI +|Visual configuration, quick setup +|Limited to single provider configuration + +|Configuration file +|Multiple providers, environment-specific settings, version control +|Manual file editing required +|=== + +Choose the method that matches your workflow. The Settings UI is faster for getting started, while the configuration file provides more flexibility for production use. + +== Configure using Settings UI + +The Settings UI provides a visual interface for configuring Cursor's AI providers. + +=== Configure AI provider + +. 
Open Cursor Settings: +** macOS: *Cursor* > *Settings* or `Cmd+,` +** Windows/Linux: *File* > *Preferences* > *Settings* or `Ctrl+,` +. Navigate to *Features* > *AI* +. Under *OpenAI API*, configure the base URL and API key: + +[source,text] +---- +Override OpenAI Base URL: https://gw.ai.panda.com/v1 +Override OpenAI API Key: YOUR_REDPANDA_API_KEY +---- + +[start=4] +. Scroll to *Advanced Settings* and add custom headers: + +[source,json] +---- +{ + "openai.additionalHeaders": { + "rp-aigw-id": "GATEWAY_ID" + } +} +---- + +Replace placeholder values: + +* `YOUR_REDPANDA_API_KEY` - Your Redpanda API key +* `GATEWAY_ID` - Your gateway ID from the AI Gateway UI + +=== Select models + +In the AI settings, configure which models to use: + +. Under *Model Selection*, choose your preferred model from the dropdown +. Cursor will automatically use the gateway endpoint configured above +. Models available depend on what you've enabled in your AI Gateway + +Model selection options: + +* `gpt-4o` - Routes to OpenAI GPT-4o through your gateway +* `gpt-4o-mini` - Routes to OpenAI GPT-4o-mini (cost-effective) +* `claude-sonnet-4-5` - Routes to Anthropic Claude Sonnet (if enabled in gateway) +* `claude-opus-4-5` - Routes to Anthropic Claude Opus (if enabled in gateway) + +Note: When routing through AI Gateway, Cursor uses the OpenAI SDK format. The gateway automatically translates requests to the appropriate provider based on the model name. + +== Configure using configuration file + +For more control over provider settings, multi-environment configurations, or version control, edit Cursor's configuration file directly. 
+ +=== Locate configuration file + +Cursor stores configuration in `settings.json`: + +* macOS: `~/Library/Application Support/Cursor/User/settings.json` +* Windows: `%APPDATA%\Cursor\User\settings.json` +* Linux: `~/.config/Cursor/User/settings.json` + +Create the directory structure if it doesn't exist: + +[,bash] +---- +# macOS +mkdir -p ~/Library/Application\ Support/Cursor/User + +# Linux +mkdir -p ~/.config/Cursor/User +---- + +=== Basic configuration + +Create or edit `settings.json` with the following structure: + +[,json] +---- +{ + "cursor.overrideOpenAIBaseUrl": "https://gw.ai.panda.com/v1", + "cursor.overrideOpenAIApiKey": "YOUR_REDPANDA_API_KEY", + "openai.additionalHeaders": { + "rp-aigw-id": "GATEWAY_ID" + }, + "cursor.cpp.defaultModel": "gpt-4o", + "cursor.chat.defaultModel": "gpt-4o" +} +---- + +Replace placeholder values: + +* `YOUR_REDPANDA_API_KEY` - Your Redpanda API key +* `GATEWAY_ID` - Your gateway ID + +Configuration fields: + +* `cursor.overrideOpenAIBaseUrl` - Gateway endpoint (always ends with `/v1` for OpenAI compatibility) +* `cursor.overrideOpenAIApiKey` - Your Redpanda API key (used for authentication) +* `openai.additionalHeaders` - Custom headers sent with every request +* `cursor.cpp.defaultModel` - Model for autocomplete (c++ refers to copilot++) +* `cursor.chat.defaultModel` - Model for chat interactions + +=== Multiple environment configuration + +To switch between development and production gateways, use workspace-specific settings. + +Create `.vscode/settings.json` in your project root: + +[,json] +---- +{ + "cursor.overrideOpenAIBaseUrl": "https://gw.staging.ai.panda.com/v1", + "openai.additionalHeaders": { + "rp-aigw-id": "staging-gateway-123", + "x-environment": "staging" + } +} +---- + +Workspace settings override global settings. 
Use this to: + +* Route different projects through different gateways +* Use cost-effective models for internal projects +* Use premium models for customer-facing projects +* Add project-specific tracking headers + +=== Configuration with environment variables + +For sensitive credentials, avoid hardcoding values in `settings.json`. + +IMPORTANT: VS Code `settings.json` does not support `${VAR}` interpolation - such placeholders will be treated as literal strings. To use environment variables, generate the settings file dynamically with a script. + +==== Option 1: Generate settings.json with a script + +Create a setup script that reads environment variables and writes the actual values to `settings.json`: + +[,bash] +---- +#!/bin/bash +# setup-cursor-config.sh + +# Set your credentials +export REDPANDA_GATEWAY_URL="https://gw.ai.panda.com" +export REDPANDA_GATEWAY_ID="gateway-abc123" +export REDPANDA_API_KEY="your-api-key" + +# Generate settings.json with the actual values substituted +cat > ~/.cursor/settings.json <<EOF +{ + "cursor.overrideOpenAIBaseUrl": "${REDPANDA_GATEWAY_URL}/v1", + "cursor.overrideOpenAIApiKey": "${REDPANDA_API_KEY}", + "openai.additionalHeaders": { + "rp-aigw-id": "${REDPANDA_GATEWAY_ID}" + } +} +EOF +---- + +Run the script, then restart Cursor so the generated settings take effect. If requests fail after restarting, see <<troubleshooting>>. + +== Verify the configuration + +Confirm that Cursor routes requests through the gateway. + +=== Test inline code completion + +. Open a code file in Cursor +. Start typing a function definition +. Wait for inline suggestions to appear + +Autocomplete requests appear in the gateway dashboard with: + +* Lower token counts than chat requests +* Higher request frequency +* The autocomplete model you configured + +=== Test MCP tool integration + +If you configured MCP servers: + +. Open Cursor chat (`Cmd+L` or `Ctrl+L`) +. Ask a question that requires a tool: "What's the current date?" +. Cursor should: +** Discover available tools from the gateway +** Invoke the appropriate tool +** Return the result + +Check the gateway dashboard for MCP tool invocation logs. 
 + +== Advanced configuration + +=== Custom request tracking headers + +Add custom headers for request tracking, user attribution, or routing policies: + +[,json] +---- +{ + "openai.additionalHeaders": { + "rp-aigw-id": "GATEWAY_ID", + "x-user-id": "developer-123", + "x-team": "backend", + "x-project": "api-service" + } +} +---- + +Use these headers with gateway CEL routing to: + +* Track costs per developer or team +* Route based on project type +* Apply different rate limits per user +* Generate team-specific usage reports + +=== Model-specific settings + +Configure different settings for chat vs autocomplete: + +[,json] +---- +{ + "cursor.chat.defaultModel": "claude-sonnet-4-5", + "cursor.cpp.defaultModel": "gpt-4o-mini", + "cursor.chat.temperature": 0.7, + "cursor.cpp.temperature": 0.2, + "cursor.chat.maxTokens": 4096, + "cursor.cpp.maxTokens": 512 +} +---- + +Settings explained: + +* Chat uses Claude Sonnet for reasoning depth +* Autocomplete uses GPT-4o-mini for speed and cost efficiency +* Chat temperature (0.7) allows creative responses +* Autocomplete temperature (0.2) produces deterministic code +* Chat allows longer responses (4096 tokens) +* Autocomplete limits responses (512 tokens) for speed + +=== Multi-tenancy with team-specific gateways + +For organizations with multiple teams sharing Cursor but requiring separate cost tracking and policies: + +[,json] +---- +{ + "cursor.overrideOpenAIBaseUrl": "https://gw.ai.panda.com/v1", + "cursor.overrideOpenAIApiKey": "${TEAM_API_KEY}", + "openai.additionalHeaders": { + "rp-aigw-id": "${TEAM_GATEWAY_ID}", + "x-team": "${TEAM_NAME}" + } +} +---- + +NOTE: Because `settings.json` does not support `${VAR}` interpolation, these placeholders are illustrative only. Replace them with each team's actual values, or generate the file with a setup script as described in <<Configuration with environment variables>>. + +Each team configures their own: + +* `TEAM_API_KEY` - Team-specific API key +* `TEAM_GATEWAY_ID` - Gateway with team budget and rate limits +* `TEAM_NAME` - Identifier for usage reports + +This approach enables: + +* Per-team cost attribution +* Separate budgets and rate limits +* Team-specific model access policies +* Independent observability dashboards + +=== 
Request timeout configuration + +Configure timeout for LLM and MCP requests: + +[,json] +---- +{ + "cursor.requestTimeout": 30000, + "cursor.mcp.requestTimeout": 15000 +} +---- + +Timeout values are in milliseconds. Defaults: + +* LLM requests: 30000ms (30 seconds) +* MCP requests: 15000ms (15 seconds) + +Increase timeouts for: + +* Long-running MCP tools (database queries, web searches) +* High-latency network environments +* Complex reasoning tasks requiring extended processing + +=== Debug mode + +Enable debug logging to troubleshoot connection issues: + +[,json] +---- +{ + "cursor.debug": true, + "cursor.logLevel": "debug" +} +---- + +Debug mode shows: + +* HTTP request and response headers +* Model selection decisions +* Token usage calculations +* Error details with stack traces + +View debug logs: + +. Open Command Palette (`Cmd+Shift+P` or `Ctrl+Shift+P`) +. Type "Developer: Show Logs" +. Select "Extension Host" +. Filter by "cursor" + +[[troubleshooting]] +== Troubleshooting + +=== Cursor shows connection error + +**Symptom**: Cursor displays "Failed to connect to AI provider" or requests return errors. + +**Causes and solutions**: + +. **Incorrect base URL format** ++ +Verify the URL includes `/v1` at the end: ++ +[,text] +---- +# Correct +"cursor.overrideOpenAIBaseUrl": "https://gw.ai.panda.com/v1" + +# Incorrect +"cursor.overrideOpenAIBaseUrl": "https://gw.ai.panda.com" +---- + +. **Authentication failure** ++ +Verify your API key is valid: ++ +[,bash] +---- +curl -H "Authorization: Bearer YOUR_API_KEY" \ + -H "rp-aigw-id: GATEWAY_ID" \ + https://gw.ai.panda.com/v1/models +---- ++ +You should receive a list of available models. If you get `401 Unauthorized`, regenerate your API key in the Redpanda Cloud Console. + +. **Gateway ID mismatch** ++ +Check that the `rp-aigw-id` header matches your gateway ID exactly (case-sensitive). Copy it directly from the AI Gateway UI. + +. 
**Missing headers** ++ +Ensure `openai.additionalHeaders` is configured in settings: ++ +[,json] +---- +{ + "openai.additionalHeaders": { + "rp-aigw-id": "GATEWAY_ID" + } +} +---- + +. **Invalid JSON syntax** ++ +Validate your `settings.json` file: ++ +[,bash] +---- +# macOS/Linux +python3 -m json.tool ~/Library/Application\ Support/Cursor/User/settings.json + +# Or use jq +jq . ~/Library/Application\ Support/Cursor/User/settings.json +---- ++ +Fix any syntax errors reported. + +=== Autocomplete not working + +**Symptom**: Inline autocomplete suggestions don't appear or are very slow. + +**Causes and solutions**: + +. **No autocomplete model configured** ++ +Verify `cursor.cpp.defaultModel` is set in `settings.json`: ++ +[,json] +---- +{ + "cursor.cpp.defaultModel": "gpt-4o-mini" +} +---- + +. **Model too slow** ++ +Use a faster, cost-effective model for autocomplete: ++ +[,json] +---- +{ + "cursor.cpp.defaultModel": "gpt-4o-mini", + "cursor.cpp.maxTokens": 256 +} +---- ++ +Smaller models like GPT-4o-mini or Claude Haiku provide faster responses ideal for autocomplete. + +. **Network latency** ++ +Check gateway latency in the observability dashboard. If p95 latency is over 500ms, autocomplete will feel slow. Consider: ++ +* Using a gateway in a closer geographic region +* Switching to a faster model +* Reducing `cursor.cpp.maxTokens` to 256 or lower + +. **Autocomplete disabled in settings** ++ +Verify autocomplete is enabled: ++ +. Open Settings (`Cmd+,` or `Ctrl+,`) +. Search for "cursor autocomplete" +. Ensure "Enable Autocomplete" is checked + +=== MCP tools not appearing + +**Symptom**: Cursor doesn't show tools from MCP servers, or shows error "Too many tools". + +**Causes and solutions**: + +. **40-tool limit exceeded** ++ +Cursor has a hard limit of 40 MCP tools. If your MCP servers expose more than 40 tools combined, enable deferred tool loading in your AI Gateway configuration. 
++ +With deferred loading, only 2 tools (search + orchestrator) are sent to Cursor initially, staying well under the limit. + +. **MCP configuration missing** ++ +Verify the `cursor.mcp.servers` section exists in `settings.json`: ++ +[,json] +---- +{ + "cursor.mcp": { + "servers": { + "redpanda-ai-gateway": { + "command": "node", + "args": [/* ... */] + } + } + } +} +---- + +. **No MCP servers in gateway** ++ +Verify your gateway has at least one MCP server configured in the AI Gateway UI. + +. **MCP endpoint unreachable** ++ +Test connectivity to the MCP endpoint: ++ +[,bash] +---- +curl -H "Authorization: Bearer YOUR_API_KEY" \ + -H "rp-aigw-id: GATEWAY_ID" \ + https://gw.ai.panda.com/mcp +---- ++ +You should receive a valid MCP protocol response. + +. **Cursor restart needed** ++ +MCP configuration changes require restarting Cursor: ++ +. Close all Cursor windows +. Relaunch Cursor +. Wait for MCP servers to initialize (may take 5-10 seconds) + +=== Requests not appearing in gateway dashboard + +**Symptom**: Cursor works, but requests don't appear in the AI Gateway observability dashboard. + +**Causes and solutions**: + +. **Wrong gateway ID** ++ +Verify that the `rp-aigw-id` header in your configuration matches the gateway you're viewing in the dashboard. + +. **Missing header** ++ +Ensure the `rp-aigw-id` header is in the `openai.additionalHeaders` section: ++ +[,json] +---- +{ + "openai.additionalHeaders": { + "rp-aigw-id": "GATEWAY_ID" + } +} +---- + +. **Using direct provider connection** ++ +If `cursor.overrideOpenAIBaseUrl` points directly to a provider (for example, `https://api.openai.com`), requests won't route through the gateway. Verify it points to your gateway endpoint. + +. **Log ingestion delay** ++ +Gateway logs can take 5-10 seconds to appear in the dashboard. Wait briefly and refresh. + +. **Workspace settings override** ++ +Check if `.vscode/settings.json` in your project root overrides global settings with different gateway configuration. 
+ +=== High latency after gateway integration + +**Symptom**: Requests are slower after routing through the gateway. + +**Causes and solutions**: + +. **Gateway geographic distance** ++ +If your gateway is in a different region than you or the upstream provider, this adds network latency. Check gateway region in the Redpanda Cloud Console. + +. **Provider pool failover** ++ +If your gateway is configured with fallback providers, check the logs to see if requests are failing over. Failover adds latency. + +. **Model mismatch** ++ +Verify you're using fast models for autocomplete: ++ +[,json] +---- +{ + "cursor.cpp.defaultModel": "gpt-4o-mini" +} +---- + +. **MCP tool aggregation overhead** ++ +Aggregating tools from multiple MCP servers adds processing time. Use deferred tool loading to reduce this overhead (see xref:ai-agents:ai-gateway/mcp-aggregation-guide.adoc[]). + +=== Configuration changes not taking effect + +**Symptom**: Changes to `settings.json` don't apply. + +**Solutions**: + +. **Restart Cursor** ++ +Configuration changes require restarting Cursor: ++ +. Close all Cursor windows +. Relaunch Cursor + +. **Invalid JSON syntax** ++ +Validate JSON syntax: ++ +[,bash] +---- +python3 -m json.tool ~/Library/Application\ Support/Cursor/User/settings.json +---- + +. **Workspace settings overriding** ++ +Check if `.vscode/settings.json` in your project root overrides global settings. + +. 
**File permissions** ++ +Verify Cursor can read the configuration file: ++ +[,bash] +---- +# macOS +ls -la ~/Library/Application\ Support/Cursor/User/settings.json + +# Linux +ls -la ~/.config/Cursor/User/settings.json +---- ++ +Fix permissions if needed: ++ +[,bash] +---- +chmod 600 ~/Library/Application\ Support/Cursor/User/settings.json +---- + +== Cost optimization tips + +=== Use different models for chat and autocomplete + +Chat interactions benefit from reasoning depth, while autocomplete needs speed: + +[,json] +---- +{ + "cursor.chat.defaultModel": "claude-sonnet-4-5", + "cursor.cpp.defaultModel": "gpt-4o-mini" +} +---- + +This can reduce costs by 5-10x for autocomplete while maintaining quality for chat. + +=== Limit token usage + +Reduce the maximum tokens for autocomplete to prevent runaway costs: + +[,json] +---- +{ + "cursor.cpp.maxTokens": 256, + "cursor.chat.maxTokens": 2048 +} +---- + +Autocomplete rarely needs more than 256 tokens, while chat responses can vary. + +=== Use MCP tools for documentation + +Instead of pasting large documentation into chat, create MCP tools that fetch relevant sections on-demand. This reduces token costs by including only needed information. + +=== Monitor usage patterns + +Use the AI Gateway dashboard to identify optimization opportunities: + +. Navigate to your gateway's observability dashboard +. Filter by Cursor requests (use custom header if configured) +. Analyze: +** Token usage per request type (chat vs autocomplete) +** Most expensive queries +** High-frequency low-value requests + +=== Team-based cost attribution + +Use custom headers to track costs per developer or team: + +[,json] +---- +{ + "openai.additionalHeaders": { + "rp-aigw-id": "GATEWAY_ID", + "x-user-id": "${USER_EMAIL}", + "x-team": "backend" + } +} +---- + +Generate team-specific cost reports from the gateway dashboard. + +=== Enable deferred MCP tool loading + +Configure deferred tool loading to reduce token costs by 80-90%: + +. 
Navigate to your gateway configuration +. Enable *Deferred Tool Loading* under MCP Settings +. Save configuration + +This sends only search + orchestrator tools initially, reducing token usage significantly. + +== Next steps + +* xref:ai-agents:ai-gateway/mcp-aggregation-guide.adoc[]: Configure deferred tool loading to work within Cursor's 40-tool limit +* xref:ai-agents:ai-gateway/cel-routing-cookbook.adoc[]: Use CEL expressions to route Cursor requests based on context + +== Related pages + +* xref:ai-agents:ai-gateway/gateway-quickstart.adoc[]: Create and configure your AI Gateway +* xref:ai-agents:ai-gateway/gateway-architecture.adoc[]: Learn about AI Gateway architecture and benefits +* xref:ai-agents:ai-gateway/integrations/claude-code-user.adoc[]: Configure Claude Code with AI Gateway +* xref:ai-agents:ai-gateway/integrations/continue-user.adoc[]: Configure Continue.dev with AI Gateway +* xref:ai-agents:ai-gateway/integrations/cline-user.adoc[]: Configure Cline with AI Gateway diff --git a/modules/ai-agents/pages/ai-gateway/integrations/github-copilot-admin.adoc b/modules/ai-agents/pages/ai-gateway/integrations/github-copilot-admin.adoc new file mode 100644 index 000000000..730d95498 --- /dev/null +++ b/modules/ai-agents/pages/ai-gateway/integrations/github-copilot-admin.adoc @@ -0,0 +1,827 @@ += Configure AI Gateway for GitHub Copilot +:description: Configure Redpanda AI Gateway to support GitHub Copilot clients. +:page-topic-type: how-to +:personas: platform_admin +:learning-objective-1: Configure AI Gateway endpoints for GitHub Copilot connectivity +:learning-objective-2: Deploy multi-tenant authentication strategies for Copilot clients +:learning-objective-3: Set up model aliasing and BYOK routing for GitHub Copilot + +include::ai-agents:partial$ai-gateway-byoc-note.adoc[] + +Configure Redpanda AI Gateway to support GitHub Copilot clients accessing multiple LLM providers through OpenAI-compatible endpoints with bring-your-own-key (BYOK) support. 
+ +After reading this page, you will be able to: + +* [ ] Configure AI Gateway endpoints for GitHub Copilot connectivity. +* [ ] Deploy multi-tenant authentication strategies for Copilot clients. +* [ ] Set up model aliasing and BYOK routing for GitHub Copilot. + +== Prerequisites + +* AI Gateway deployed on a BYOC cluster running Redpanda version 25.3 or later +* Administrator access to the AI Gateway UI +* API keys for at least one LLM provider (OpenAI, Anthropic, or others) +* Understanding of xref:ai-agents:ai-gateway/gateway-architecture.adoc[AI Gateway concepts] +* GitHub Copilot Business or Enterprise subscription (for BYOK and custom endpoints) + +== About GitHub Copilot + +GitHub Copilot is an AI-powered code completion tool that integrates with popular IDEs including VS Code, Visual Studio, JetBrains IDEs, and Neovim. GitHub Copilot uses OpenAI models by default but supports BYOK (bring your own key) configurations for Business and Enterprise customers. + +Key characteristics: + +* Sends all requests in OpenAI-compatible format to `/v1/chat/completions` +* Limited support for custom headers (similar to Cursor IDE) +* Supports BYOK for Business/Enterprise subscriptions +* Built-in code completion, chat, and inline editing modes +* Configuration via IDE settings or organization policies +* High request volume from code completion features + +== Architecture overview + +GitHub Copilot connects to AI Gateway through standardized endpoints: + +* LLM endpoint: `https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1/chat/completions` for all providers +* MCP endpoint support: Limited (GitHub Copilot does not natively support MCP protocol) + +The gateway handles: + +. Authentication via bearer tokens in the `Authorization` header +. Gateway selection via query parameters or custom headers (requires workarounds) +. Model routing and aliasing for friendly names +. Format transforms from OpenAI format to provider-native formats +. 
Request logging and cost tracking per gateway +. BYOK routing for different teams or users + +== Enable LLM providers + +GitHub Copilot works with multiple providers through OpenAI-compatible transforms. Enable the providers your users will access. + +=== Configure OpenAI (default provider) + +GitHub Copilot uses OpenAI by default. To enable OpenAI through the gateway: + +. Navigate to *AI Gateway* > *Providers* in the Redpanda Cloud console +. Select *OpenAI* from the provider list +. Click *Add configuration* +. Enter your OpenAI API key +. Under *Format*, select *Native OpenAI* +. Click *Save* + +=== Configure Anthropic with OpenAI-compatible format + +For BYOK deployments, you can route GitHub Copilot to Anthropic models. Configure the gateway to transform requests: + +. Navigate to *AI Gateway* > *Providers* +. Select *Anthropic* from the provider list +. Click *Add configuration* +. Enter your Anthropic API key +. Under *Format*, select *OpenAI-compatible* (enables automatic transform) +. Click *Save* + +The gateway now transforms OpenAI-format requests to Anthropic's native `/v1/messages` format. + +=== Configure additional providers + +GitHub Copilot supports multiple providers through OpenAI-compatible transforms. For each provider: + +. Add the provider configuration in the gateway +. Set the format to *OpenAI-compatible* (the gateway handles format transformation) +. Enable the transform layer to convert OpenAI request format to the provider's native format + +Common additional providers: + +* Google Gemini (requires OpenAI-compatible transform) +* Mistral AI (already OpenAI-compatible format) +* Azure OpenAI (already OpenAI-compatible format) + +=== Enable models in the catalog + +After enabling providers, enable specific models: + +. Navigate to *AI Gateway* > *Models* +. 
Enable the models you want GitHub Copilot clients to access ++ +Common models for GitHub Copilot: ++ +* `gpt-4o` (OpenAI) +* `gpt-4o-mini` (OpenAI) +* `o1-mini` (OpenAI) +* `claude-sonnet-4-5` (Anthropic, requires alias) + +. Click *Save* + +GitHub Copilot typically uses model names without vendor prefixes. You'll configure model aliasing in the next section to map friendly names to provider-specific models. + +== Create a gateway for GitHub Copilot clients + +Create a dedicated gateway to isolate GitHub Copilot traffic and apply specific policies. + +=== Gateway configuration + +. Navigate to *AI Gateway* > *Gateways* +. Click *Create Gateway* +. Enter gateway details: ++ +[cols="1,2"] +|=== +|Field |Value + +|Name +|`github-copilot-gateway` (or your preferred name) + +|Workspace +|Select the workspace for access control grouping + +|Description +|Gateway for GitHub Copilot clients +|=== + +. Click *Create* +. Copy the gateway ID from the gateway details page + +The gateway ID is required for routing requests to this gateway. + +=== Configure model aliasing + +GitHub Copilot expects model names like `gpt-4o` without vendor prefixes. Configure aliases to map these to provider-specific models: + +. Navigate to the gateway's *Models* tab +. Click *Add Model Alias* +. Configure aliases: ++ +[cols="1,2,1"] +|=== +|Alias Name |Target Model |Provider + +|`gpt-4o` +|`openai/gpt-4o` +|OpenAI + +|`gpt-4o-mini` +|`openai/gpt-4o-mini` +|OpenAI + +|`claude-sonnet` +|`anthropic/claude-sonnet-4-5` +|Anthropic + +|`o1-mini` +|`openai/o1-mini` +|OpenAI +|=== + +. Click *Save* + +When GitHub Copilot requests `gpt-4o`, the gateway routes to OpenAI's `gpt-4o` model. Users can optionally request `claude-sonnet` for Anthropic models if the IDE configuration supports model selection. + +=== Configure unified LLM routing + +GitHub Copilot sends all requests to a single endpoint (`/v1/chat/completions`). Configure the gateway to route based on the requested model name. 
+ +==== Model-based routing + +Configure routing that inspects the model field to determine the target provider: + +. Navigate to the gateway's *LLM* tab +. Under *Routing*, click *Add route* +. Configure OpenAI routing: ++ +[source,cel] +---- +request.body.model.startsWith("gpt-") || request.body.model.startsWith("o1-") +---- + +. Add a *Primary provider pool*: ++ +* Provider: OpenAI +* Model: All enabled OpenAI models +* Transform: None (already OpenAI format) +* Load balancing: Round robin (if multiple OpenAI configurations exist) + +. Click *Save* +. Add another route for Anthropic models: ++ +[source,cel] +---- +request.body.model.startsWith("claude-") +---- + +. Add a *Primary provider pool*: ++ +* Provider: Anthropic +* Model: All enabled Anthropic models +* Transform: OpenAI to Anthropic + +. Click *Save* + +GitHub Copilot requests route to the appropriate provider based on the model alias. + +==== Default routing with fallback + +Configure a catch-all route for requests without specific model prefixes: + +[source,cel] +---- +true // Matches all requests not matched by previous routes +---- + +Add a primary provider (for example, OpenAI) with fallback to Anthropic: + +* Primary: OpenAI (for requests with no specific model) +* Fallback: Anthropic (if OpenAI is unavailable) +* Failover conditions: Rate limits, timeouts, 5xx errors + +=== Apply rate limits + +Prevent runaway usage from GitHub Copilot clients. Code completion features generate very high request volumes. + +. Navigate to the gateway's *LLM* tab +. Under *Rate Limit*, configure: ++ +[cols="1,2"] +|=== +|Setting |Recommended Value + +|Global rate limit +|300 requests per minute + +|Per-user rate limit +|30 requests per minute (if using user identification) +|=== + +. Click *Save* + +The gateway blocks requests exceeding these limits and returns HTTP 429 errors. 
+ +==== Rate limit considerations for code completion + +GitHub Copilot's code completion feature generates extremely frequent requests (potentially dozens per minute per user). Consider: + +* Higher global rate limits than other AI coding assistants +* Separate rate limits for different request types if the gateway supports request classification +* Monitoring initial usage patterns to adjust limits appropriately + +=== Set spending limits + +Control LLM costs across all providers: + +. Under *Spend Limit*, configure: ++ +[cols="1,2"] +|=== +|Setting |Value + +|Monthly budget +|$10,000 (adjust based on expected usage) + +|Enforcement +|Block requests after budget exceeded + +|Alert threshold +|80% of budget (sends notification) +|=== + +. Click *Save* + +The gateway tracks estimated costs per request across all providers and blocks traffic when the monthly budget is exhausted. + +== Configure authentication + +GitHub Copilot clients authenticate using bearer tokens in the `Authorization` header. + +=== Generate API tokens + +. Navigate to *Security* > *API Tokens* in the Redpanda Cloud console +. Click *Create Token* +. Enter token details: ++ +[cols="1,2"] +|=== +|Field |Value + +|Name +|`copilot-access` + +|Scopes +|`ai-gateway:read`, `ai-gateway:write` + +|Expiration +|Set appropriate expiration based on security policies +|=== + +. Click *Create* +. Copy the token (it appears only once) + +Distribute this token to GitHub Copilot administrators through secure channels for organization-level configuration. + +=== Token rotation + +Implement token rotation for security: + +. Create a new token before the existing token expires +. Update organization-level GitHub Copilot configuration with the new token +. Monitor usage of the old token in the observability dashboard +. 
Revoke the old token after the configuration update propagates + +== Multi-tenant deployment strategies + +GitHub Copilot has limited support for custom headers, making traditional `rp-aigw-id` header-based multi-tenancy challenging. Use one of these alternative strategies for BYOK deployments. + +=== Strategy 1: OAI Compatible Provider extension (recommended) + +For organizations using VS Code with GitHub Copilot, the OAI Compatible Provider extension enables custom headers including `rp-aigw-id`. + +==== Install the extension + +. Navigate to VS Code Extensions Marketplace +. Search for "OAI Compatible Provider" +. Install the extension +. Restart VS Code + +==== Configure the extension + +. Open VS Code settings (JSON) +. Add gateway configuration: ++ +[source,json] +---- +{ + "oai-compatible-provider.providers": [ + { + "name": "Redpanda AI Gateway", + "baseUrl": "https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1", + "headers": { + "Authorization": "Bearer YOUR_API_TOKEN", + "rp-aigw-id": "GATEWAY_ID" + }, + "models": [ + "gpt-4o", + "gpt-4o-mini", + "claude-sonnet" + ] + } + ] +} +---- + +. Replace: ++ +* `{CLUSTER_ID}`: Your Redpanda cluster ID +* `YOUR_API_TOKEN`: Team-specific API token +* `GATEWAY_ID`: Team-specific gateway ID + +This approach allows true multi-tenancy with proper gateway isolation per team. + +**Benefits:** + +* Full support for `rp-aigw-id` header +* Clean separation between tenants +* Standard authentication flow +* Works with any IDE supported by the extension + +**Limitations:** + +* Requires VS Code and extension installation +* Not available for all GitHub Copilot-supported IDEs +* Users must configure extension in addition to GitHub Copilot + +=== Strategy 2: Query parameter routing + +Embed tenant identity in query parameters for multi-tenant routing without custom headers. + +. Configure gateway routing to extract tenant from query parameters: ++ +[source,cel] +---- +request.url.query["tenant"][0] == "team-alpha" +---- + +. 
Distribute tenant-specific endpoints to each team +. Configure GitHub Copilot organization settings with the tenant-specific base URL + +**Configuration example for Team Alpha:** + +Organization-level GitHub Copilot settings: + +[source,json] +---- +{ + "copilot": { + "api_base_url": "https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1?tenant=team-alpha", + "api_key": "TEAM_ALPHA_TOKEN" + } +} +---- + +**Benefits:** + +* Works with standard GitHub Copilot configuration +* No additional extensions required +* Simple to implement + +**Limitations:** + +* Tenant identity exposed in URLs and logs +* Less clean than header-based routing +* URL parameters may be logged by intermediate proxies + +=== Strategy 3: Token-based gateway mapping + +Use different API tokens to identify which gateway to route to: + +. Generate separate API tokens for each tenant or team +. Tag tokens with metadata indicating the target gateway +. Configure gateway routing based on token identity: ++ +[source,cel] +---- +request.auth.metadata["gateway_id"] == "team-alpha-gateway" +---- + +. Apply tenant-specific routing, rate limits, and spending limits based on the token + +**Benefits:** + +* Transparent to users +* No URL modifications needed +* Centralized control through token management + +**Limitations:** + +* Requires gateway support for token metadata inspection +* Token management overhead increases with number of tenants +* All tenants use the same base URL + +=== Strategy 4: Single-tenant mode + +For simpler deployments, configure a single gateway with shared access: + +. Create one gateway for all GitHub Copilot users +. Generate a shared API token +. Configure GitHub Copilot at the organization level +. 
Use rate limits and spending limits to control overall usage + +**Benefits:** + +* Simplest configuration +* No tenant routing complexity +* Easy to manage + +**Limitations:** + +* No per-team cost tracking or limits +* Shared rate limits may impact individual teams +* All users have the same model access + +=== Choosing a multi-tenant strategy + +[cols="1,2,2,1"] +|=== +|Strategy |Pros |Cons |Best For + +|OAI Compatible Provider +|Full `rp-aigw-id` support, clean separation +|Requires extension, VS Code only +|Organizations standardized on VS Code + +|Query parameters +|No extensions needed, simple setup +|Tenant exposed in URLs, less clean +|Quick deployments, small teams + +|Token-based +|Transparent to users, centralized control +|Requires advanced gateway features +|Large organizations with many teams + +|Single-tenant +|Simplest configuration and management +|No per-team isolation or limits +|Small organizations, proof of concept +|=== + +== Configure GitHub Copilot clients + +Provide these instructions based on your chosen multi-tenant strategy. + +=== Organization-level configuration (GitHub Enterprise) + +For GitHub Enterprise customers, configure Copilot at the organization level: + +. Navigate to your organization settings on GitHub +. Go to *Copilot* > *Policies* +. Enable *Allow use of Copilot with custom models* +. Configure the custom endpoint: ++ +[source,json] +---- +{ + "api_base_url": "https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1", + "api_key": "YOUR_API_TOKEN" +} +---- + +. If using query parameter routing, append the tenant identifier: ++ +[source,json] +---- +{ + "api_base_url": "https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1?tenant=YOUR_TEAM", + "api_key": "YOUR_API_TOKEN" +} +---- + +This configuration applies to all users in the organization. + +=== IDE-specific configuration (individual users) + +For individual users or when organization-level configuration is not available: + +==== VS Code configuration + +. 
Open VS Code settings +. Search for "GitHub Copilot" +. Configure custom endpoint (if using OAI Compatible Provider): ++ +[source,json] +---- +{ + "github.copilot.advanced": { + "endpoint": "https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1" + } +} +---- + +==== JetBrains IDEs + +. Open IDE Settings +. Navigate to *Tools* > *GitHub Copilot* +. Configure custom endpoint (support varies by IDE and Copilot version) + +==== Neovim + +. Edit Copilot configuration +. Add custom endpoint in the Copilot.vim or Copilot.lua configuration +. Refer to the Copilot.vim documentation for exact syntax + +=== Model selection + +Configure model preferences based on use case: + +[cols="1,2,1"] +|=== +|Use Case |Recommended Model |Reason + +|Code completion +|`gpt-4o-mini` +|Fast, cost-effective for frequent requests + +|Code explanation +|`gpt-4o` or `claude-sonnet` +|Higher quality for complex explanations + +|Code generation +|`gpt-4o` or `claude-sonnet` +|Better at generating complete functions + +|Documentation +|`gpt-4o-mini` +|Sufficient quality for docstrings and comments +|=== + +Model selection is typically configured at the organization level or through IDE settings. + +== Monitor GitHub Copilot usage + +Track GitHub Copilot activity through gateway observability features. + +=== View request logs + +. Navigate to *AI Gateway* > *Observability* > *Logs* +. Filter by gateway ID: `github-copilot-gateway` +. Review: ++ +* Request timestamps and duration +* Model used per request (including aliases) +* Token usage (prompt and completion tokens) +* Estimated cost per request +* HTTP status codes and errors +* Transform operations (OpenAI to provider-native format) + +GitHub Copilot generates distinct request patterns: + +* Code completion: Very high volume, short requests with low token counts +* Chat/explain: Medium volume, longer requests with code context +* Code generation: Lower volume, variable length requests + +=== Analyze metrics + +. 
Navigate to *AI Gateway* > *Observability* > *Metrics* +. Select the GitHub Copilot gateway +. Review: ++ +[cols="1,2"] +|=== +|Metric |Purpose + +|Request volume by model +|Identify most-used models via aliases + +|Token usage by model +|Track consumption patterns (completion vs chat) + +|Estimated spend by provider +|Monitor costs across providers with transforms + +|Latency (p50, p95, p99) +|Detect transform overhead and performance issues + +|Error rate by provider +|Identify failing providers or transform issues + +|Transform success rate +|Monitor OpenAI-to-provider format conversion success + +|Requests per user/tenant +|Track usage by team (if using multi-tenant strategies) +|=== + + +=== Query logs via API + +Programmatically access logs for integration with monitoring systems: + +[source,bash] +---- +curl https://{CLUSTER_ID}.cloud.redpanda.com/api/ai-gateway/logs \ + -H "Authorization: Bearer YOUR_API_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "gateway_id": "GATEWAY_ID", + "start_time": "2026-01-01T00:00:00Z", + "end_time": "2026-01-14T23:59:59Z", + "limit": 100 + }' +---- + +== Security considerations + +Apply these security best practices for GitHub Copilot deployments. + +=== Limit token scope + +Create tokens with minimal required scopes: + +* `ai-gateway:read`: Required for model discovery +* `ai-gateway:write`: Required for LLM requests + +Avoid granting broader scopes like `admin` or `cluster:write`. + +=== Implement network restrictions + +If GitHub Copilot clients connect from known networks, configure network policies: + +. Use cloud provider security groups to restrict access to AI Gateway endpoints +. Allowlist only the IP ranges where GitHub Copilot clients operate +. 
Monitor for unauthorized access attempts in request logs + +=== Enforce token expiration + +Set short token lifetimes for high-security environments: + +* Development environments: 90 days +* Production environments: 30 days + +Automate token rotation to reduce manual overhead. Coordinate with GitHub organization administrators when rotating tokens. + +=== Monitor transform operations + +Because GitHub Copilot may route to non-OpenAI providers through transforms: + +. Review transform success rates in metrics +. Monitor for transform failures that may leak request details +. Test transforms thoroughly before production deployment +. Keep transform logic updated as provider APIs evolve + +=== Audit model access + +Review which models GitHub Copilot clients can access: + +. Periodically audit enabled models and aliases +. Remove deprecated or unused model configurations +. Monitor model usage logs for unexpected patterns +. Ensure cost-effective models are used for high-volume completion requests + +=== Code completion security + +GitHub Copilot sends code context to LLM providers. Ensure: + +* Users understand what code context is sent with requests +* Proprietary code may be included in prompts +* Configure organization policies to limit code sharing if needed +* Review provider data retention policies +* Monitor logs for sensitive information in prompts (if logging includes prompt content) + +=== Organization-level controls + +For GitHub Enterprise customers: + +. Use organization-level policies to enforce custom endpoint usage +. Restrict which users can configure custom endpoints +. Monitor organization audit logs for configuration changes +. Implement approval workflows for endpoint changes + +== Troubleshooting + +Common issues and solutions when configuring AI Gateway for GitHub Copilot. + +=== GitHub Copilot cannot connect to gateway + +Symptom: Connection errors when GitHub Copilot tries to send requests. 
+ +Causes and solutions: + +* **Invalid base URL**: Verify the configured endpoint matches the gateway URL (including query parameters if using query-based routing) +* **Expired token**: Generate a new API token and update the GitHub Copilot configuration +* **Network connectivity**: Verify the cluster endpoint is accessible from the client network +* **Provider not enabled**: Ensure at least one provider is enabled and has models in the catalog +* **SSL/TLS issues**: Verify the cluster has valid SSL certificates +* **Organization policy blocking custom endpoints**: Check GitHub organization settings + +=== Model not found errors + +Symptom: GitHub Copilot shows "model not found" or similar errors. + +Causes and solutions: + +* **Model not enabled in catalog**: Enable the model in the gateway's model catalog +* **Model alias missing**: Create an alias for the model name GitHub Copilot expects (for example, `gpt-4o`) +* **Incorrect model name**: Verify GitHub Copilot is requesting a model name that exists in your aliases +* **Routing rule mismatch**: Check that routing rules correctly match the requested model name + +=== Transform errors or unexpected responses + +Symptom: Responses are malformed or GitHub Copilot reports format errors. + +Causes and solutions: + +* **Transform disabled**: Ensure OpenAI-compatible transform is enabled for non-OpenAI providers (for example, Anthropic) +* **Transform version mismatch**: Verify the transform is compatible with the current provider API version +* **Model-specific transform issues**: Some models may require specific transform configurations +* **Check transform logs**: Review logs for transform errors and stack traces +* **Response format incompatibility**: Verify the provider's response can be transformed to OpenAI format + +=== High costs or token usage + +Symptom: Token usage and costs exceed expectations. 
+ +Causes and solutions: + +* **Code completion using expensive model**: Configure completion to use `gpt-4o-mini` instead of larger models +* **No rate limits**: Apply per-minute rate limits to prevent runaway usage +* **Missing spending limits**: Set monthly budget limits with blocking enforcement +* **Chat using wrong model**: Ensure chat/explanation features use cost-effective models +* **Transform overhead**: Monitor if transforms add significant token overhead +* **High completion request volume**: Expected behavior, adjust budgets or implement stricter rate limits + +=== Requests failing with 429 errors + +Symptom: GitHub Copilot receives HTTP 429 Too Many Requests errors. + +Causes and solutions: + +* **Rate limit exceeded**: Review and increase rate limits if usage is legitimate (code completion needs very high limits) +* **Upstream provider rate limits**: Check if the upstream LLM provider is rate-limiting; configure failover to alternate providers +* **Budget exhausted**: Verify monthly spending limit has not been reached +* **Per-user limits too restrictive**: Adjust per-user rate limits if using multi-tenant strategies +* **Spike in usage**: Code completion can generate sudden usage spikes, consider burstable rate limits + +=== Multi-tenant routing failures + +Symptom: Requests route to wrong gateway or fail authorization. + +Causes and solutions: + +* **Query parameter missing**: Ensure query parameter is appended to all requests if using query-based routing +* **Token metadata incorrect**: Verify token is tagged with correct gateway metadata +* **Routing rule conflicts**: Check for overlapping routing rules that may cause unexpected routing +* **Organization policy override**: Verify GitHub organization settings aren't overriding user configurations +* **Extension not configured**: If using OAI Compatible Provider extension, verify proper installation and configuration + +=== Performance issues + +Symptom: Slow response times from GitHub Copilot. 
+ +Causes and solutions: + +* **Transform latency**: Monitor metrics for transform processing time overhead +* **Provider latency**: Check latency metrics by provider to identify slow backends +* **Network latency**: Verify cluster is in a region with good connectivity to users +* **Cold start delays**: Some providers may have cold start latency on first request +* **Rate limiting overhead**: Check if rate limit enforcement is adding latency + +== Next steps + +* xref:ai-agents:ai-gateway/cel-routing-cookbook.adoc[]: Implement advanced routing rules for model aliasing + diff --git a/modules/ai-agents/pages/ai-gateway/integrations/github-copilot-user.adoc b/modules/ai-agents/pages/ai-gateway/integrations/github-copilot-user.adoc new file mode 100644 index 000000000..c8b23edb2 --- /dev/null +++ b/modules/ai-agents/pages/ai-gateway/integrations/github-copilot-user.adoc @@ -0,0 +1,965 @@ += Configure GitHub Copilot with AI Gateway +:description: Configure GitHub Copilot to use Redpanda AI Gateway for unified LLM access and custom provider management. +:page-topic-type: how-to +:personas: ai_agent_developer, app_developer +:learning-objective-1: Configure GitHub Copilot in VS Code and JetBrains IDEs to route requests through AI Gateway +:learning-objective-2: Set up multi-tenancy with gateway ID headers for cost tracking +:learning-objective-3: Configure enterprise BYOK deployments for team-wide Copilot access + +include::ai-agents:partial$ai-gateway-byoc-note.adoc[] + +After xref:ai-agents:ai-gateway/gateway-quickstart.adoc[configuring your AI Gateway], set up GitHub Copilot to route LLM requests through the gateway for centralized observability, cost management, and provider flexibility. + +After reading this page, you will be able to: + +* [ ] Configure GitHub Copilot in VS Code and JetBrains IDEs to route requests through AI Gateway. +* [ ] Set up multi-tenancy with gateway ID headers for cost tracking. 
+* [ ] Configure enterprise BYOK deployments for team-wide Copilot access. + +== Prerequisites + +Before configuring GitHub Copilot, ensure you have: + +* GitHub Copilot subscription (Individual, Business, or Enterprise) +* An active Redpanda AI Gateway with: +** At least one LLM provider enabled (see xref:ai-agents:ai-gateway/gateway-quickstart.adoc#step-1-enable-a-provider[Enable a provider]) +** A gateway created and configured (see xref:ai-agents:ai-gateway/gateway-quickstart.adoc#step-3-create-a-gateway[Create a gateway]) +* Your AI Gateway credentials: +** Gateway endpoint URL (for example, `https://gw.ai.panda.com`) +** Gateway ID (for example, `gateway-abc123`) +** API key with access to the gateway +* Your IDE: +** VS Code with GitHub Copilot extension installed +** Or JetBrains IDE (IntelliJ IDEA, PyCharm, etc.) with GitHub Copilot plugin + +== About GitHub Copilot and AI Gateway + +GitHub Copilot provides AI-powered code completion and chat within your IDE. By default, Copilot routes requests directly to GitHub's infrastructure, which uses OpenAI and other LLM providers. + +When you route Copilot through AI Gateway, you gain: + +* Centralized observability across all Copilot usage +* Cost attribution per developer, team, or project +* Provider flexibility (use your own API keys or alternative models) +* Policy enforcement (rate limits, spend controls) +* Multi-tenancy support for enterprise deployments + +== Configuration approaches + +GitHub Copilot supports different configuration approaches depending on your IDE and subscription tier: + +[cols="1,2,2,1"] +|=== +|IDE |Method |Subscription Tier |Complexity + +|VS Code +|Custom OpenAI models +|Individual, Business, Enterprise +|Medium + +|VS Code +|OAI Compatible Provider extension +|Individual, Business, Enterprise +|Low + +|JetBrains +|Enterprise BYOK +|Enterprise +|Low +|=== + +Choose the approach that matches your environment. 
VS Code users have multiple options, while JetBrains users need GitHub Copilot Enterprise with BYOK support. + +== Configure in VS Code + +VS Code offers two approaches for routing Copilot through AI Gateway: + +. Custom OpenAI models (manual configuration) +. OAI Compatible Provider extension (simplified) + +=== Option 1: Custom OpenAI models + +This approach configures VS Code to recognize your AI Gateway as a custom OpenAI-compatible provider. + +==== Configure custom models + +. Open VS Code Settings: +** macOS: `Cmd+,` +** Windows/Linux: `Ctrl+,` +. Search for `github.copilot.chat.customOAIModels` +. Click *Edit in settings.json* +. Add the following configuration: + +[,json] +---- +{ + "github.copilot.chat.customOAIModels": [ + { + "id": "anthropic/claude-sonnet-4-5", + "name": "Claude Sonnet 4.5 (Gateway)", + "endpoint": "https://gw.ai.panda.com/v1", + "provider": "redpanda-gateway" + }, + { + "id": "openai/gpt-4o", + "name": "GPT-4o (Gateway)", + "endpoint": "https://gw.ai.panda.com/v1", + "provider": "redpanda-gateway" + } + ] +} +---- + +Replace `https://gw.ai.panda.com/v1` with your gateway endpoint. + +IMPORTANT: This experimental feature requires configuring API keys and custom headers through the Copilot Chat UI, not in `settings.json`. + +==== Configure API key and headers via Copilot Chat UI + +. Open Copilot Chat in VS Code (`Cmd+I` or `Ctrl+I`) +. Click the model selector dropdown +. Click *Manage Models* at the bottom of the dropdown +. Click *Add Model* +. Select your configured provider ("redpanda-gateway") +. Enter the connection details: +** *Base URL*: `https://gw.ai.panda.com/v1` (should match your settings.json endpoint) +** *API Key*: Your Redpanda API key +** *Custom Headers*: Click *Add Header* +*** Header name: `rp-aigw-id` +*** Header value: `gateway-abc123` (your gateway ID) +. Click *Save* + +==== Select model + +. Open Copilot chat with `Cmd+I` (macOS) or `Ctrl+I` (Windows/Linux) +. Click the model selector dropdown +. 
Choose a model from the "redpanda-gateway" provider + +=== Option 2: OAI Compatible Provider extension + +The OAI Compatible Provider extension provides enhanced support for OpenAI-compatible endpoints with custom headers. + +==== Install extension + +. Open VS Code Extensions (`Cmd+Shift+X` or `Ctrl+Shift+X`) +. Search for "OAI Compatible Provider" +. Click *Install* + +==== Configure base URL in settings + +Add the base URL configuration in VS Code settings: + +. Open VS Code Settings (`Cmd+,` or `Ctrl+,`) +. Search for `oaicopilot` +. Click *Edit in settings.json* +. Add the following: + +[,json] +---- +{ + "oaicopilot.baseUrl": "https://gw.ai.panda.com/v1", + "oaicopilot.models": [ + "anthropic/claude-sonnet-4-5", + "openai/gpt-4o", + "openai/gpt-4o-mini" + ] +} +---- + +Replace `https://gw.ai.panda.com/v1` with your gateway endpoint. + +==== Configure API key and headers via Copilot Chat UI + +IMPORTANT: Do not configure API keys or custom headers in `settings.json`. Use the Copilot Chat UI instead. + +. Open Copilot Chat in VS Code (`Cmd+I` or `Ctrl+I`) +. Click the model selector dropdown +. Click *Manage Models* +. Find the OAI Compatible Provider in the list +. Click *Configure* or *Edit* +. Enter the connection details: +** *API Key*: Your Redpanda API key +** *Custom Headers*: Add the `rp-aigw-id` header +*** Header name: `rp-aigw-id` +*** Header value: `gateway-abc123` (your gateway ID) +. Click *Save* + +==== Select model + +. Open Copilot chat with `Cmd+I` (macOS) or `Ctrl+I` (Windows/Linux) +. Click the model selector dropdown +. Choose a model from the OAI Compatible Provider + +== Configure in JetBrains IDEs + +JetBrains IDE integration requires GitHub Copilot Enterprise with Bring Your Own Key (BYOK) support. + +=== Prerequisites + +* GitHub Copilot Enterprise subscription +* BYOK enabled for your organization +* JetBrains IDE 2024.1 or later +* GitHub Copilot plugin version 1.4.0 or later + +=== Configure BYOK with AI Gateway + +. 
Open your JetBrains IDE (IntelliJ IDEA, PyCharm, etc.) +. Navigate to *Settings/Preferences*: +** macOS: `Cmd+,` +** Windows/Linux: `Ctrl+Alt+S` +. Go to *Tools* > *GitHub Copilot* +. Under *Advanced Settings*, find *Custom Model Configuration* +. Configure the OpenAI-compatible endpoint: + +[,text] +---- +Base URL: https://gw.ai.panda.com/v1 +API Key: your-redpanda-api-key +---- + +[start=6] +. Click *Advanced Headers* +. Add custom header: + +[,text] +---- +Header Name: rp-aigw-id +Header Value: gateway-abc123 +---- + +Replace placeholder values: + +* `https://gw.ai.panda.com/v1` - Your gateway endpoint +* `your-redpanda-api-key` - Your Redpanda API key +* `gateway-abc123` - Your gateway ID + +=== Configure model selection + +In the GitHub Copilot settings: + +. Expand *Model Selection* +. Choose your preferred models from the AI Gateway: +** Chat model: `anthropic/claude-sonnet-4-5` or `openai/gpt-4o` +** Code completion model: `openai/gpt-4o-mini` (faster, cost-effective) + +Model format uses `vendor/model_id` pattern to route through the gateway to the appropriate provider. + +=== Test configuration + +. Open a code file +. Trigger code completion (start typing) +. Or open Copilot chat: +** Right-click > *Copilot* > *Open Chat* +** Or use shortcut: `Cmd+Shift+C` (macOS) or `Ctrl+Shift+C` (Windows/Linux) +. Verify suggestions appear + +Check the AI Gateway dashboard to confirm requests are logged. + +== Multi-tenancy configuration + +For organizations with multiple teams or projects sharing AI Gateway, use gateway ID headers to track usage per team. 
+
+=== Approach 1: One gateway per team
+
+Create separate gateways for each team:
+
+* Team A Gateway: ID `team-a-gateway-123`
+* Team B Gateway: ID `team-b-gateway-456`
+
+Each team configures their IDE with their team's gateway ID:
+
+[,json]
+----
+{
+  "oai.provider.headers": {
+    "rp-aigw-id": "team-a-gateway-123"
+  }
+}
+----
+
+Benefits:
+
+* Isolated cost tracking per team
+* Team-specific rate limits and budgets
+* Separate observability dashboards
+
+=== Approach 2: Shared gateway with custom headers
+
+Use a single gateway with custom headers for attribution:
+
+[,json]
+----
+{
+  "oai.provider.headers": {
+    "rp-aigw-id": "shared-gateway-789",
+    "x-team": "backend-team",
+    "x-project": "api-service"
+  }
+}
+----
+
+Configure gateway CEL routing to read these headers:
+
+[,cel]
+----
+request.headers["x-team"] == "backend-team" ? "openai/gpt-4o" : "openai/gpt-4o-mini"
+----
+
+Benefits:
+
+* Single gateway to manage
+* Flexible cost attribution
+* Header-based routing policies
+
+Filter observability dashboard by `x-team` or `x-project` headers to generate team-specific reports.
+
+=== Approach 3: Environment-based gateways
+
+Separate development, staging, and production environments:
+
+[,json]
+----
+{
+  "oai.provider.headers": {
+    "rp-aigw-id": "${env:GATEWAY_ID}",
+    "x-environment": "${env:ENVIRONMENT}"
+  }
+}
+----
+
+Set environment variables per workspace:
+
+[,bash]
+----
+# Development workspace
+export GATEWAY_ID="dev-gateway-123"
+export ENVIRONMENT="development"
+
+# Production workspace
+export GATEWAY_ID="prod-gateway-456"
+export ENVIRONMENT="production"
+----
+
+Benefits:
+
+* Prevent development usage from affecting production metrics
+* Different rate limits and budgets per environment
+* Environment-specific model access policies
+
+== Enterprise BYOK at scale
+
+This section describes how large organizations can deploy GitHub Copilot Enterprise with AI Gateway across hundreds or thousands of developers.
+ +=== Centralized configuration management + +Distribute IDE configuration files via: + +* **Git repository**: Store `settings.json` or IDE configuration in a shared repository +* **Configuration management tools**: Puppet, Chef, Ansible +* **Group Policy** (Windows environments) +* **MDM solutions** (macOS environments) + +Example centralized configuration: + +[,json] +---- +{ + "oai.provider.endpoint": "https://gw.company.com/v1", + "oai.provider.apiKey": "${env:COPILOT_GATEWAY_KEY}", + "oai.provider.headers": { + "rp-aigw-id": "${env:COPILOT_GATEWAY_ID}", + "x-user-email": "${env:USER_EMAIL}", + "x-department": "${env:DEPARTMENT}" + } +} +---- + +Developers set environment variables locally or receive them from identity management systems. + +=== API key management + +**Option 1: Individual API keys** + +Each developer gets their own Redpanda API key: + +* Tied to their identity (email, employee ID) +* Revocable when they leave the organization +* Enables per-developer cost attribution + +**Option 2: Team API keys** + +Teams share API keys: + +* Simpler key management +* Cost attribution by team, not individual +* Use custom headers for finer-grained tracking + +**Option 3: Service account keys** + +Single key for all developers: + +* Simplest to deploy +* No per-developer tracking +* Use custom headers for all attribution + +=== Automated provisioning workflow + +. Developer joins organization +. Identity system (Okta, Azure AD, etc.) triggers provisioning: +.. Create Redpanda API key +.. Assign to appropriate gateway +.. Generate IDE configuration file with embedded keys +.. Distribute to developer workstation +. Developer installs IDE and GitHub Copilot +. Configuration auto-applies (via MDM or configuration management) +. Developer starts using Copilot immediately + +=== Observability and governance + +Track usage across the organization: + +. Navigate to AI Gateway dashboard +. 
Filter by custom headers: +** `x-department`: View costs per department +** `x-user-email`: Track usage per developer +** `x-project`: Attribute costs to specific projects +. Generate reports: +** Top 10 users by token usage +** Departments exceeding budget +** Projects using deprecated models +. Set alerts: +** Individual developer exceeds threshold (potential misuse) +** Department budget approaching limit +** Unusual request patterns (security concern) + +=== Policy enforcement + +Use gateway CEL routing to enforce policies: + +[,cel] +---- +// Limit junior developers to cost-effective models +request.headers["x-user-level"] == "junior" + ? "openai/gpt-4o-mini" + : "anthropic/claude-sonnet-4-5" + +// Block access for contractors to expensive models +request.headers["x-user-type"] == "contractor" && +request.model.contains("opus") + ? error("Contractors cannot use Opus models") + : request.model +---- + +== Verify configuration + +After configuring GitHub Copilot, verify it routes requests through your AI Gateway. + +=== Test code completion + +. Open a code file in your IDE +. Start typing a function definition +. Wait for code completion suggestions to appear + +Completion requests appear in the gateway dashboard with: + +* Low token counts (typically 50-200 tokens) +* High request frequency (as you type) +* The completion model you configured + +=== Test chat interface + +. Open Copilot chat: +** VS Code: `Cmd+I` (macOS) or `Ctrl+I` (Windows/Linux) +** JetBrains: Right-click > *Copilot* > *Open Chat* +. Ask a simple question: "Explain this function" +. Wait for response + +Chat requests appear in the gateway dashboard with: + +* Higher token counts (500-2000 tokens typical) +* The chat model you configured +* Response status (200 for success) + +=== Verify in dashboard + +. Open the Redpanda Cloud Console +. Navigate to your gateway's observability dashboard +. Filter by gateway ID +. 
Verify:
+** Requests appear in logs
+** Models show correct format (for example, `anthropic/claude-sonnet-4-5`)
+** Token usage and cost are recorded
+** Custom headers appear (if configured)
+
+If requests don't appear, see <<troubleshooting>>.
+
+== Advanced configuration
+
+=== Model-specific settings
+
+Configure different models for different tasks:
+
+[,json]
+----
+{
+  "oai.provider.models": [
+    {
+      "id": "anthropic/claude-sonnet-4-5",
+      "name": "Claude Sonnet (chat)",
+      "type": "chat",
+      "temperature": 0.7,
+      "maxTokens": 4096
+    },
+    {
+      "id": "openai/gpt-4o-mini",
+      "name": "GPT-4o Mini (completion)",
+      "type": "completion",
+      "temperature": 0.2,
+      "maxTokens": 512
+    }
+  ]
+}
+----
+
+Settings explained:
+
+* Chat uses Claude Sonnet with higher temperature for creative responses
+* Completion uses GPT-4o Mini with lower temperature for deterministic code
+* Chat allows longer responses, completion limits tokens for speed
+
+=== Workspace-specific configuration
+
+Override global settings for specific projects using workspace settings.
+
+In VS Code, create `.vscode/settings.json` in your project root:
+
+[,json]
+----
+{
+  "oai.provider.headers": {
+    "rp-aigw-id": "project-gateway-123",
+    "x-project": "customer-portal"
+  }
+}
+----
+
+Benefits:
+
+* Route different projects through different gateways
+* Track costs per project
+* Use different models per project (cost-effective for internal, premium for customer-facing)
+
+=== Custom request timeouts
+
+Configure timeout for AI Gateway requests:
+
+[,json]
+----
+{
+  "oai.provider.timeout": 30000
+}
+----
+
+Timeout is in milliseconds. Default is typically 30000 (30 seconds).
+ +Increase timeouts for: + +* High-latency network environments +* Complex code generation tasks +* Large file context + +=== Debug mode + +Enable debug logging to troubleshoot issues: + +[,json] +---- +{ + "oai.provider.debug": true, + "github.copilot.advanced": { + "debug": true + } +} +---- + +View debug logs: + +* VS Code: Developer Console (`Help` > `Toggle Developer Tools` > `Console` tab) +* JetBrains: `Help` > `Diagnostic Tools` > `Debug Log Settings` > Add `github.copilot` + +Debug mode shows: + +* HTTP request and response headers +* Model selection decisions +* Token usage calculations +* Error details + +[[troubleshooting]] +== Troubleshooting + +=== Copilot shows no suggestions + +**Symptom**: Code completion doesn't work or Copilot shows "No suggestions available". + +**Causes and solutions**: + +. **Configuration not loaded** ++ +Reload your IDE to apply configuration changes: ++ +* VS Code: Command Palette > "Developer: Reload Window" +* JetBrains: File > Invalidate Caches / Restart + +. **Incorrect endpoint URL** ++ +Verify the URL format includes `/v1` at the end: ++ +[,text] +---- +# Correct +https://gw.ai.panda.com/v1 + +# Incorrect +https://gw.ai.panda.com +---- + +. **Authentication failure** ++ +Verify your API key is valid: ++ +[,bash] +---- +curl -H "Authorization: Bearer YOUR_API_KEY" \ + -H "rp-aigw-id: GATEWAY_ID" \ + https://gw.ai.panda.com/v1/models +---- ++ +You should receive a list of available models. If you get `401 Unauthorized`, regenerate your API key in the Redpanda Cloud Console. + +. **Extension/plugin disabled** ++ +Verify GitHub Copilot is enabled: ++ +* VS Code: Extensions view > GitHub Copilot > Ensure "Enabled" +* JetBrains: Settings > Plugins > GitHub Copilot > Check "Enabled" + +. **Network connectivity issues** ++ +Test connectivity to the gateway: ++ +[,bash] +---- +curl -I https://gw.ai.panda.com/v1 +---- ++ +If this times out, check your network configuration, firewall rules, or VPN connection. 
+ +=== Requests not appearing in gateway dashboard + +**Symptom**: Copilot works, but requests don't appear in the AI Gateway observability dashboard. + +**Causes and solutions**: + +. **Wrong gateway ID** ++ +Verify the `rp-aigw-id` header matches the gateway you're viewing in the dashboard (case-sensitive). + +. **Missing header configuration** ++ +Ensure `rp-aigw-id` header is configured: ++ +[,json] +---- +{ + "oai.provider.headers": { + "rp-aigw-id": "GATEWAY_ID" + } +} +---- ++ +For VS Code custom models without extension support, you may need to use Option 2 (OAI Compatible Provider extension). + +. **Using direct GitHub connection** ++ +If the endpoint configuration is missing or incorrect, Copilot may route directly to GitHub instead of your gateway. Verify endpoint configuration. + +. **Log ingestion delay** ++ +Gateway logs can take 5-10 seconds to appear in the dashboard. Wait briefly and refresh. + +. **Environment variable not set** ++ +If using environment variables like `${env:REDPANDA_API_KEY}`, verify they're set before launching the IDE: ++ +[,bash] +---- +echo $REDPANDA_API_KEY # Should print your API key +---- + +=== High latency or slow suggestions + +**Symptom**: Code completion is slow or chat responses take a long time. + +**Causes and solutions**: + +. **Gateway geographic distance** ++ +If your gateway is in a different region than you or the upstream provider, this adds network latency. Check gateway region in the Redpanda Cloud Console. + +. **Slow model for completion** ++ +Use a faster model for code completion: ++ +[,json] +---- +{ + "oai.provider.models": [ + { + "id": "openai/gpt-4o-mini", + "type": "completion" + } + ] +} +---- ++ +Models like GPT-4o Mini or Claude Haiku provide faster responses ideal for code completion. + +. **Provider pool failover** ++ +If your gateway is configured with fallback providers, check the logs to see if requests are failing over. Failover adds latency. + +. 
**Rate limiting** ++ +If you're hitting rate limits, the gateway may be queuing requests. Check the observability dashboard for rate limit metrics. + +. **Token limit too high** ++ +Reduce `maxTokens` for completion models to improve speed: ++ +[,json] +---- +{ + "oai.provider.models": [ + { + "id": "openai/gpt-4o-mini", + "type": "completion", + "maxTokens": 256 + } + ] +} +---- + +=== Custom headers not being sent + +**Symptom**: Custom headers (like `x-team` or `x-project`) don't appear in gateway logs. + +**Causes and solutions**: + +. **Extension not installed (VS Code)** ++ +Custom headers require the OAI Compatible Provider extension in VS Code. Install it from the Extensions marketplace. + +. **Header configuration location** ++ +Ensure headers are in the correct configuration section: ++ +[,json] +---- +{ + "oai.provider.headers": { + "rp-aigw-id": "GATEWAY_ID", + "x-custom": "value" + } +} +---- ++ +Not: ++ +[,json] +---- +{ + "github.copilot.advanced": { + "headers": { // Wrong location + "x-custom": "value" + } + } +} +---- + +. **Environment variable not expanded** ++ +If using `${env:VAR_NAME}` syntax, verify the environment variable is set before launching the IDE. + +=== Model not recognized + +**Symptom**: Error message "Model not found" or "Invalid model ID". + +**Causes and solutions**: + +. **Incorrect model format** ++ +Ensure model names use the `vendor/model_id` format: ++ +[,text] +---- +# Correct +anthropic/claude-sonnet-4-5 +openai/gpt-4o + +# Incorrect +claude-sonnet-4-5 +gpt-4o +---- + +. **Model not enabled in gateway** ++ +Verify the model is enabled in your AI Gateway configuration: ++ +.. Open Redpanda Cloud Console +.. Navigate to your gateway +.. Check enabled providers and models + +. **Typo in model ID** ++ +Double-check the model ID matches exactly (case-sensitive). Copy from the AI Gateway UI rather than typing manually. + +=== Configuration changes not taking effect + +**Symptom**: Changes to settings don't apply. 
+ +**Solutions**: + +. **Reload IDE** ++ +Configuration changes require reloading: ++ +* VS Code: Command Palette > "Developer: Reload Window" +* JetBrains: File > Invalidate Caches / Restart + +. **Invalid JSON syntax** ++ +Validate your `settings.json` file: ++ +[,bash] +---- +python3 -m json.tool ~/.config/Code/User/settings.json +---- ++ +Fix any syntax errors reported. + +. **Workspace settings override** ++ +Check if `.vscode/settings.json` in your project root overrides global settings. Workspace settings take precedence over global settings. + +. **File permissions** ++ +Verify the IDE can read the configuration file: ++ +[,bash] +---- +ls -la ~/.config/Code/User/settings.json +---- ++ +Fix permissions if needed: ++ +[,bash] +---- +chmod 600 ~/.config/Code/User/settings.json +---- + +== Cost optimization tips + +=== Use different models for chat and completion + +Code completion needs speed, while chat benefits from reasoning depth: + +[,json] +---- +{ + "oai.provider.models": [ + { + "id": "anthropic/claude-sonnet-4-5", + "type": "chat" + }, + { + "id": "openai/gpt-4o-mini", + "type": "completion" + } + ] +} +---- + +This can reduce costs by 5-10x for code completion while maintaining chat quality. + +=== Limit token usage + +Reduce maximum tokens for completion to prevent runaway costs: + +[,json] +---- +{ + "oai.provider.models": [ + { + "id": "openai/gpt-4o-mini", + "type": "completion", + "maxTokens": 256 + } + ] +} +---- + +Code completion rarely needs more than 256 tokens. + +=== Monitor usage patterns + +Use the AI Gateway dashboard to identify optimization opportunities: + +. Navigate to your gateway's observability dashboard +. Filter by custom headers (for example, `x-team`, `x-user-email`) +. 
Analyze: +** Token usage per developer or team +** Most expensive queries +** High-frequency low-value requests + +=== Set team-based budgets + +Use separate gateways or CEL routing to enforce team budgets: + +[,cel] +---- +// Limit team to 1 million tokens per month +request.headers["x-team"] == "frontend" && +monthly_tokens > 1000000 + ? error("Team budget exceeded") + : request.model +---- + +Configure alerts in the dashboard when teams approach their limits. + +=== Track costs per project + +Use custom headers to attribute costs: + +[,json] +---- +{ + "oai.provider.headers": { + "x-project": "mobile-app" + } +} +---- + +Generate project-specific cost reports from the gateway dashboard. + +== Next steps + +* xref:ai-agents:ai-gateway/cel-routing-cookbook.adoc[]: Use CEL expressions to route Copilot requests based on context +* xref:ai-agents:ai-gateway/mcp-aggregation-guide.adoc[]: Learn about MCP tool integration (if using Copilot Workspace) + +== Related pages + +* xref:ai-agents:ai-gateway/gateway-quickstart.adoc[]: Create and configure your AI Gateway +* xref:ai-agents:ai-gateway/gateway-architecture.adoc[]: Learn about AI Gateway architecture and benefits +* xref:ai-agents:ai-gateway/integrations/claude-code-user.adoc[]: Configure Claude Code with AI Gateway +* xref:ai-agents:ai-gateway/integrations/continue-user.adoc[]: Configure Continue.dev with AI Gateway +* xref:ai-agents:ai-gateway/integrations/cursor-user.adoc[]: Configure Cursor IDE with AI Gateway diff --git a/modules/ai-agents/pages/ai-gateway/integrations/index.adoc b/modules/ai-agents/pages/ai-gateway/integrations/index.adoc new file mode 100644 index 000000000..bf8c6966c --- /dev/null +++ b/modules/ai-agents/pages/ai-gateway/integrations/index.adoc @@ -0,0 +1,5 @@ += AI Gateway Integrations +:description: Configure AI development tools and IDEs to connect to Redpanda AI Gateway for centralized LLM routing and MCP tool aggregation. 
+:page-layout: index + +include::ai-agents:partial$ai-gateway-byoc-note.adoc[] diff --git a/modules/ai-agents/pages/ai-gateway/mcp-aggregation-guide.adoc b/modules/ai-agents/pages/ai-gateway/mcp-aggregation-guide.adoc new file mode 100644 index 000000000..68ba38cb0 --- /dev/null +++ b/modules/ai-agents/pages/ai-gateway/mcp-aggregation-guide.adoc @@ -0,0 +1,1026 @@ += MCP Aggregation and Orchestration Guide +:description: Guide to MCP aggregation and orchestration in Redpanda AI Gateway, including architecture, deferred tool loading, orchestrator workflows, administration, observability, security, and integration examples. +:page-topic-type: guide +:personas: app_developer, platform_admin +:learning-objective-1: Configure MCP aggregation with deferred tool loading to reduce token costs +:learning-objective-2: Write orchestrator workflows to reduce multi-step interactions +:learning-objective-3: Manage approved MCP servers with security controls and audit trails + +include::ai-agents:partial$ai-gateway-byoc-note.adoc[] + +AI Gateway provides MCP (Model Context Protocol) aggregation, allowing AI agents to access tools from multiple MCP servers through a single unified endpoint. This eliminates the need for agents to manage multiple MCP connections and significantly reduces token costs through deferred tool loading. + +MCP aggregation benefits: + +* Single endpoint: One MCP endpoint aggregates all approved MCP servers +* Token reduction: Often 80-90% fewer tokens through deferred tool loading (depending on configuration) +* Centralized governance: Admin-approved MCP servers only +* Orchestration: JavaScript-based orchestrator reduces multi-step round trips +* Security: Controlled tool execution environment + +== What is MCP? + +*Model Context Protocol (MCP)* is a standard for exposing tools (functions) that AI agents can discover and invoke. 
MCP servers provide tools like: + +* Database queries +* File system operations +* API integrations (CRM, payment, analytics) +* Search (web, vector, enterprise) +* Code execution +* Workflow automation + +[cols="1,1"] +|=== +| Without AI Gateway | With AI Gateway + +| Agent connects to each MCP server individually +| Agent connects to gateway's unified `/mcp` endpoint + +| Agent loads ALL tools from ALL servers upfront (high token cost) +| Gateway aggregates tools from approved MCP servers + +| No centralized governance or security +| Deferred loading: Only search + orchestrator tools sent initially + +| Complex configuration +| Agent queries for specific tools when needed (token savings) + +| +| Centralized governance and observability +|=== + +== Architecture + +[source,text] +---- +┌─────────────────┐ +│ AI Agent │ +│ (Claude, GPT) │ +└────────┬────────┘ + │ + │ 1. Discover tools via /mcp endpoint + │ 2. Invoke specific tool + │ +┌────────▼────────────────────────────────┐ +│ AI Gateway (MCP Aggregator) │ +│ │ +│ ┌─────────────────────────────────┐ │ +│ │ Deferred Tool Loading │ │ +│ │ (Send search + orchestrator │ │ +│ │ initially, defer others) │ │ +│ └─────────────────────────────────┘ │ +│ │ +│ ┌─────────────────────────────────┐ │ +│ │ Orchestrator (JavaScript) │ │ +│ │ (Reduce round trips for │ │ +│ │ multi-step workflows) │ │ +│ └─────────────────────────────────┘ │ +│ │ +│ ┌─────────────────────────────────┐ │ +│ │ Approved MCP Server Registry │ │ +│ │ (Admin-controlled) │ │ +│ └─────────────────────────────────┘ │ +└────────┬────────────────────────────────┘ + │ + │ Routes to appropriate MCP server + │ + ┌────▼─────┬──────────┬─────────┐ + │ │ │ │ +┌───▼────┐ ┌──▼─────┐ ┌──▼──────┐ ┌▼──────┐ +│ MCP │ │ MCP │ │ MCP │ │ MCP │ +│Database│ │Filesystem│ │ Slack │ │Search │ +│Server │ │ Server │ │ Server │ │Server │ +└────────┘ └────────┘ └─────────┘ └───────┘ +---- + + +== MCP request lifecycle + +=== Tool discovery (initial connection) + +Agent request: + 
+[source,http] +---- +GET /mcp/tools +Headers: + Authorization: Bearer {TOKEN} + rp-aigw-id: {GATEWAY_ID} + rp-aigw-mcp-deferred: true # Enable deferred loading +---- + + +Gateway response (with deferred loading): + +[source,json] +---- +{ + "tools": [ + { + "name": "search_tools", + "description": "Query available tools by keyword or category", + "input_schema": { + "type": "object", + "properties": { + "query": {"type": "string"}, + "category": {"type": "string"} + } + } + }, + { + "name": "orchestrator", + "description": "Execute multi-step workflows with JavaScript logic", + "input_schema": { + "type": "object", + "properties": { + "workflow": {"type": "string"}, + "context": {"type": "object"} + } + } + } + ] +} +---- + + +Note: Only 2 tools returned initially (search + orchestrator), not all 50+ tools from all MCP servers. + +Token savings: + +* Without deferred loading: ~5,000-10,000 tokens (all tool definitions) +* With deferred loading: ~500-1,000 tokens (2 tool definitions) +* Typically 80-90% reduction + +=== Tool query (when agent needs specific tool) + +Agent request: + +[source,http] +---- +POST /mcp/tools/search_tools +Headers: + Authorization: Bearer {TOKEN} + rp-aigw-id: {GATEWAY_ID} +Body: +{ + "query": "database query" +} +---- + + +Gateway response: + +[source,json] +---- +{ + "tools": [ + { + "name": "execute_sql", + "description": "Execute SQL query against the database", + "mcp_server": "database-server", + "input_schema": { + "type": "object", + "properties": { + "query": {"type": "string"}, + "database": {"type": "string"} + }, + "required": ["query"] + } + }, + { + "name": "list_tables", + "description": "List all tables in the database", + "mcp_server": "database-server", + "input_schema": { + "type": "object", + "properties": { + "database": {"type": "string"} + } + } + } + ] +} +---- + + +Agent receives only relevant tools based on query. 
+ +=== Tool execution + +Agent request: + +[source,http] +---- +POST /mcp/tools/execute_sql +Headers: + Authorization: Bearer {TOKEN} + rp-aigw-id: {GATEWAY_ID} +Body: +{ + "query": "SELECT * FROM users WHERE tier = 'premium' LIMIT 10", + "database": "prod" +} +---- + + +Gateway: + +1. Routes to appropriate MCP server (database-server) +2. Executes tool +3. Returns result + +Gateway response: + +[source,json] +---- +{ + "result": [ + {"id": 1, "name": "Alice", "tier": "premium"}, + {"id": 2, "name": "Bob", "tier": "premium"}, + ... + ] +} +---- + + +Agent receives result and can continue reasoning. + +== Deferred tool loading + +=== How it works + +Traditional MCP (No deferred loading): + +1. Agent connects to MCP endpoint +2. Gateway sends ALL tools from ALL MCP servers (50+ tools) +3. Agent includes ALL tool definitions in EVERY LLM request +4. High token cost: ~5,000-10,000 tokens per request + +Deferred loading (AI Gateway): + +1. Agent connects to MCP endpoint with `rp-aigw-mcp-deferred: true` header +2. Gateway sends only 2 tools: `search_tools` + `orchestrator` +3. Agent includes only 2 tool definitions in LLM request (~500-1,000 tokens) +4. When agent needs specific tool: + * Agent calls `search_tools` with query (e.g., "database") + * Gateway returns matching tools + * Agent calls specific tool (e.g., `execute_sql`) +5. 
Total token cost: Initial 500-1,000 + per-query ~200-500 + * Often 80-90% lower than loading all tools + +=== When to use deferred loading + +Use deferred loading when: + +* You have 10+ tools across multiple MCP servers +* Agents don't need all tools for every request +* Token costs are a concern +* Agents can handle multi-step workflows (search → execute) + +Don't use deferred loading when: + +* You have <5 tools total (overhead not worth it) +* Agents need all tools for every request (rare) +* Latency is more important than token costs (deferred adds 1 round trip) + +=== Configure deferred loading + +// PLACEHOLDER: Add UI path or configuration method + +Option 1: Enable at gateway level (recommended) + +[source,yaml] +---- +# PLACEHOLDER: Actual configuration format +mcp: + deferred_loading: true # Default for all agents using this gateway +---- + + +Option 2: Enable per-request (agent-controlled) + +[source,python] +---- +# Agent includes header +headers = { + "rp-aigw-id": "gw_abc123", + "rp-aigw-mcp-deferred": "true" # Enable for this request +} +---- + + +=== Measure token savings + +Compare token usage before/after deferred loading: + +1. Check logs without deferred loading: + + * Filter: Gateway = your-gateway, Model = your-model, Date = before enabling + * Average tokens per request: // PLACEHOLDER: measure + +2. Enable deferred loading + +3. Check logs after deferred loading: + + * Filter: Same gateway/model, Date = after enabling + * Average tokens per request: // PLACEHOLDER: measure + +4. Calculate savings: ++ +[source,text] +---- +Savings % = ((Before - After) / Before) × 100 +---- + +Expected Results: Typically 80-90% reduction in average tokens per request + +== Orchestrator: multi-step workflows + +=== What is the orchestrator? + +The *orchestrator* is a special tool that executes JavaScript workflows, reducing multi-step interactions from multiple round trips to a single request. + +Without Orchestrator: + +1. 
Agent: "Search vector database for relevant docs" → Round trip 1 +2. Agent receives results, evaluates: "Results insufficient" +3. Agent: "Fallback to web search" → Round trip 2 +4. Agent receives results, processes → Round trip 3 +5. *Total: 3 round trips* (high latency, 3× token cost) + +With Orchestrator: + +1. Agent: "Execute workflow: Search vector DB → if insufficient, fallback to web search" +2. Gateway executes entire workflow in JavaScript +3. Agent receives final result → *1 round trip* + +Benefits: + +* *Latency Reduction*: 1 round trip vs 3+ +* *Token Reduction*: No intermediate LLM calls needed +* *Reliability*: Workflow logic executes deterministically +* *Cost*: Single LLM call instead of multiple + +=== When to use orchestrator + +Use orchestrator when: + +* Multi-step workflows with conditional logic (if/else) +* Fallback patterns (try A, if fails, try B) +* Sequential tool calls with dependencies +* Loop-based operations (iterate, aggregate) + +Don't use orchestrator when: + +* Single tool call (no benefit) +* Agent needs to reason between steps (orchestrator is deterministic) +* Workflow requires LLM judgment at each step + +=== Orchestrator example: search with fallback + +Scenario: Search vector database; if results insufficient, fallback to web search. 
+ +Without Orchestrator (3 round trips): + +[source,python] +---- +# Agent's internal reasoning (3 separate LLM calls) + +# Round trip 1: Search vector DB +vector_results = call_tool("vector_search", {"query": "Redpanda pricing"}) + +# Round trip 2: Agent evaluates results +if len(vector_results) < 3: + # Round trip 3: Fallback to web search + web_results = call_tool("web_search", {"query": "Redpanda pricing"}) + results = web_results +else: + results = vector_results + +# Agent processes final results +---- + + +With Orchestrator (1 round trip): + +[source,python] +---- +# Agent invokes orchestrator once +results = call_tool("orchestrator", { + "workflow": """ + // JavaScript workflow + const vectorResults = await tools.vector_search({ + query: context.query + }); + + if (vectorResults.length < 3) { + // Fallback to web search + const webResults = await tools.web_search({ + query: context.query + }); + return webResults; + } + + return vectorResults; + """, + "context": { + "query": "Redpanda pricing" + } +}) + +# Agent receives final results directly +---- + + +Savings: + +* Latency: ~3-5 seconds (3 round trips) → ~1-2 seconds (1 round trip) +* Tokens: ~1,500 tokens (3 LLM calls) → ~500 tokens (1 LLM call) +* Cost: ~$0.0075 → ~$0.0025 (67% reduction) + +=== Orchestrator API + +// PLACEHOLDER: Confirm orchestrator API details + +Tool name: `orchestrator` + +Input schema: + +[source,json] +---- +{ + "workflow": "string (JavaScript code)", + "context": "object (variables available to workflow)" +} +---- + + +Available in workflow: + +* `tools.{tool_name}(params)`: Call any tool from approved MCP servers +* `context.{variable}`: Access context variables +* Standard JavaScript: `if`, `for`, `while`, `try/catch`, `async/await` + +Security: + +* Sandboxed execution (no file system, network, or system access) +* Timeout: // PLACEHOLDER: e.g., 30 seconds +* Memory limit: // PLACEHOLDER: e.g., 128MB + +Limitations: + +* Cannot call external APIs directly (must use MCP 
tools) +* Cannot import npm packages (built-in JS only) +* // PLACEHOLDER: Other limitations? + +=== Orchestrator example: data aggregation + +Scenario: Fetch user data from database, calculate summary statistics. + +[source,python] +---- +results = call_tool("orchestrator", { + "workflow": """ + // Fetch all premium users + const users = await tools.execute_sql({ + query: "SELECT * FROM users WHERE tier = 'premium'", + database: "prod" + }); + + // Calculate statistics + const stats = { + total: users.length, + by_region: {}, + avg_spend: 0 + }; + + let totalSpend = 0; + for (const user of users) { + // Count by region + if (!stats.by_region[user.region]) { + stats.by_region[user.region] = 0; + } + stats.by_region[user.region]++; + + // Sum spend + totalSpend += user.monthly_spend; + } + + stats.avg_spend = totalSpend / users.length; + + return stats; + """, + "context": {} +}) +---- + + +Output: + +[source,json] +---- +{ + "total": 1250, + "by_region": { + "us-east": 600, + "us-west": 400, + "eu": 250 + }, + "avg_spend": 149.50 +} +---- + + +vs Without Orchestrator: + +* Would require fetching all users to agent → agent processes → 2 round trips +* Orchestrator: All processing in gateway → 1 round trip + +=== Orchestrator best practices + +DO: + +* Use for deterministic workflows (same input → same output) +* Use for sequential operations with dependencies +* Use for fallback patterns +* Handle errors with `try/catch` +* Keep workflows readable (add comments) + +DON'T: + +* Use for workflows requiring LLM reasoning at each step (let agent handle that) +* Execute long-running operations (timeout will hit) +* Access external resources (use MCP tools instead) +* Execute untrusted user input (security risk) + +== MCP server administration + +=== Add MCP servers + +// PLACEHOLDER: Add UI path for MCP server management + +Prerequisites: + +* MCP server URL +* Authentication method (if required) +* List of tools to enable + +Steps: + +1. 
Navigate to MCP servers: + + * Console → AI Gateway → MCP Servers → Add Server + +2. Configure server: ++ +[source,yaml] +---- +# PLACEHOLDER: Actual configuration format +name: database-server +url: https://mcp-database.example.com +authentication: + type: bearer_token + token: ${SECRET_REF} # Reference to secret +enabled_tools: + - execute_sql + - list_tables + - describe_table +---- + +3. Test connection: + + * Gateway attempts connection to MCP server + * Verifies authentication + * Retrieves tool list + +4. Enable server: + + * Server status: Active + * Tools available to agents + +Common MCP servers: + +* Database: PostgreSQL, MySQL, MongoDB query tools +* Filesystem: Read/write/search files +* API Integrations: Slack, GitHub, Salesforce, Stripe +* Search: Web search, vector search, enterprise search +* Code Execution: Python, JavaScript sandboxes +* Workflow: Zapier, n8n integrations + +=== MCP server approval workflow + +Why approval is required: + +* Security: Prevent agents from accessing unauthorized systems +* Governance: Control which tools are available +* Cost: Some tools are expensive (API calls, compute) +* Compliance: Audit trail of approved tools + +Approval process: + +// PLACEHOLDER: Confirm if there's an approval workflow or if admins directly enable servers + +1. Request: User/team requests MCP server +2. Review: Admin reviews security, cost, necessity +3. Approval/Rejection: Admin decision +4. 
Configuration: If approved, admin adds server to gateway + +Rejected server behavior: + +* Server not listed in tool discovery +* Agent cannot query or invoke tools from this server +* Requests return `403 Forbidden` + +=== Restrict MCP server access + +Per-gateway restrictions: + +[source,yaml] +---- +# PLACEHOLDER: Actual configuration format +gateways: + - name: production-gateway + mcp_servers: + allowed: + - database-server # Only this server allowed + denied: + - filesystem-server # Explicitly denied + + - name: staging-gateway + mcp_servers: + allowed: + - "*" # All approved servers allowed +---- + + +Use cases: + +* Production gateway: Only production-safe tools +* Staging gateway: All tools for testing +* Customer-specific gateway: Only tools relevant to customer + +=== MCP server versioning + +// PLACEHOLDER: How is MCP server versioning handled? + +Challenge: MCP server updates may change tool schemas + +Recommendations: + +1. Pin versions (if supported): ++ +[source,yaml] +---- +mcp_servers: + - name: database-server + version: "1.2.3" # Pin to specific version +---- + +2. Test in staging first: + + * Update MCP server in staging gateway + * Test agent workflows + * Promote to production when validated + +3. 
Monitor breaking changes: + + * Subscribe to MCP server changelogs + * Set up alerts for schema changes + +== MCP observability + +=== Logs + +MCP tool invocations appear in request logs with: + +* Tool name +* MCP server +* Input parameters +* Output result +* Execution time +* Errors (if any) + +Filter logs by MCP: + +[source,text] +---- +Filter: request.path.startsWith("/mcp") +---- + + +Common log fields: + +[cols="1,2,2"] +|=== +| Field | Description | Example + +| Tool +| Tool invoked +| `execute_sql` + +| MCP Server +| Which server handled it +| `database-server` + +| Input +| Parameters sent +| `{"query": "SELECT ..."}` + +| Output +| Result returned +| `[{"id": 1, ...}]` + +| Latency +| Tool execution time +| `250ms` + +| Status +| Success/failure +| `200`, `500` +|=== + +=== Metrics + +// PLACEHOLDER: Confirm if MCP-specific metrics exist + +MCP-specific metrics (if available): + +* MCP requests per second +* Tool invocation count (by tool, by MCP server) +* MCP latency (p50, p95, p99) +* MCP error rate (by server, by tool) +* Orchestrator execution count +* Orchestrator execution time + +Dashboard: MCP Analytics + +* Top tools by usage +* Top MCP servers by latency +* Error rate by MCP server +* Token savings from deferred loading + +=== Debug MCP issues + +Issue: "Tool not found" + +Possible causes: + +1. MCP server not added to gateway +2. Tool not enabled in MCP server configuration +3. Deferred loading enabled but agent didn't query for tool first + +Solution: + +1. Verify MCP server is active: // PLACEHOLDER: UI path +2. Verify tool is in enabled_tools list +3. If deferred loading: Agent must call `search_tools` first + +Issue: "MCP server timeout" + +Possible causes: + +1. MCP server is down/unreachable +2. Tool execution is slow (e.g., expensive database query) +3. Gateway timeout too short + +Solution: + +1. Check MCP server health +2. Optimize tool (e.g., add database index) +3. Increase timeout: // PLACEHOLDER: How to configure? 
+ +Issue: "Orchestrator workflow failed" + +Possible causes: + +1. JavaScript syntax error +2. Tool invocation failed inside workflow +3. Timeout exceeded +4. Memory limit exceeded + +Solution: + +1. Test workflow syntax in JavaScript playground +2. Check logs for tool error inside orchestrator +3. Simplify workflow or increase timeout +4. Reduce data processing in workflow + +== Security considerations + +=== Tool execution sandboxing + +// PLACEHOLDER: Confirm sandboxing implementation + +Orchestrator sandbox: + +* No file system access +* No network access (except via MCP tools) +* No system calls +* Memory limit: // PLACEHOLDER: e.g., 128MB +* Execution timeout: // PLACEHOLDER: e.g., 30s + +MCP tool execution: + +* Tools execute in MCP server's environment (not gateway) +* Gateway does not execute tool code (only proxies requests) +* Security is MCP server's responsibility + +=== Authentication + +Gateway → MCP server: + +* Bearer token (most common) +* API key +* mTLS (for high-security environments) + +Agent → Gateway: + +* Standard gateway authentication (Redpanda Cloud token) +* `rp-aigw-id` header identifies gateway (and its approved MCP servers) + +=== Audit trail + +All MCP operations logged: + +* Who (agent/user) invoked tool +* When (timestamp) +* What tool was invoked +* What parameters were sent +* What result was returned +* Whether it succeeded or failed + +Use case: Compliance, security investigation, debugging + +=== Restrict dangerous tools + +Recommendation: Don't enable destructive tools in production gateways + +Examples of dangerous tools: + +* File deletion (`delete_file`) +* Database writes without safeguards (`execute_sql` with UPDATE/DELETE) +* Payment operations (`charge_customer`) +* System commands (`execute_bash`) + +Best practice: + +* Read-only tools in production gateway +* Write tools only in staging gateway (with approval workflows) +* Wrap dangerous operations in MCP server with safeguards (e.g., "require confirmation token") 
+ +== MCP + LLM routing + +=== Combine MCP with CEL routing + +Use case: Route agents to different MCP servers based on customer tier + +CEL expression: + +[source,cel] +---- +request.headers["x-customer-tier"] == "enterprise" + ? "gateway-with-premium-mcp-servers" + : "gateway-with-basic-mcp-servers" +---- + + +Result: + +* Enterprise customers: Access to proprietary data, expensive APIs +* Basic customers: Access to public data, free APIs + +=== MCP with provider pools + +Scenario: Different agents use different models + different tools + +Configuration: + +* Gateway A: GPT-4o + database + CRM MCP servers +* Gateway B: Claude Sonnet + web search + analytics MCP servers + +Use case: Optimize model-tool pairing (some models better at certain tools) + +== Integration examples + +[tabs] +==== +Python (OpenAI SDK):: ++ +-- +[source,python] +---- +from openai import OpenAI + +# Initialize client with MCP endpoint +client = OpenAI( + base_url="https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1", + api_key=os.getenv("REDPANDA_CLOUD_TOKEN"), + default_headers={ + "rp-aigw-id": os.getenv("GATEWAY_ID"), + "rp-aigw-mcp-deferred": "true" # Enable deferred loading + } +) + +# Discover tools +tools_response = requests.get( + "https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/mcp/tools", + headers={ + "Authorization": f"Bearer {os.getenv('REDPANDA_CLOUD_TOKEN')}", + "rp-aigw-id": os.getenv("GATEWAY_ID"), + "rp-aigw-mcp-deferred": "true" + } +) +tools = tools_response.json()["tools"] + +# Agent uses tools +response = client.chat.completions.create( + model="anthropic/claude-sonnet-3.5", + messages=[ + {"role": "user", "content": "Query the database for premium users"} + ], + tools=tools, # Pass MCP tools to agent + tool_choice="auto" +) + +# Handle tool calls +if response.choices[0].message.tool_calls: + for tool_call in response.choices[0].message.tool_calls: + # Execute tool via gateway + tool_result = requests.post( + 
f"https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/mcp/tools/{tool_call.function.name}", + headers={ + "Authorization": f"Bearer {os.getenv('REDPANDA_CLOUD_TOKEN')}", + "rp-aigw-id": os.getenv("GATEWAY_ID") + }, + json=json.loads(tool_call.function.arguments) + ) + + # Continue conversation with tool result + response = client.chat.completions.create( + model="anthropic/claude-sonnet-3.5", + messages=[ + {"role": "user", "content": "Query the database for premium users"}, + response.choices[0].message, + { + "role": "tool", + "tool_call_id": tool_call.id, + "content": json.dumps(tool_result.json()) + } + ] + ) +---- +-- + +Claude Code CLI:: ++ +-- +[source,bash] +---- +# Configure gateway with MCP +export CLAUDE_API_BASE="https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1" +export ANTHROPIC_API_KEY="your-redpanda-token" + +# Claude Code automatically discovers MCP tools from gateway +claude code + +# Agent can now use aggregated MCP tools +---- +-- + +LangChain:: ++ +-- +[source,python] +---- +from langchain_openai import ChatOpenAI +from langchain.agents import initialize_agent, Tool + +# Initialize LLM with gateway +llm = ChatOpenAI( + base_url="https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1", + api_key=os.getenv("REDPANDA_CLOUD_TOKEN"), + default_headers={ + "rp-aigw-id": os.getenv("GATEWAY_ID") + } +) + +# Fetch MCP tools from gateway +# PLACEHOLDER: LangChain-specific integration code + +# Create agent with MCP tools +agent = initialize_agent( + tools=mcp_tools, + llm=llm, + agent="openai-tools", + verbose=True +) + +# Agent can now use MCP tools +response = agent.run("Find all premium users in the database") +---- +-- +==== + + +== Next steps + diff --git a/modules/ai-agents/pages/ai-gateway/what-is-ai-gateway.adoc b/modules/ai-agents/pages/ai-gateway/what-is-ai-gateway.adoc new file mode 100644 index 000000000..6af235d8a --- /dev/null +++ b/modules/ai-agents/pages/ai-gateway/what-is-ai-gateway.adoc @@ -0,0 +1,182 @@ += What is an AI Gateway? 
+:description: Understand what an AI Gateway is, the problems it solves, and how it benefits your AI infrastructure. +:page-topic-type: concept +:personas: app_developer, platform_admin +:learning-objective-1: Describe how AI Gateway centralizes LLM provider management and reduces operational complexity +:learning-objective-2: Identify key features that address common LLM integration challenges +:learning-objective-3: Determine whether AI Gateway fits your use case based on traffic volume and provider diversity + +include::ai-agents:partial$ai-gateway-byoc-note.adoc[] + +Redpanda AI Gateway is a unified access layer for LLM providers and AI tools that sits between your applications and the AI services they use. It provides centralized routing, policy enforcement, cost management, and observability for all your AI traffic. + +== The problem + +Modern AI applications face four critical challenges that increase costs, reduce reliability, and slow down development. + +First, applications typically hardcode provider-specific SDKs. An application using OpenAI's SDK cannot easily switch to Anthropic or Google without code changes and redeployment. This tight coupling makes testing across providers time-consuming and error-prone, and means provider outages directly impact your application availability. + +Second, costs can spiral without visibility into usage patterns. Without a centralized view of token consumption across teams and applications, it's difficult to attribute costs to specific customers, features, or environments. Testing and debugging can generate unexpected bills, and there's no way to enforce budgets or rate limits per team or customer. + +Third, AI agents that use MCP (Model Context Protocol) servers face tool coordination challenges. Managing tool discovery and execution is repetitive across projects, and agents typically load all available tools upfront, which creates high token costs. 
There's also no centralized governance over which tools agents can access. + +Finally, observability is fragmented across provider dashboards. You cannot reconstruct user sessions that span multiple models, compare latency and costs across providers in a unified view, or efficiently debug issues. Troubleshooting "the AI gave the wrong answer" requires manual log diving across different systems. + +== What AI Gateway solves + +Redpanda AI Gateway addresses these challenges through four core capabilities: + +=== Unified LLM access (single endpoint for all providers) + +AI Gateway provides a single OpenAI-compatible endpoint that routes requests to multiple LLM providers. Instead of integrating with each provider's SDK separately, you configure your application once and switch providers by changing only the model parameter. + +Without AI Gateway, you need different SDKs and patterns for each provider: + +[source,python] +---- +# OpenAI +from openai import OpenAI +client = OpenAI(api_key="sk-...") +response = client.chat.completions.create( + model="gpt-4o", + messages=[{"role": "user", "content": "Hello"}] +) + +# Anthropic (different SDK, different patterns) +from anthropic import Anthropic +client = Anthropic(api_key="sk-ant-...") +response = client.messages.create( + model="claude-sonnet-3.5", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] +) +---- + +With AI Gateway, you use the OpenAI SDK for all providers: + +[source,python] +---- +from openai import OpenAI + +# Single configuration, multiple providers +client = OpenAI( + base_url="https://{GATEWAY_ENDPOINT}", + api_key="your-redpanda-token", + default_headers={"rp-aigw-id": "{GATEWAY_ID}"} +) + +# Route to OpenAI +response = client.chat.completions.create( + model="openai/gpt-4o", + messages=[{"role": "user", "content": "Hello"}] +) + +# Route to Anthropic (same code, different model string) +response = client.chat.completions.create( + model="anthropic/claude-sonnet-3.5", + 
messages=[{"role": "user", "content": "Hello"}] +) +---- + +To switch providers, you change only the `model` parameter from `openai/gpt-4o` to `anthropic/claude-sonnet-3.5`. No code changes or redeployment needed. + +=== Policy-based routing and cost control + +AI Gateway lets you define routing rules, rate limits, and budgets once, then enforces them automatically for all requests. + +You can route requests to different models based on user attributes. For example, to direct premium users to a more capable model while routing free tier users to a cost-effective option, use a CEL expression: + +[source,cel] +---- +// Route premium users to best model, free users to cost-effective model +request.headers["x-user-tier"] == "premium" + ? "anthropic/claude-opus-4" + : "anthropic/claude-sonnet-3.5" +---- + +You can also set different rate limits and spend limits per environment to prevent staging or development traffic from consuming production budgets. + +For reliability, you can configure provider pools with automatic failover. If you configure OpenAI GPT-4 as your primary model and Anthropic Claude Opus as the fallback, the gateway automatically routes requests to the fallback when it detects rate limits or timeouts from the primary provider. This configuration can significantly improve uptime (potentially up to 99.9% in some configurations) even during provider outages. + +=== MCP aggregation and orchestration + +AI Gateway aggregates multiple MCP (Model Context Protocol) servers and provides deferred tool loading, which dramatically reduces token costs for AI agents. + +Without AI Gateway, agents typically load all available tools from multiple MCP servers at startup. This approach sends 50+ tool definitions with every request, creating high token costs (thousands of tokens per request), slow agent startup times, and no centralized governance over which tools agents can access. 
+ +With AI Gateway, you configure approved MCP servers once, and the gateway loads only search and orchestrator tools initially. Agents query for specific tools only when needed, which often reduces token usage by 80-90% depending on your configuration and the number of tools aggregated. You also gain centralized approval and governance over which MCP servers your agents can access. + +For complex workflows, AI Gateway provides a JavaScript-based orchestrator tool that reduces multi-step workflows from multiple round trips to a single call. For example, you can create a workflow that searches a vector database and, if the results are insufficient, falls back to web search—all in one orchestration step. + +=== Unified observability and cost tracking + +AI Gateway provides a single dashboard that tracks all LLM traffic across providers, eliminating the need to switch between multiple provider dashboards. + +The dashboard tracks request volume per gateway, model, and provider, along with token usage for both prompt and completion tokens. You can view estimated spend per model with cross-provider comparisons, latency metrics (p50, p95, p99), and errors broken down by type, provider, and model. + +This unified view helps you answer critical questions such as which model is the most cost-effective for your use case, why a specific user request failed, how much your staging environment costs per week, and what the latency difference is between providers for your workload. + +ifdef::show-gateway-patterns[] +== Common gateway patterns + +=== Team isolation + +When multiple teams share infrastructure but need separate budgets and policies, create one gateway per team. For example, you might configure Team A's gateway with a $5K/month budget for both staging and production environments, while Team B's gateway has a $10K/month budget with different rate limits. 
Each team sees only their own traffic in the observability dashboards, providing clear cost attribution and isolation. + +=== Environment separation + +To prevent staging traffic from affecting production metrics, create separate gateways for each environment. Configure the staging gateway with lower rate limits, restricted model access, and aggressive cost controls to prevent runaway expenses. The production gateway can have higher rate limits, access to all models, and alerting configured to detect anomalies. + +=== Primary and fallback for reliability + +To ensure uptime during provider outages, configure provider pools with automatic failover. For example, you can set OpenAI as your primary provider (preferred for quality) and configure Anthropic as the fallback that activates when the gateway detects rate limits or timeouts from OpenAI. Monitor the fallback rate to detect primary provider issues early, before they impact your users. + +=== A/B testing models + +To compare model quality and cost without dual integration, route a percentage of traffic to different models. For example, you can send 80% of traffic to `claude-sonnet-3.5` and 20% to `claude-opus-4`, then compare quality metrics and costs in the observability dashboard before adjusting the split. + +=== Customer-based routing + +For SaaS products with tiered pricing (free, pro, enterprise), use CEL routing based on request headers to match users with appropriate models: + +[source,cel] +---- +request.headers["x-customer-tier"] == "enterprise" ? "anthropic/claude-opus-4" : +request.headers["x-customer-tier"] == "pro" ? 
"anthropic/claude-sonnet-3.5" : +"anthropic/claude-haiku" +---- + +endif::[] + +== When to use AI Gateway + +AI Gateway is ideal for organizations that: + +* Use or plan to use multiple LLM providers +* Need centralized cost tracking and budgeting +* Want to experiment with different models without code changes +* Require high availability during provider outages +* Have multiple teams or customers using AI services +* Build AI agents that need MCP tool aggregation +* Need unified observability across all AI traffic + +AI Gateway may not be necessary if: + +* You only use a single provider with simple requirements +* You have minimal AI traffic (< 1000 requests/day) +* You don't need cost tracking or policy enforcement +* Your application doesn't require provider switching + +== Next steps + +Now that you understand what AI Gateway is and how it can benefit your organization: + +*For Administrators:* + +* xref:ai-gateway/admin/setup-guide.adoc[Setup Guide] - Enable providers, models, and create gateways +* xref:ai-gateway/gateway-architecture.adoc[Architecture Deep Dive] - Technical architecture details + +*For Builders:* + +* xref:ai-gateway/builders/discover-gateways.adoc[Discover Available Gateways] - Find which gateways you can access +* xref:ai-gateway/builders/connect-your-agent.adoc[Connect Your Agent] - Integrate your application diff --git a/modules/ai-agents/pages/index.adoc b/modules/ai-agents/pages/index.adoc index 9ac867a96..591ad65cc 100644 --- a/modules/ai-agents/pages/index.adoc +++ b/modules/ai-agents/pages/index.adoc @@ -1,8 +1,4 @@ -= AI Agents in Redpanda Cloud -:description: Learn about AI agents and the tools Redpanda Cloud provides for building them. += Agentic AI +:description: Learn about the Redpanda Agentic Data Plane, including the AI Gateway, AI agents, and MCP servers. 
:page-layout: index :page-aliases: develop:agents/about.adoc, develop:ai-agents/about.adoc - -AI agents are configurable assistants that autonomously perform specialist tasks by leveraging large language models (LLMs) and connecting to external data sources and tools. - -Redpanda Cloud provides two complementary Model Context Protocol (MCP) options to help you build AI agents. diff --git a/modules/ai-agents/pages/mcp/local/configuration.adoc b/modules/ai-agents/pages/mcp/local/configuration.adoc index 134c50b0c..f2170a11b 100644 --- a/modules/ai-agents/pages/mcp/local/configuration.adoc +++ b/modules/ai-agents/pages/mcp/local/configuration.adoc @@ -2,7 +2,7 @@ :page-beta: true :description: Learn how to configure the Redpanda Cloud Management MCP Server, including auto and manual client setup, enabling deletes, and security considerations. :page-topic-type: how-to -:personas: ai_agent_developer, platform_admin +:personas: agent_developer, platform_admin // Reader journey: "I customize and configure" // Learning objectives - what readers can learn from this page: :learning-objective-1: Configure MCP clients diff --git a/modules/ai-agents/pages/mcp/local/overview.adoc b/modules/ai-agents/pages/mcp/local/overview.adoc index 01bfd6227..6b2643a34 100644 --- a/modules/ai-agents/pages/mcp/local/overview.adoc +++ b/modules/ai-agents/pages/mcp/local/overview.adoc @@ -2,7 +2,7 @@ :page-beta: true :description: Learn about the Redpanda Cloud Management MCP Server, which lets AI agents securely access and operate your Redpanda Cloud account and clusters. 
:page-topic-type: overview -:personas: evaluator, ai_agent_developer, platform_admin +:personas: evaluator, agent_developer, platform_admin // Reader journey: "I'm new" // Learning objectives - what readers should understand after reading this page: :learning-objective-1: Explain what the Redpanda Cloud Management MCP Server does @@ -66,7 +66,7 @@ MCP servers authenticate to Redpanda Cloud using your personal or service accoun == Next steps -* xref:ai-agents:mcp/local/quickstart.adoc[Redpanda Cloud Management MCP Server quickstart] -* xref:ai-agents:mcp/local/configuration.adoc[Configure the Redpanda Cloud Management MCP Server] +* xref:ai-agents:mcp/local/quickstart.adoc[] +* xref:ai-agents:mcp/local/configuration.adoc[] TIP: The Redpanda documentation site has a read-only MCP server that provides access to Redpanda docs and examples. This server has no access to your Redpanda Cloud account or clusters. See xref:home:ROOT:mcp-setup.adoc[]. diff --git a/modules/ai-agents/pages/mcp/local/quickstart.adoc b/modules/ai-agents/pages/mcp/local/quickstart.adoc index 875d4e5a0..413f6d146 100644 --- a/modules/ai-agents/pages/mcp/local/quickstart.adoc +++ b/modules/ai-agents/pages/mcp/local/quickstart.adoc @@ -2,8 +2,8 @@ :page-beta: true :description: Connect your Claude AI agent to your Redpanda Cloud account and clusters using the Redpanda Cloud Management MCP Server. :page-topic-type: tutorial -:personas: ai_agent_developer, platform_admin -// Reader journey: "I'm new" → first hands-on experience +:personas: agent_developer, platform_admin +// Reader journey: "I'm new" - seeking first hands-on experience // Learning objectives - what readers will achieve by completing this quickstart: :learning-objective-1: Authenticate to Redpanda Cloud with rpk :learning-objective-2: Install the MCP integration for Claude @@ -29,7 +29,7 @@ TIP: For other clients, see xref:ai-agents:mcp/local/configuration.adoc[]. == Set up the MCP server -. Verify your `rpk` version +. 
Verify your `rpk` version. + ```bash rpk version @@ -37,7 +37,7 @@ rpk version + Ensure the version is at least 25.2.3. -. Log in to Redpanda Cloud +. Log in to Redpanda Cloud. + ```bash rpk cloud login diff --git a/modules/ai-agents/pages/mcp/overview.adoc b/modules/ai-agents/pages/mcp/overview.adoc index 5b452c357..1357d7211 100644 --- a/modules/ai-agents/pages/mcp/overview.adoc +++ b/modules/ai-agents/pages/mcp/overview.adoc @@ -1,7 +1,7 @@ = MCP Servers for Redpanda Cloud Overview :description: Learn about Model Context Protocol (MCP) in Redpanda Cloud, including the two complementary options: the Redpanda Cloud Management MCP Server and Remote MCP. :page-topic-type: overview -:personas: evaluator, ai_agent_developer +:personas: evaluator, agent_developer // Reader journey: "I'm new" - understanding the landscape // Learning objectives - what readers should understand after reading this page: :learning-objective-1: Describe what MCP enables for AI agents @@ -18,7 +18,11 @@ After reading this page, you will be able to: == What is MCP? -MCP (Model Context Protocol) is an open standard that lets AI agents use tools. Think of it like a universal adapter: instead of building custom integrations for every AI system, you define your tools once using MCP, and any MCP-compatible AI client can discover and use them. +The Model Context Protocol (MCP) provides a standardized way for AI agents to connect with external data sources and tools in Redpanda Cloud. + +Each MCP server hosts a set of tools that AI clients can discover and invoke. Tools are custom integrations that expose data, APIs, or workflows to AI agents. + +Think of MCP like a universal adapter: instead of building custom integrations for every AI system, you define your tools once using MCP, and any MCP-compatible AI client can discover and use them. Without MCP, connecting AI to your business systems requires custom API code, authentication handling, and response formatting for each AI platform. 
With MCP, you describe what a tool does and what inputs it needs, and the protocol handles the rest. @@ -85,9 +89,9 @@ You can use both options together. For example, use the Redpanda Cloud Managemen == Get started -* xref:ai-agents:mcp/local/quickstart.adoc[]: Connect Claude to your Redpanda Cloud account -* xref:ai-agents:mcp/remote/quickstart.adoc[]: Build and deploy custom MCP tools +* xref:ai-agents:mcp/local/quickstart.adoc[] +* xref:ai-agents:mcp/remote/quickstart.adoc[] == Suggested reading -* xref:home:ROOT:mcp-setup.adoc[]: Access Redpanda documentation through AI agents (read-only, no Cloud access required) +* xref:home:ROOT:mcp-setup.adoc[] diff --git a/modules/ai-agents/pages/mcp/remote/admin-guide.adoc b/modules/ai-agents/pages/mcp/remote/admin-guide.adoc deleted file mode 100644 index 214e3070f..000000000 --- a/modules/ai-agents/pages/mcp/remote/admin-guide.adoc +++ /dev/null @@ -1,41 +0,0 @@ -= Remote MCP Server Administration Guide -:description: Overview of administrative tasks for managing MCP servers in Redpanda Cloud. -:page-topic-type: overview -:personas: platform_admin, ai_agent_developer -// Reader journey: "I operate and maintain" -// Learning objectives - what readers can learn from this page: -:learning-objective-1: Identify available MCP server administrative tasks -:learning-objective-2: Navigate to administrative resources -:learning-objective-3: Describe the server lifecycle stages - -Use these resources to manage it throughout its lifecycle, from editing and scaling to monitoring and deletion. - -After reading this page, you will be able to: - -* [ ] {learning-objective-1} -* [ ] {learning-objective-2} -* [ ] {learning-objective-3} - -== Server lifecycle management - -Manage the basic lifecycle of your MCP servers, including editing configurations, pausing to save costs, and deleting. - -See xref:ai-agents:mcp/remote/manage-servers.adoc[]. 
- -== Resource scaling - -Adjust your MCP server's compute resources to match workload demands and optimize costs. Resource allocation directly affects your billing charges. - -See xref:ai-agents:mcp/remote/scale-resources.adoc[]. - -== Monitoring and observability - -Monitor your MCP server's activity using OpenTelemetry traces. Track tool invocations, measure performance, debug failures, and integrate with observability platforms. - -See xref:ai-agents:mcp/remote/monitor-activity.adoc[]. - -== Next steps - -* xref:ai-agents:mcp/remote/best-practices.adoc[Learn best practices] for building robust tools. -* xref:develop:connect/configuration/secret-management.adoc[Manage secrets] that MCP server tools use. -* xref:billing:billing.adoc#remote-mcp-billing-metrics[Review MCP billing] to optimize costs. diff --git a/modules/ai-agents/pages/mcp/remote/best-practices.adoc b/modules/ai-agents/pages/mcp/remote/best-practices.adoc index 28df4084d..81738ab01 100644 --- a/modules/ai-agents/pages/mcp/remote/best-practices.adoc +++ b/modules/ai-agents/pages/mcp/remote/best-practices.adoc @@ -1,7 +1,7 @@ = MCP Tool Design :description: Design effective MCP tool interfaces with clear names, descriptions, and input properties. 
:page-topic-type: best-practices -:personas: ai_agent_developer +:personas: agent_developer // Reader journey: "I want AI clients to discover and use my tools effectively" // Learning objectives - what readers should be able to do after reading this page: :learning-objective-1: Write tool names and descriptions that help AI clients select the right tool @@ -37,6 +37,6 @@ include::redpanda-connect:ai-agents:example$best-practices/mcp-metadata/search-c == Next steps -* xref:ai-agents:mcp/remote/create-tool.adoc#secrets[Use secrets]: Store credentials securely in the Secrets Store -* xref:ai-agents:mcp/remote/tool-patterns.adoc[]: Find reusable patterns including validation, error handling, and response formatting -* xref:ai-agents:mcp/remote/troubleshooting.adoc[]: Diagnose common issues +* xref:ai-agents:mcp/remote/create-tool.adoc#secrets[Use secrets for credentials] +* xref:ai-agents:mcp/remote/tool-patterns.adoc[] +* xref:ai-agents:mcp/remote/troubleshooting.adoc[] diff --git a/modules/ai-agents/pages/mcp/remote/concepts.adoc b/modules/ai-agents/pages/mcp/remote/concepts.adoc index 16e78912c..db7f22ada 100644 --- a/modules/ai-agents/pages/mcp/remote/concepts.adoc +++ b/modules/ai-agents/pages/mcp/remote/concepts.adoc @@ -2,7 +2,7 @@ :description: Understand the MCP execution model, choose the right component type, and use traces for observability. 
:page-aliases: ai-agents:mcp/remote/understanding-mcp-tools.adoc :page-topic-type: concepts -:personas: ai_agent_developer, streaming_developer +:personas: agent_developer, streaming_developer // Reader journey: "I want to understand how it works" // Learning objectives - what readers should know after reading this page: :learning-objective-1: Describe the request/response execution model @@ -23,141 +23,23 @@ include::redpanda-connect:ai-agents:partial$mcp/concepts/component-mapping.adoc[ // Execution model - single-sourced from partial include::redpanda-connect:ai-agents:partial$mcp/concepts/execution-model.adoc[] +MCP tools use an agent-initiated execution model where agents invoke tools on-demand. Redpanda also supports pipeline-initiated integration where pipelines call agents using the `a2a_message` processor. For guidance on choosing between these patterns, see xref:ai-agents:agents/integration-overview.adoc[]. + [[component-selection]] == Choose the right component type // Component selection guide - single-sourced from partial include::redpanda-connect:ai-agents:partial$mcp/concepts/component-selection.adoc[] -[[execution-log]] -== Execution log and observability - -Every MCP server automatically emits OpenTelemetry traces to a topic called `redpanda.otel_traces`. These traces provide detailed observability into your MCP server's operations, creating a complete execution log. - -=== Traces and spans - -OpenTelemetry traces provide a complete picture of how a request flows through your system: - -* A _trace_ represents the entire lifecycle of a request (for example, a tool invocation from start to finish). -* A _span_ represents a single unit of work within that trace (such as a data processing operation or an external API call). -* A trace contains one or more spans organized hierarchically, showing how operations relate to each other. 
- -With 100% sampling, every operation is captured, creating a complete execution log that you can use for debugging, monitoring, and performance analysis. - -=== How Redpanda stores traces - -The `redpanda.otel_traces` topic stores OpenTelemetry spans in JSON format, following the https://opentelemetry.io/docs/specs/otel/protocol/[OpenTelemetry Protocol (OTLP)^] specification. A Protobuf schema named `redpanda.otel_traces-value` is also automatically registered with the topic, enabling clients to deserialize trace data correctly. - -The `redpanda.otel_traces` topic and its schema are managed automatically by Redpanda. If you delete either the topic or the schema, they are recreated automatically. However, deleting the topic permanently deletes all trace data, and the topic comes back empty. Do not produce your own data to this topic. It is reserved for OpenTelemetry traces. - -Each span in the execution log represents a specific operation performed by your MCP server, such as: - -* Tool invocation requests -* Data processing operations -* External API calls -* Error conditions -* Performance metrics - -=== Topic configuration and lifecycle - -The `redpanda.otel_traces` topic has a predefined retention policy. Configuration changes to this topic are not supported. If you modify settings, Redpanda reverts them to the default values. - -The topic persists in your cluster even after all MCP servers are deleted, allowing you to retain historical trace data for analysis. - -Trace data may contain sensitive information from your tool inputs and outputs. Consider implementing appropriate glossterm:ACL[,access control lists (ACLs)] for the `redpanda.otel_traces` topic, and review the data in traces before sharing or exporting to external systems. - -=== Understand the trace structure - -Each span captures a unit of work. 
Here's what a typical MCP tool invocation looks like: - -[,json] ----- -{ - "traceId": "71cad555b35602fbb35f035d6114db54", - "spanId": "43ad6bc31a826afd", - "name": "http_processor", - "attributes": [ - {"key": "city_name", "value": {"stringValue": "london"}}, - {"key": "result_length", "value": {"intValue": "198"}} - ], - "startTimeUnixNano": "1765198415253280028", - "endTimeUnixNano": "1765198424660663434", - "instrumentationScope": {"name": "rpcn-mcp"}, - "status": {"code": 0, "message": ""} -} ----- - -Key elements to understand: - -* **`traceId`**: Links all spans belonging to the same request. Use this to follow a tool invocation through its entire lifecycle. -* **`name`**: The tool name (`http_processor` in this example). This tells you which tool was invoked. -* **`instrumentationScope.name`**: When this is `rpcn-mcp`, the span represents an MCP tool. When it's `redpanda-connect`, it's internal processing. -* **`attributes`**: Context about the operation, like input parameters or result metadata. -* **`status.code`**: `0` means success, `2` means error. - -=== Parent-child relationships - -Traces show how operations relate. A tool invocation (parent) may trigger internal operations (children): - -[,json] ----- -{ - "traceId": "71cad555b35602fbb35f035d6114db54", - "spanId": "ed45544a7d7b08d4", - "parentSpanId": "43ad6bc31a826afd", - "name": "http", - "instrumentationScope": {"name": "redpanda-connect"}, - "status": {"code": 0, "message": ""} -} ----- - -The `parentSpanId` links this child span to the parent tool invocation. Both share the same `traceId`, so you can reconstruct the complete operation. 
- -=== Error events in traces - -When something goes wrong, traces capture error details: - -[,json] ----- -{ - "traceId": "71cad555b35602fbb35f035d6114db54", - "spanId": "ba332199f3af6d7f", - "parentSpanId": "43ad6bc31a826afd", - "name": "http_request", - "events": [ - { - "name": "event", - "timeUnixNano": "1765198420254169629", - "attributes": [{"key": "error", "value": {"stringValue": "type"}}] - } - ], - "status": {"code": 0, "message": ""} -} ----- - -The `events` array captures what happened and when. Use `timeUnixNano` to see exactly when the error occurred within the operation. - -=== Traces compared to audit logs - -OpenTelemetry traces are designed for observability and debugging, not audit logging or compliance. - -Traces provide: - -* Hierarchical view of request flow through your system (parent-child span relationships) -* Detailed timing information for performance analysis -* Ability to reconstruct execution paths and identify bottlenecks -* Insights into how operations flow through distributed systems - -Traces are not: +== Observability -* Immutable audit records for compliance purposes -* Designed for "who did what" accountability tracking +MCP servers automatically emit OpenTelemetry traces for monitoring and debugging. For detailed information about traces, spans, and the trace structure, see xref:ai-agents:observability/concepts.adoc[]. -For monitoring tasks like consuming traces, debugging failures, and measuring performance, see xref:ai-agents:mcp/remote/monitor-activity.adoc[]. +To monitor MCP server activity, consume traces, and debug failures, see xref:ai-agents:mcp/remote/monitor-mcp-servers.adoc[]. 
== Next steps * xref:ai-agents:mcp/remote/create-tool.adoc[] -* xref:ai-agents:mcp/remote/best-practices.adoc[]: Apply naming and design guidelines -* xref:ai-agents:mcp/remote/tool-patterns.adoc[]: Find reusable patterns -* xref:ai-agents:mcp/remote/troubleshooting.adoc[]: Diagnose common issues +* xref:ai-agents:mcp/remote/best-practices.adoc[] +* xref:ai-agents:mcp/remote/tool-patterns.adoc[] +* xref:ai-agents:mcp/remote/troubleshooting.adoc[] diff --git a/modules/ai-agents/pages/mcp/remote/create-tool.adoc b/modules/ai-agents/pages/mcp/remote/create-tool.adoc index 73a080526..3b13dcec4 100644 --- a/modules/ai-agents/pages/mcp/remote/create-tool.adoc +++ b/modules/ai-agents/pages/mcp/remote/create-tool.adoc @@ -1,7 +1,7 @@ = Create an MCP Tool :description: Create an MCP tool with the correct YAML structure, metadata, and parameter mapping. :page-topic-type: how-to -:personas: ai_agent_developer, streaming_developer, data_engineer +:personas: agent_developer, streaming_developer, data_engineer // Reader journey: "I want to create a tool for my AI agent" // Learning objectives - what readers can do after reading this page: :learning-objective-1: Create a tool with the correct structure and MCP metadata @@ -33,7 +33,7 @@ Cloud Console:: -- . Log in to the link:https://cloud.redpanda.com/[Redpanda Cloud Console^]. -. Navigate to *Remote MCP* and either create a new MCP server or edit an existing one. +. Navigate to *Agentic AI* > *Remote MCP* and either create a new MCP server or edit an existing one. . In the *Tools* section, click *Add Tool*. @@ -64,25 +64,7 @@ Here's an example using the xref:develop:connect/components/processors/sql_selec [source,yaml] ---- -label: lookup-customer # <1> - -sql_select: # <2> - driver: postgres - dsn: "${secrets.DATABASE_URL}" - table: customers - columns: ["id", "name", "email", "plan"] - where: id = ? 
- args_mapping: '[this.customer_id]' - -meta: # <3> - mcp: - enabled: true - description: "Look up a customer by ID and return their profile." - properties: - - name: customer_id - type: string - description: "The customer's unique identifier" - required: true +include::ai-agents:example$mcp-tools/processors/lookup_customer.yaml[tag=complete,indent=0] ---- <1> **Label**: Becomes the tool name. @@ -104,17 +86,7 @@ xref:develop:connect/components/processors/about.adoc[Processors] transform, fil .Processor tool [source,yaml] ---- -label: enrich-order - -processors: - - http: - url: "https://api.example.com/lookup" - verb: GET - -meta: - mcp: - enabled: true - description: "Enrich order with customer data" +include::ai-agents:example$mcp-tools/processors/enrich_order.yaml[tag=complete,indent=0] ---- xref:develop:connect/components/inputs/about.adoc[Inputs] read data from sources, xref:develop:connect/components/outputs/about.adoc[outputs] write data to destinations, and xref:develop:connect/components/caches/about.adoc[caches] store and retrieve data. Define these components directly at the top level: @@ -122,59 +94,20 @@ xref:develop:connect/components/inputs/about.adoc[Inputs] read data from sources .Input tool [source,yaml] ---- -label: read-events - -redpanda: # <1> - seed_brokers: ["${REDPANDA_BROKERS}"] - topics: ["events"] - consumer_group: "mcp-reader" - tls: - enabled: true - sasl: - - mechanism: SCRAM-SHA-256 - username: "${secrets.MCP_USERNAME}" - password: "${secrets.MCP_PASSWORD}" - -meta: - mcp: - enabled: true - description: "Read events from Redpanda" +include::ai-agents:example$mcp-tools/inputs/read_events.yaml[tag=complete,indent=0] ---- <1> The component name (`redpanda`) is at the top level, not wrapped in `input:`. 
.Output tool [source,yaml] ---- -label: publish-event - -redpanda: - seed_brokers: ["${REDPANDA_BROKERS}"] - topic: "processed-events" - tls: - enabled: true - sasl: - - mechanism: SCRAM-SHA-256 - username: "${secrets.MCP_USERNAME}" - password: "${secrets.MCP_PASSWORD}" - -meta: - mcp: - enabled: true - description: "Publish event to Redpanda" +include::ai-agents:example$mcp-tools/outputs/publish_event.yaml[tag=complete,indent=0] ---- .Cache tool [source,yaml] ---- -label: session-cache - -memory: - default_ttl: 300s - -meta: - mcp: - enabled: true - description: "In-memory cache for session data" +include::ai-agents:example$mcp-tools/caches/session_cache.yaml[tag=complete,indent=0] ---- Outputs can include a `processors:` section to transform data before publishing: @@ -182,27 +115,7 @@ Outputs can include a `processors:` section to transform data before publishing: .Output tool with processors [source,yaml] ---- -label: publish-with-timestamp - -processors: - - mutation: | - root = this - root.published_at = now() - -redpanda: - seed_brokers: ["${REDPANDA_BROKERS}"] - topic: "processed-events" - tls: - enabled: true - sasl: - - mechanism: SCRAM-SHA-256 - username: "${secrets.MCP_USERNAME}" - password: "${secrets.MCP_PASSWORD}" - -meta: - mcp: - enabled: true - description: "Add timestamp and publish to Redpanda" +include::ai-agents:example$mcp-tools/outputs/publish_with_timestamp.yaml[tag=complete,indent=0] ---- See xref:ai-agents:mcp/remote/tool-patterns.adoc#outputs-with-processors[outputs with processors] for more examples. 
@@ -216,6 +129,7 @@ The `meta.mcp` block defines how AI clients discover and interact with your tool include::redpanda-connect:ai-agents:partial$mcp/create-tool/mcp-metadata-fields-table.adoc[] +[#mcp-property-fields] ==== Property fields include::redpanda-connect:ai-agents:partial$mcp/create-tool/property-fields-table.adoc[] @@ -239,17 +153,12 @@ Use `this` to access message fields directly in processors like `mutation`, `map [source,yaml] ---- -mutation: | - root.search_query = this.query.lowercase() - root.max_results = this.limit.or(10) +include::ai-agents:example$mcp-tools/snippets/bloblang_this_context.yaml[tag=mutation,indent=0] ---- [source,yaml] ---- -sql_select: - table: orders - where: customer_id = ? AND status = ? - args_mapping: '[this.customer_id, this.status.or("active")]' +include::ai-agents:example$mcp-tools/snippets/bloblang_this_context.yaml[tag=args_mapping,indent=0] ---- === In string fields (interpolation) @@ -258,15 +167,12 @@ Use `${! ... }` interpolation to embed Bloblang expressions inside string values [source,yaml] ---- -http: - url: 'https://api.weather.com/v1/current?city=${! json("city") }&units=${! json("units").or("metric") }' +include::ai-agents:example$mcp-tools/snippets/interpolation.yaml[tag=http_url,indent=0] ---- [source,yaml] ---- -redpanda: - seed_brokers: ["${REDPANDA_BROKERS}"] # <1> - topic: '${! json("topic_name") }' # <2> +include::ai-agents:example$mcp-tools/snippets/interpolation.yaml[tag=redpanda_topic,indent=0] ---- <1> `$\{VAR}` without `!` is environment variable substitution, not Bloblang. <2> `${! ... }` with `!` is Bloblang interpolation that accesses message data. 
@@ -279,25 +185,14 @@ Use `.or(default)` to handle missing optional parameters: [source,yaml] ---- -mutation: | - root.city = this.city # Required - will error if missing - root.units = this.units.or("metric") # Optional with default - root.limit = this.limit.or(10).number() # Optional, converted to number +include::ai-agents:example$mcp-tools/snippets/defaults.yaml[tag=mutation,indent=0] ---- Declare which parameters are required in your `meta.mcp.properties`: [source,yaml] ---- -properties: - - name: city - type: string - description: "City name to look up" - required: true - - name: units - type: string - description: "Temperature units: 'metric' or 'imperial' (default: metric)" - required: false +include::ai-agents:example$mcp-tools/snippets/defaults.yaml[tag=properties,indent=0] ---- [[secrets]] @@ -309,15 +204,7 @@ Reference secrets using `${secrets.SECRET_NAME}` syntax: [source,yaml] ---- -http: - url: "https://api.example.com/data" - headers: - Authorization: "Bearer ${secrets.API_TOKEN}" - -sql_select: - driver: postgres - dsn: "${secrets.DATABASE_URL}" - table: customers +include::ai-agents:example$mcp-tools/snippets/secrets.yaml[tag=example,indent=0] ---- When you add secret references to your tool configuration, the Cloud Console automatically detects them and provides an interface to create the required secrets. @@ -361,63 +248,13 @@ Here's a complete tool that wraps the `http` processor to fetch weather data: [source,yaml] ---- -label: get-weather - -processors: - # Validate and sanitize input - - label: validate_city - mutation: | - root.city = if this.city.or("").trim() == "" { - throw("city is required") - } else { - this.city.trim().lowercase().re_replace_all("[^a-z\\s\\-]", "") - } - root.units = this.units.or("metric") - - # Fetch weather data - - label: fetch_weather - try: - - http: - url: 'https://wttr.in/${! 
json("city") }?format=j1' - verb: GET - timeout: 10s - - - mutation: | - root.weather = { - "location": this.nearest_area.0.areaName.0.value, - "country": this.nearest_area.0.country.0.value, - "temperature_c": this.current_condition.0.temp_C, - "temperature_f": this.current_condition.0.temp_F, - "condition": this.current_condition.0.weatherDesc.0.value, - "humidity": this.current_condition.0.humidity, - "wind_kph": this.current_condition.0.windspeedKmph - } - - # Handle errors gracefully - - label: handle_errors - catch: - - mutation: | - root.error = true - root.message = "Failed to fetch weather: " + error() - -meta: - mcp: - enabled: true - description: "Get current weather for a city. Returns temperature, conditions, humidity, and wind speed." - properties: - - name: city - type: string - description: "City name (e.g., 'London', 'New York', 'Tokyo')" - required: true - - name: units - type: string - description: "Temperature units: 'metric' or 'imperial' (default: metric)" - required: false +include::ai-agents:example$mcp-tools/processors/get_weather_complete.yaml[tag=complete,indent=0] ---- == Next steps -* xref:ai-agents:mcp/remote/best-practices.adoc[]: Apply naming and design guidelines. -* xref:ai-agents:mcp/remote/tool-patterns.adoc[]: Find patterns for databases, APIs, and Redpanda. -* xref:ai-agents:mcp/remote/troubleshooting.adoc[]: Diagnose common issues. -* xref:develop:connect/components/about.adoc[]: Browse all available components. 
+* xref:ai-agents:agents/quickstart.adoc[] +* xref:ai-agents:mcp/remote/best-practices.adoc[] +* xref:ai-agents:mcp/remote/tool-patterns.adoc[] +* xref:ai-agents:mcp/remote/troubleshooting.adoc[] +* xref:develop:connect/components/about.adoc[] diff --git a/modules/ai-agents/pages/mcp/remote/manage-servers.adoc b/modules/ai-agents/pages/mcp/remote/manage-servers.adoc index d6cb056eb..40fe836f7 100644 --- a/modules/ai-agents/pages/mcp/remote/manage-servers.adoc +++ b/modules/ai-agents/pages/mcp/remote/manage-servers.adoc @@ -1,7 +1,8 @@ = Manage Remote MCP Servers :description: Learn how to edit, stop, start, and delete MCP servers in Redpanda Cloud. +:page-aliases: ai-agents:mcp/remote/admin-guide.adoc :page-topic-type: how-to -:personas: platform_admin, ai_agent_developer +:personas: platform_admin, agent_developer // Reader journey: "I operate and maintain" // Learning objectives - what readers can accomplish from this page: :learning-objective-1: Edit MCP server configurations @@ -29,7 +30,7 @@ You can update the configuration, resources, or metadata of an MCP server at any Cloud Console:: + -- -. In the Redpanda Cloud Console, navigate to *Remote MCP*. +. In the Redpanda Cloud Console, navigate to *Agentic AI* > *Remote MCP*. . Find the MCP server you want to edit and click its name. . Click *Edit configuration*. . Make your changes. @@ -72,7 +73,7 @@ Stopping a server pauses all tool execution and releases compute resources, but Cloud Console:: + -- -. In the Redpanda Cloud Console, navigate to *Remote MCP*. +. In the Redpanda Cloud Console, navigate to *Agentic AI* > *Remote MCP*. . Find the server you want to stop. . Click the three dots and select *Stop*. . Confirm the action. @@ -103,7 +104,7 @@ Resume a stopped server to restore its functionality. Cloud Console:: + -- -. In the Redpanda Cloud Console, navigate to *Remote MCP*. +. In the Redpanda Cloud Console, navigate to *Agentic AI* > *Remote MCP*. . Find the stopped server. . 
Click the three dots and select *Start*. . Wait for the status to show *Running* before reconnecting clients. @@ -134,7 +135,7 @@ Deleting a server permanently removes it. You cannot undo this action. Redpanda Cloud Console:: + -- -. In the Redpanda Cloud Console, navigate to *Remote MCP*. +. In the Redpanda Cloud Console, navigate to *Agentic AI* > *Remote MCP*. . Find the server you want to delete. . Click the three dots and select *Delete*. . Confirm the deletion when prompted. @@ -161,6 +162,6 @@ Deletion is immediate and permanent. Make sure you have backed up any important == Next steps -* xref:ai-agents:mcp/remote/scale-resources.adoc[Scale MCP server resources] to optimize performance and costs. -* xref:ai-agents:mcp/remote/monitor-activity.adoc[Monitor MCP server activity] using OpenTelemetry traces. -* xref:ai-agents:mcp/remote/best-practices.adoc[Learn best practices] for building robust tools. +* xref:ai-agents:mcp/remote/scale-resources.adoc[] +* xref:ai-agents:mcp/remote/monitor-mcp-servers.adoc[] +* xref:ai-agents:mcp/remote/best-practices.adoc[] diff --git a/modules/ai-agents/pages/mcp/remote/monitor-activity.adoc b/modules/ai-agents/pages/mcp/remote/monitor-activity.adoc deleted file mode 100644 index 600d70e19..000000000 --- a/modules/ai-agents/pages/mcp/remote/monitor-activity.adoc +++ /dev/null @@ -1,113 +0,0 @@ -= Monitor MCP Server Activity -:description: How to consume traces, track tool invocations, measure performance, and debug failures in MCP servers. -:page-topic-type: how-to -:personas: platform_admin, ai_agent_developer, data_engineer -// Reader journey: "I need to accomplish X" -// Learning objectives - what readers can DO with this guide: -:learning-objective-1: Consume traces from the execution log -:learning-objective-2: Track tool invocations and measure performance -:learning-objective-3: Debug tool failures using trace data - -After creating an MCP server, you can monitor its activity using the execution log. 
- -After reading this page, you will be able to: - -* [ ] {learning-objective-1} -* [ ] {learning-objective-2} -* [ ] {learning-objective-3} - -For conceptual background on traces, spans, and the trace data structure, see xref:ai-agents:mcp/remote/concepts.adoc#execution-log[Execution log and observability]. - -== Prerequisites - -You must have an existing MCP server. If you do not have one, see xref:ai-agents:mcp/remote/quickstart.adoc[]. - -== Consume traces from the execution log - -MCP servers emit OpenTelemetry traces to the `redpanda.otel_traces` topic. You can consume these traces using any Kafka-compatible client or the Redpanda Cloud Console. - -[tabs] -===== -Cloud Console:: -+ --- -. In the Redpanda Cloud Console, navigate to *Topics*. -. Select `redpanda.otel_traces`. -. Click *Messages* to view recent traces. -. Use filters to search for specific trace IDs, span names, or time ranges. --- - -rpk:: -+ --- -Consume the most recent traces: - -[,bash] ----- -rpk topic consume redpanda.otel_traces --offset end -n 10 ----- - -Filter for specific MCP server activity by examining the span attributes. --- - -Data Plane API:: -+ --- -Use the link:/api/doc/cloud-dataplane/[Data Plane API] to programmatically consume traces and integrate with your monitoring pipeline. --- -===== - -== Track tool invocations - -Monitor which tools are being called and how often: - -. Consume traces from `redpanda.otel_traces`. -. Filter spans where `instrumentationScope.name` is `rpcn-mcp`. -. Examine the `name` field to see which tools are being invoked. -. Calculate frequency by counting spans per tool name over time windows. - -Example: To find all invocations of a specific tool, filter for spans where `name` matches your tool name (for example, `weather`, `http_processor`). - -== Measure performance - -Analyze tool execution times: - -. Find spans with `instrumentationScope.name` set to `rpcn-mcp`. -. 
Calculate duration: `(endTimeUnixNano - startTimeUnixNano) / 1000000` (milliseconds). -. Track percentiles (p50, p95, p99) to identify performance issues. -. Set alerts for durations exceeding acceptable thresholds. - -Example: A span with `startTimeUnixNano: "1765198415253280028"` and `endTimeUnixNano: "1765198424660663434"` has a duration of 9407ms. - -== Debug failures - -Investigate errors and failures: - -. Filter spans where `status.code` is `2` (error). -. Examine `status.message` for error details. -. Check the `events` array for error events with timestamps. -. Use `traceId` to correlate related spans and understand the full error context. -. Follow `parentSpanId` relationships to trace the error back to the originating tool. - -Example: A span with `status.code: 2` and `status.message: "connection timeout"` indicates the operation failed due to a timeout. - -== Correlate distributed operations - -Link MCP server activity to downstream effects: - -. Extract `traceId` from tool invocation spans. -. Search for the same `traceId` in other application logs or traces. -. Follow `parentSpanId` relationships to build complete operation timelines. -. Identify bottlenecks across your entire system. - -== Integrate with observability platforms - -The `redpanda.otel_traces` topic stores trace data in OpenTelemetry format. Redpanda does not support direct export to platforms like Grafana Cloud and Datadog due to format compatibility limitations. Redpanda produces one span per topic message, whereas these platforms expect traces in batch format. - -You can consume traces directly from the `redpanda.otel_traces` topic using any Kafka-compatible consumer for custom analysis and processing. 
- -== Next steps - -* xref:ai-agents:mcp/remote/concepts.adoc#execution-log[Execution logs]: Learn how traces and spans work -* xref:ai-agents:mcp/remote/troubleshooting.adoc[]: Diagnose and fix common issues -* xref:ai-agents:mcp/remote/manage-servers.adoc[]: Manage MCP server lifecycle diff --git a/modules/ai-agents/pages/mcp/remote/monitor-mcp-servers.adoc b/modules/ai-agents/pages/mcp/remote/monitor-mcp-servers.adoc new file mode 100644 index 000000000..7966fc3ae --- /dev/null +++ b/modules/ai-agents/pages/mcp/remote/monitor-mcp-servers.adoc @@ -0,0 +1,104 @@ += Monitor MCP Server Activity +:description: Consume traces, track tool invocations, measure performance, and debug failures in MCP servers. +:page-topic-type: how-to +:personas: platform_admin, agent_developer, data_engineer +:learning-objective-1: Consume traces from the redpanda.otel_traces topic +:learning-objective-2: Track tool invocations and measure performance +:learning-objective-3: Debug tool failures using trace data + +Monitor MCP server activity using OpenTelemetry traces emitted to the `redpanda.otel_traces` glossterm:topic[]. + +After reading this page, you will be able to: + +* [ ] {learning-objective-1} +* [ ] {learning-objective-2} +* [ ] {learning-objective-3} + +For conceptual background on traces, spans, and the trace data structure, see xref:ai-agents:observability/concepts.adoc[]. + +== Prerequisites + +You must have an existing MCP server. If you do not have one, see xref:ai-agents:mcp/remote/quickstart.adoc[]. + +== View transcripts in the Cloud Console + +:context: mcp +include::ai-agents:partial$transcripts-ui-guide.adoc[] + +== Analyze traces programmatically + +MCP servers emit OpenTelemetry traces to the `redpanda.otel_traces` topic. Consume these traces to build custom monitoring, track tool usage, and analyze performance. + +=== Consume traces + +[tabs] +===== +Cloud Console:: ++ +-- +. In the Redpanda Cloud Console, navigate to *Topics*. +. Select `redpanda.otel_traces`. 
+. Click *Messages* to view recent traces.
+. Use filters to search for specific trace IDs, span names, or time ranges.
+--
+
+rpk::
++
+--
+Consume the most recent traces:
+
+[,bash]
+----
+rpk topic consume redpanda.otel_traces --offset end -n 10
+----
+
+Filter for specific MCP server activity by examining the span attributes.
+--
+
+Data Plane API::
++
+--
+Use the link:/api/doc/cloud-dataplane/[Data Plane API^] to programmatically consume traces and integrate with your monitoring pipeline.
+--
+=====
+
+=== Track tool invocations
+
+Monitor which tools are being called and how often by filtering spans where `instrumentationScope.name` is `rpcn-mcp`. The `name` field shows which tool was invoked.
+
+Example: Find all invocations of a specific tool:
+
+[,bash]
+----
+rpk topic consume redpanda.otel_traces --offset start \
+ | jq '.value | select(.instrumentationScope.name == "rpcn-mcp" and .name == "weather")'
+----
+
+=== Measure performance
+
+Calculate tool execution time using span timestamps:
+
+[,text]
+----
+Duration (ms) = (endTimeUnixNano - startTimeUnixNano) / 1000000
+----
+
+Track percentiles (p50, p95, p99) to identify performance issues and set alerts for durations exceeding acceptable thresholds.
+
+=== Debug failures
+
+Filter for error spans where `status.code` is `2`:
+
+[,bash]
+----
+rpk topic consume redpanda.otel_traces --offset start \
+ | jq '.value | select(.status.code == 2)'
+----
+
+Check `status.message` for error details and the `events` array for error events with timestamps. Use `traceId` to correlate related spans across the distributed system. 
+ +== Next steps + +* xref:ai-agents:observability/concepts.adoc[] +* xref:ai-agents:mcp/remote/troubleshooting.adoc[] +* xref:ai-agents:mcp/remote/manage-servers.adoc[] diff --git a/modules/ai-agents/pages/mcp/remote/overview.adoc b/modules/ai-agents/pages/mcp/remote/overview.adoc index bc3d11845..7e5aac62d 100644 --- a/modules/ai-agents/pages/mcp/remote/overview.adoc +++ b/modules/ai-agents/pages/mcp/remote/overview.adoc @@ -1,7 +1,7 @@ = Remote MCP Server Overview :description: Discover how AI agents can interact with your streaming data and how to connect them to Redpanda Cloud. :page-topic-type: overview -:personas: evaluator, ai_agent_developer +:personas: evaluator, agent_developer // Reader journey: "I'm evaluating this" // Learning objectives - what readers should understand after reading this page: :learning-objective-1: Explain what a Remote MCP server is and how tools differ from pipelines @@ -40,11 +40,11 @@ include::redpanda-connect:ai-agents:partial$mcp/overview/use-cases-table.adoc[] Remote MCP servers sit between AI clients and your data: -. Your AI agent connects to your MCP server using `rpk cloud mcp proxy` or direct authentication +. Your AI agent connects to your MCP server using `rpk cloud mcp proxy` or direct authentication. . A user asks their AI agent something like "What's the weather in London?" -. The server finds the matching tool and runs your Redpanda Connect configuration -. Your configuration fetches data, transforms it, and returns a structured response -. The AI agent gets the data and can use it to answer the user +. The server finds the matching tool and runs your Redpanda Connect configuration. +. Your configuration fetches data, transforms it, and returns a structured response. +. The AI agent gets the data and can use it to answer the user. === What a tool looks like @@ -54,20 +54,7 @@ Here's a minimal example that returns weather data: [source,yaml] ---- -http: - url: "https://wttr.in/${! 
this.city }?format=j1" - verb: GET - -meta: - mcp: - enabled: true - name: get_weather - description: "Get current weather for a city" - properties: - - name: city - type: string - description: "City name" - required: true +include::ai-agents:example$mcp-tools/processors/get_weather_simple.yaml[tag=complete,indent=0] ---- When an AI client asks about weather, it calls this tool with the city name. The tool fetches data from the weather API and returns it. @@ -78,7 +65,8 @@ include::redpanda-connect:ai-agents:partial$mcp/overview/specification-support.a == Next steps * xref:ai-agents:mcp/remote/quickstart.adoc[] -* xref:ai-agents:mcp/remote/concepts.adoc[]: Learn about execution and component types -* xref:ai-agents:mcp/remote/create-tool.adoc[]: Create custom tools step by step +* xref:ai-agents:agents/overview.adoc[] +* xref:ai-agents:mcp/remote/concepts.adoc[] +* xref:ai-agents:mcp/remote/create-tool.adoc[] * link:https://modelcontextprotocol.io/[Model Context Protocol documentation^] diff --git a/modules/ai-agents/pages/mcp/remote/quickstart.adoc b/modules/ai-agents/pages/mcp/remote/quickstart.adoc index a778df103..92ee94aeb 100644 --- a/modules/ai-agents/pages/mcp/remote/quickstart.adoc +++ b/modules/ai-agents/pages/mcp/remote/quickstart.adoc @@ -1,7 +1,7 @@ = Remote MCP Server Quickstart :description: Learn how to extend AI agents with custom tools that interact with your Redpanda data using the Model Context Protocol (MCP). :page-topic-type: tutorial -:personas: ai_agent_developer, streaming_developer, evaluator +:personas: agent_developer, streaming_developer, evaluator // Reader journey: "I want to try it now" // Learning objectives - what readers will achieve by completing this quickstart: :learning-objective-1: Create an MCP server in Redpanda Cloud @@ -181,7 +181,7 @@ Cloud Console:: -- . Log in to the link:https://cloud.redpanda.com/[Redpanda Cloud Console^]. -. Navigate to *Remote MCP*. +. Navigate to *Agentic AI* > *Remote MCP*. 
+ This page shows a list of existing servers. @@ -304,10 +304,10 @@ Now that your MCP server is running with two tools available, you'll connect Cla When you connect Claude Code: -. Claude automatically discovers your `generate_input` and `redpanda_output` tools -. You can ask Claude in natural language to perform tasks using these tools -. Claude decides which tools to call and in what order based on your request -. The Redpanda CLI acts as a secure proxy, forwarding Claude's tool requests to your MCP server in the cloud +. Claude automatically discovers your `generate_input` and `redpanda_output` tools. +. You can ask Claude in natural language to perform tasks using these tools. +. Claude decides which tools to call and in what order based on your request. +. The Redpanda CLI acts as a secure proxy, forwarding Claude's tool requests to your MCP server in the cloud. This example uses Claude Code, but the same pattern works with any MCP-compatible client. @@ -389,9 +389,10 @@ For detailed solutions, see xref:ai-agents:mcp/remote/troubleshooting.adoc[]. You've deployed an MCP server and connected Claude Code to your Redpanda cluster. 
Here's where to go next: -* xref:ai-agents:mcp/remote/concepts.adoc[]: Understand how MCP tools differ from pipelines -* xref:ai-agents:mcp/remote/create-tool.adoc[]: Build production-quality tools with validation -* xref:ai-agents:mcp/remote/best-practices.adoc[]: Apply naming and design guidelines -* xref:ai-agents:mcp/remote/tool-patterns.adoc[]: Find reusable patterns -* xref:ai-agents:mcp/remote/troubleshooting.adoc[]: Diagnose common issues -* xref:ai-agents:mcp/remote/admin-guide.adoc[]: Scale resources, monitor activity, and administer your MCP servers +* xref:ai-agents:agents/quickstart.adoc[] +* xref:ai-agents:mcp/remote/concepts.adoc[] +* xref:ai-agents:mcp/remote/create-tool.adoc[] +* xref:ai-agents:mcp/remote/best-practices.adoc[] +* xref:ai-agents:mcp/remote/tool-patterns.adoc[] +* xref:ai-agents:mcp/remote/troubleshooting.adoc[] +* xref:ai-agents:mcp/remote/admin-guide.adoc[] diff --git a/modules/ai-agents/pages/mcp/remote/scale-resources.adoc b/modules/ai-agents/pages/mcp/remote/scale-resources.adoc index f6bb7c375..3c4d948b7 100644 --- a/modules/ai-agents/pages/mcp/remote/scale-resources.adoc +++ b/modules/ai-agents/pages/mcp/remote/scale-resources.adoc @@ -27,7 +27,7 @@ You must have an existing MCP server. If you do not have one, see xref:ai-agents Cloud Console:: + -- -. In the Redpanda Cloud Console, navigate to *Remote MCP*. +. In the Redpanda Cloud Console, navigate to *Agentic AI* > *Remote MCP*. . Find the MCP server you want to scale and click its name. . Click *Edit configuration*. . Under *Resources*, select a new size: diff --git a/modules/ai-agents/pages/mcp/remote/tool-patterns.adoc b/modules/ai-agents/pages/mcp/remote/tool-patterns.adoc index 1348419f1..2e9c658f0 100644 --- a/modules/ai-agents/pages/mcp/remote/tool-patterns.adoc +++ b/modules/ai-agents/pages/mcp/remote/tool-patterns.adoc @@ -2,7 +2,7 @@ :page-aliases: ai-agents:mcp/remote/pipeline-patterns.adoc :description: Catalog of patterns for MCP server tools in Redpanda Cloud. 
:page-topic-type: cookbook -:personas: ai_agent_developer, data_engineer +:personas: agent_developer, data_engineer // Reader journey: "I need an example for X" :learning-objective-1: Find reusable patterns for common MCP tool scenarios :learning-objective-2: Apply validation and error handling patterns for production robustness @@ -16,8 +16,6 @@ After reading this page, you will be able to: * [ ] {learning-objective-2} * [ ] {learning-objective-3} - - [[read-data]] == Read data @@ -32,7 +30,7 @@ Use xref:develop:connect/components/inputs/about.adoc[inputs] to create tools th [source,yaml] ---- -include::ai-agents:example$generate_input.yaml[] +include::ai-agents:example$mcp-tools/inputs/generate_input.yaml[] ---- See also: xref:develop:connect/components/inputs/generate.adoc[`generate` input component] @@ -46,17 +44,7 @@ See also: xref:develop:connect/components/inputs/generate.adoc[`generate` input [source,yaml] ---- -redpanda: - seed_brokers: [ "${REDPANDA_BROKERS}" ] - topics: [ "user-events" ] - consumer_group: "mcp-event-processor" - start_from_oldest: true - tls: - enabled: true - sasl: - - mechanism: "${REDPANDA_SASL_MECHANISM}" - username: "${REDPANDA_SASL_USERNAME}" - password: "${REDPANDA_SASL_PASSWORD}" +include::ai-agents:example$mcp-tools/inputs/consume_redpanda.yaml[tag=component,indent=0] ---- See also: xref:develop:connect/components/inputs/redpanda.adoc[`redpanda` input] @@ -70,23 +58,7 @@ See also: xref:develop:connect/components/inputs/redpanda.adoc[`redpanda` input] [source,yaml] ---- -redpanda: - seed_brokers: [ "${REDPANDA_BROKERS}" ] - topics: [ "sensor-readings" ] - consumer_group: "analytics-processor" - tls: - enabled: true - sasl: - - mechanism: "${REDPANDA_SASL_MECHANISM}" - username: "${REDPANDA_SASL_USERNAME}" - password: "${REDPANDA_SASL_PASSWORD}" - processors: - - mapping: | - root.sensor_id = this.sensor_id - root.avg_temperature = this.readings.map_each(r -> r.temperature).mean() - root.max_temperature = this.readings.map_each(r 
-> r.temperature).max() - root.reading_count = this.readings.length() - root.window_end = now() +include::ai-agents:example$mcp-tools/inputs/stream_processing.yaml[tag=component,indent=0] ---- See also: xref:develop:connect/components/inputs/redpanda.adoc[`redpanda` input] @@ -105,7 +77,7 @@ Use xref:develop:connect/components/processors/about.adoc[processors] to fetch d [source,yaml] ---- -include::ai-agents:example$http_processor.yaml[] +include::ai-agents:example$mcp-tools/processors/http_processor.yaml[] ---- See also: xref:develop:connect/components/processors/http.adoc[`http` processor], xref:develop:connect/components/processors/mutation.adoc[`mutation` processor] @@ -119,11 +91,27 @@ See also: xref:develop:connect/components/processors/http.adoc[`http` processor] [source,yaml] ---- -include::ai-agents:example$gcp_bigquery_select_processor.yaml[] +include::ai-agents:example$mcp-tools/processors/gcp_bigquery_select_processor.yaml[] ---- See also: xref:develop:connect/components/processors/gcp_bigquery_select.adoc[`gcp_bigquery_select` processor], xref:develop:connect/components/processors/sql_select.adoc[`sql_select` processor] +[[jira-queries]] +=== Query Jira issues + +*When to use:* Fetching tickets by status, checking assignments, finding recent issues, or building AI agents that interact with project management data. + +*Example use cases:* Get open bugs for a sprint, find issues assigned to a user, list recently updated tickets, search by custom fields. + +NOTE: The `jira` processor is available on Dedicated and BYOC clusters. + +[source,yaml] +---- +include::ai-agents:example$mcp-tools/processors/search_jira.yaml[tag=complete,indent=0] +---- + +For more patterns including pagination, custom fields, and creating issues via the HTTP processor, see xref:develop:connect/cookbooks/jira.adoc[]. 
+ [[ai-llm-integration]] === Integrate with AI/LLM services @@ -135,17 +123,7 @@ See also: xref:develop:connect/components/processors/gcp_bigquery_select.adoc[`g [source,yaml] ---- -openai_chat_completion: - api_key: "${secrets.OPENAI_API_KEY}" - model: "gpt-4" - prompt: | - Analyze this customer feedback and provide: - 1. Sentiment (positive/negative/neutral) - 2. Key themes - 3. Actionable insights - - Feedback: ${! json("feedback_text") } - max_tokens: 500 +include::ai-agents:example$mcp-tools/processors/openai_chat.yaml[tag=component,indent=0] ---- See also: xref:develop:connect/components/processors/openai_chat_completion.adoc[`openai_chat_completion`], xref:develop:connect/components/processors/openai_embeddings.adoc[`openai_embeddings`] @@ -154,10 +132,7 @@ See also: xref:develop:connect/components/processors/openai_chat_completion.adoc [source,yaml] ---- -openai_embeddings: - api_key: "${secrets.OPENAI_API_KEY}" - model: "text-embedding-3-small" - text: ${! json("content") } +include::ai-agents:example$mcp-tools/processors/openai_embeddings.yaml[tag=component,indent=0] ---- See also: xref:develop:connect/components/processors/cohere_embeddings.adoc[`cohere_embeddings`], xref:develop:connect/components/processors/gcp_vertex_ai_embeddings.adoc[`gcp_vertex_ai_embeddings`] @@ -176,7 +151,7 @@ Use xref:develop:connect/components/outputs/about.adoc[outputs] to write data to [source,yaml] ---- -include::ai-agents:example$redpanda_output.yaml[] +include::ai-agents:example$mcp-tools/outputs/redpanda_output.yaml[] ---- See also: xref:develop:connect/components/outputs/redpanda.adoc[`redpanda` output] @@ -189,7 +164,7 @@ Output tools can include processors to transform data before publishing. 
This pa [source,yaml] ---- -include::ai-agents:example$redpanda_output_with_processors.yaml[] +include::ai-agents:example$mcp-tools/outputs/redpanda_output_with_processors.yaml[] ---- [[caching]] @@ -202,13 +177,13 @@ include::ai-agents:example$redpanda_output_with_processors.yaml[] .Redpanda-backed cache [source,yaml] ---- -include::ai-agents:example$redpanda_cache.yaml[] +include::ai-agents:example$mcp-tools/caches/redpanda_cache.yaml[] ---- .In-memory cache [source,yaml] ---- -include::ai-agents:example$memory_cache.yaml[] +include::ai-agents:example$mcp-tools/caches/memory_cache.yaml[] ---- See also: xref:develop:connect/components/caches/memory.adoc[`memory` cache], xref:develop:connect/components/outputs/redpanda.adoc[`redpanda` output] @@ -227,17 +202,7 @@ Use Bloblang and processors to transform, validate, and route data. [source,yaml] ---- -mapping: | - # Parse and validate incoming data - root.user_id = this.user_id.or(throw("user_id is required")) - root.timestamp = now().ts_format("2006-01-02T15:04:05Z07:00") - - # Transform and enrich - root.email_domain = this.email.split("@").index(1) - root.is_premium = this.subscription_tier == "premium" - - # Filter sensitive data - root.profile = this.profile.without("ssn", "credit_card") +include::ai-agents:example$mcp-tools/processors/transform_validate.yaml[tag=mapping,indent=0] ---- See also: xref:develop:connect/components/processors/mapping.adoc[`mapping` processor], xref:develop:connect/guides/bloblang/about.adoc[Bloblang guide] @@ -251,30 +216,7 @@ See also: xref:develop:connect/components/processors/mapping.adoc[`mapping` proc [source,yaml] ---- -redpanda: - seed_brokers: [ "${REDPANDA_BROKERS}" ] - topics: [ "order-events" ] - consumer_group: "workflow-orchestrator" - tls: - enabled: true - sasl: - - mechanism: "${REDPANDA_SASL_MECHANISM}" - username: "${REDPANDA_SASL_USERNAME}" - password: "${REDPANDA_SASL_PASSWORD}" - processors: - - switch: - - check: this.event_type == "order_created" - processors: 
- - http: - url: "${secrets.INVENTORY_API}/reserve" - verb: POST - body: '{"order_id": "${! this.order_id }", "items": ${! json("items") }}' - - check: this.event_type == "payment_confirmed" - processors: - - http: - url: "${secrets.FULFILLMENT_API}/ship" - verb: POST - body: '{"order_id": "${! this.order_id }"}' +include::ai-agents:example$mcp-tools/inputs/event_driven_workflow.yaml[tag=component,indent=0] ---- See also: xref:develop:connect/components/inputs/redpanda.adoc[`redpanda` input] @@ -310,6 +252,7 @@ include::redpanda-connect:ai-agents:partial$mcp/tool-patterns/production-workflo == Next steps -* xref:ai-agents:mcp/remote/create-tool.adoc[]: Step-by-step tool creation guide -* xref:ai-agents:mcp/remote/best-practices.adoc[]: Apply naming and design guidelines -* xref:ai-agents:mcp/remote/troubleshooting.adoc[]: Diagnose and fix common issues +* xref:ai-agents:agents/integration-overview.adoc[] +* xref:ai-agents:mcp/remote/create-tool.adoc[] +* xref:ai-agents:mcp/remote/best-practices.adoc[] +* xref:ai-agents:mcp/remote/troubleshooting.adoc[] diff --git a/modules/ai-agents/pages/mcp/remote/troubleshooting.adoc b/modules/ai-agents/pages/mcp/remote/troubleshooting.adoc index 9c0dc41e6..2dd384758 100644 --- a/modules/ai-agents/pages/mcp/remote/troubleshooting.adoc +++ b/modules/ai-agents/pages/mcp/remote/troubleshooting.adoc @@ -1,7 +1,7 @@ = Troubleshoot Remote MCP Servers :description: Diagnose and fix common issues when building and running Remote MCP servers in Redpanda Cloud. 
:page-topic-type: troubleshooting -:personas: ai_agent_developer, streaming_developer, platform_admin +:personas: agent_developer, streaming_developer, platform_admin // Reader journey: "Something went wrong" // Learning objectives - what readers can do with this page: :learning-objective-1: Diagnose and fix lint and YAML configuration errors @@ -39,8 +39,8 @@ include::redpanda-connect:ai-agents:partial$mcp/troubleshooting/debugging-techni If you're still experiencing issues: -* xref:ai-agents:mcp/remote/create-tool.adoc[]: Review YAML structure rules and metadata fields -* xref:ai-agents:mcp/remote/best-practices.adoc[]: Review naming and metadata design -* xref:ai-agents:mcp/remote/concepts.adoc[]: Review component type selection +* xref:ai-agents:mcp/remote/create-tool.adoc[] +* xref:ai-agents:mcp/remote/best-practices.adoc[] +* xref:ai-agents:mcp/remote/concepts.adoc[] For protocol-level troubleshooting, see the link:https://modelcontextprotocol.io/[MCP documentation^]. diff --git a/modules/ai-agents/pages/observability/concepts.adoc b/modules/ai-agents/pages/observability/concepts.adoc new file mode 100644 index 000000000..1d891dd9c --- /dev/null +++ b/modules/ai-agents/pages/observability/concepts.adoc @@ -0,0 +1,353 @@ += Transcripts and AI Observability +:description: Understand how Redpanda captures execution transcripts for agents and MCP servers using OpenTelemetry. +:page-topic-type: concepts +:personas: agent_developer, platform_admin, data_engineer +:learning-objective-1: Explain how transcripts and spans capture execution flow +:learning-objective-2: Interpret transcript structure for debugging and monitoring +:learning-objective-3: Distinguish between transcripts and audit logs + +Redpanda automatically captures execution transcripts for both AI agents and MCP servers, providing complete observability into how your agentic systems operate. 
+ +After reading this page, you will be able to: + +* [ ] {learning-objective-1} +* [ ] {learning-objective-2} +* [ ] {learning-objective-3} + +== What are transcripts + +Every agent and MCP server automatically emits OpenTelemetry traces to a glossterm:topic[] called `redpanda.otel_traces`. These traces provide detailed observability into operations, creating complete transcripts. + +Transcripts capture: + +* Tool invocations and results +* Agent reasoning steps +* Data processing operations +* External API calls +* Error conditions +* Performance metrics + +With 100% sampling, every operation is captured, enabling comprehensive debugging, monitoring, and performance analysis. + +== Traces and spans + +OpenTelemetry traces provide a complete picture of how a request flows through your system: + +* A _trace_ represents the entire lifecycle of a request (for example, a tool invocation from start to finish). +* A _span_ represents a single unit of work within that trace (such as a data processing operation or an external API call). +* A trace contains one or more spans organized hierarchically, showing how operations relate to each other. + +== Agent transcript hierarchy + +Agent executions create a hierarchy of spans that reflect how agents process requests. Understanding this hierarchy helps you interpret agent behavior and identify where issues occur. + +=== Agent span types + +Agent transcripts contain these span types: + +[cols="2,3,3", options="header"] +|=== +| Span Type | Description | Use To + +| `ai-agent` +| Top-level span representing the entire agent invocation from start to finish. Includes all processing time, from receiving the request through executing the reasoning loop, calling tools, and returning the final response. +| Measure total request duration and identify slow agent invocations. + +| `agent` +| Internal agent processing that represents reasoning and decision-making. 
Shows time spent in the LLM reasoning loop, including context processing, tool selection, and response generation. Multiple `agent` spans may appear when the agent iterates through its reasoning loop.
+| Track reasoning time and identify iteration patterns.
+
+| `invoke_agent`
+| Agent and sub-agent invocation (in multi-agent architectures). Represents one agent calling another via the A2A protocol.
+| Trace calls between root agents and sub-agents, measure cross-agent latency, and identify which sub-agent was invoked.
+
+| `openai`, `anthropic`, or other LLM providers
+| LLM provider API call showing calls to the language model. The span name matches the provider, and attributes typically include the model name (like `gpt-5.2` or `claude-sonnet-4-5`).
+| Identify which model was called, measure LLM response time, and debug LLM API errors.
+
+| `rpcn-mcp`
+| MCP tool invocation representing calls to Remote MCP servers. Shows tool execution time, including network latency and tool processing. Child spans with `instrumentationScope.name` set to `redpanda-connect` represent internal Redpanda Connect processing.
+| Measure tool execution time and identify slow MCP tool calls.
+|===
+
+=== Typical agent execution flow
+
+A simple agent request creates this hierarchy:
+
+----
+ai-agent (6.65 seconds)
+├── agent (6.41 seconds)
+│ ├── invoke_agent: customer-support-agent (6.39 seconds)
+│ │ └── openai: chat gpt-5.2 (6.2 seconds)
+----
+
+This shows:
+
+1. Total agent invocation: 6.65 seconds
+2. Agent reasoning: 6.41 seconds
+3. Sub-agent call: 6.39 seconds (most of the time)
+4. LLM API call: 6.2 seconds (the actual bottleneck)
+
+Examine span durations to identify where time is spent and optimize accordingly.
+
+== MCP server transcript hierarchy
+
+MCP server tool invocations produce a different span hierarchy focused on tool execution and internal processing. This structure reveals performance bottlenecks and helps debug tool-specific issues. 
+ 
+=== MCP server span types
+
+MCP server transcripts contain these span types:
+
+[cols="2,3,3", options="header"]
+|===
+| Span Type | Description | Use To
+
+| `mcp-{server-id}`
+| Top-level span representing the entire MCP server invocation. The server ID uniquely identifies the MCP server instance. This span encompasses all tool execution from request receipt to response completion.
+| Measure total MCP server response time and identify slow tool invocations.
+
+| `service`
+| Internal service processing span that appears at multiple levels in the hierarchy. Represents Redpanda Connect service operations including routing, processing, and component execution.
+| Track internal processing overhead and identify where time is spent in the service layer.
+
+| Tool name (e.g., `get_order_status`, `get_customer_history`)
+| The specific MCP tool being invoked. This span name matches the tool name defined in the MCP server configuration.
+| Identify which tool was called and measure tool-specific execution time.
+
+| `processors`
+| Processor pipeline execution span showing the collection of processors that process the tool's data. Appears as a child of the tool invocation span.
+| Measure total processor pipeline execution time.
+
+| Processor name (e.g., `mapping`, `http`, `branch`)
+| Individual processor execution span representing a single Redpanda Connect processor. The span name matches the processor type.
+| Identify slow processors and debug processing logic.
+|===
+
+=== Typical MCP server execution flow
+
+An MCP tool invocation creates this hierarchy:
+
+----
+mcp-d5mnvn251oos73 (4.10 seconds)
+├── service > get_order_status (4.07 seconds)
+│ └── service > processors (43 microseconds)
+│ └── service > mapping (18 microseconds)
+----
+
+This shows:
+
+1. Total MCP server invocation: 4.10 seconds
+2. Tool execution (get_order_status): 4.07 seconds
+3. Processor pipeline: 43 microseconds
+4. 
Mapping processor: 18 microseconds (data transformation) + +The majority of time (4+ seconds) is spent in tool execution, while internal processing (mapping) takes only microseconds. This indicates the tool itself (likely making external API calls or database queries) is the bottleneck, not Redpanda Connect's internal processing. + +== Transcript layers and scope + +Transcripts contain multiple layers of instrumentation, from HTTP transport through application logic to external service calls. The `scope.name` field in each span identifies which instrumentation layer created that span. + +=== Instrumentation layers + +A complete agent transcript includes these layers: + +[cols="2,2,4", options="header"] +|=== +| Layer | Scope Name | Purpose + +| HTTP Server +| `go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp` +| HTTP transport layer receiving requests. Shows request/response sizes, status codes, client addresses, and network details. + +| AI SDK (Agent) +| `github.com/redpanda-data/ai-sdk-go/plugins/otel` +| Agent application logic. Shows agent invocations, LLM calls, tool executions, conversation IDs, token usage, and model details. Includes `gen_ai.*` semantic convention attributes. + +| HTTP Client +| `go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp` +| Outbound HTTP calls from agent to MCP servers. Shows target URLs, request methods, and response codes. + +| MCP Server +| `rpcn-mcp` +| MCP server tool execution. Shows tool name, input parameters, result size, and execution time. Appears as a separate `service.name` in resource attributes. + +| Redpanda Connect +| `redpanda-connect` +| Internal Redpanda Connect component execution within MCP tools. Shows pipeline and individual component spans. 
+|=== + +=== How layers connect + +Layers connect through parent-child relationships in a single transcript: + +---- +ai-agent-http-server (HTTP Server layer) +└── invoke_agent customer-support-agent (AI SDK layer) + ├── chat gpt-5-nano (AI SDK layer, LLM call 1) + ├── execute_tool get_order_status (AI SDK layer) + │ └── HTTP POST (HTTP Client layer) + │ └── get_order_status (MCP Server layer, different service) + │ └── processors (Redpanda Connect layer) + └── chat gpt-5-nano (AI SDK layer, LLM call 2) +---- + +The request flow demonstrates: + +1. HTTP request arrives at agent +2. Agent invokes sub-agent +3. Agent makes first LLM call to decide what to do +4. Agent executes tool, making HTTP call to MCP server +5. MCP server processes tool through its pipeline +6. Agent makes second LLM call with tool results +7. Response returns through HTTP layer + +=== Cross-service transcripts + +When agents call MCP tools, the transcript spans multiple services. Each service has a different `service.name` in the resource attributes: + +* Agent spans: `"service.name": "ai-agent"` +* MCP server spans: `"service.name": "mcp-{server-id}"` + +Both use the same `traceId`, allowing you to follow a request across service boundaries. 
+ +=== Key attributes by layer + +Different layers expose different attributes: + +HTTP Server/Client layer: + +- `http.request.method`, `http.response.status_code` +- `server.address`, `url.path`, `url.full` +- `network.peer.address`, `network.peer.port` +- `http.request.body.size`, `http.response.body.size` + +AI SDK layer: + +- `gen_ai.operation.name`: Operation type (`invoke_agent`, `chat`, `execute_tool`) +- `gen_ai.conversation.id`: Links spans to the same conversation +- `gen_ai.agent.name`: Sub-agent name for multi-agent systems +- `gen_ai.provider.name`, `gen_ai.request.model`: LLM provider and model +- `gen_ai.usage.input_tokens`, `gen_ai.usage.output_tokens`: Token consumption +- `gen_ai.tool.name`, `gen_ai.tool.call.arguments`: Tool execution details +- `gen_ai.input.messages`, `gen_ai.output.messages`: Full LLM conversation context + +MCP Server layer: + +- Tool-specific attributes like `order_id`, `customer_id` +- `result_prefix`, `result_length`: Tool result metadata + +Redpanda Connect layer: + +- Component-specific attributes from your tool configuration + +Use `scope.name` to filter spans by layer when analyzing transcripts. + +== Understand the transcript structure + +Each span captures a unit of work. Here's what a typical MCP tool invocation looks like: + +[,json] +---- +{ + "traceId": "71cad555b35602fbb35f035d6114db54", + "spanId": "43ad6bc31a826afd", + "name": "http_processor", + "attributes": [ + {"key": "city_name", "value": {"stringValue": "london"}}, + {"key": "result_length", "value": {"intValue": "198"}} + ], + "startTimeUnixNano": "1765198415253280028", + "endTimeUnixNano": "1765198424660663434", + "instrumentationScope": {"name": "rpcn-mcp"}, + "status": {"code": 0, "message": ""} +} +---- + +Key elements to understand: + +* `traceId`: Links all spans belonging to the same request. Use this to follow a tool invocation through its entire lifecycle. +* `name`: The tool or operation name (`http_processor` in this example). 
This tells you which component was invoked. +* `instrumentationScope.name`: When this is `rpcn-mcp`, the span represents an MCP tool. When it's `redpanda-connect`, it's internal processing. +* `attributes`: Context about the operation, like input parameters or result metadata. +* `status.code`: `0` means success, `2` means error. + +=== Parent-child relationships + +Transcripts show how operations relate. A tool invocation (parent) may trigger internal operations (children): + +[,json] +---- +{ + "traceId": "71cad555b35602fbb35f035d6114db54", + "spanId": "ed45544a7d7b08d4", + "parentSpanId": "43ad6bc31a826afd", + "name": "http", + "instrumentationScope": {"name": "redpanda-connect"}, + "status": {"code": 0, "message": ""} +} +---- + +The `parentSpanId` links this child span to the parent tool invocation. Both share the same `traceId` so you can reconstruct the complete operation. + +== Error events in transcripts + +When something goes wrong, transcripts capture error details: + +[,json] +---- +{ + "traceId": "71cad555b35602fbb35f035d6114db54", + "spanId": "ba332199f3af6d7f", + "parentSpanId": "43ad6bc31a826afd", + "name": "http_request", + "events": [ + { + "name": "event", + "timeUnixNano": "1765198420254169629", + "attributes": [{"key": "error", "value": {"stringValue": "type"}}] + } + ], + "status": {"code": 0, "message": ""} +} +---- + +The `events` array captures what happened and when. Use `timeUnixNano` to see exactly when the error occurred within the operation. + +[[opentelemetry-traces-topic]] +== How Redpanda stores trace data + +The `redpanda.otel_traces` topic stores OpenTelemetry spans using Redpanda's Schema Registry wire format, with a custom Protobuf schema named `redpanda.otel_traces-value` that follows the https://opentelemetry.io/docs/specs/otel/protocol/[OpenTelemetry Protocol (OTLP)^] specification. 
Spans include attributes following OpenTelemetry https://opentelemetry.io/docs/specs/semconv/gen-ai/[semantic conventions for generative AI^], such as `gen_ai.operation.name` and `gen_ai.conversation.id`. The schema is automatically registered in the Schema Registry with the topic, so Kafka clients can consume and deserialize trace data correctly. + +Redpanda manages both the `redpanda.otel_traces` topic and its schema automatically. If you delete either the topic or the schema, they are recreated automatically. However, deleting the topic permanently deletes all trace data, and the topic comes back empty. Do not produce your own data to this topic. It is reserved for OpenTelemetry traces. + +=== Topic configuration and lifecycle + +The `redpanda.otel_traces` topic has a predefined retention policy. Configuration changes to this topic are not supported. If you modify settings, Redpanda reverts them to the default values. + +The topic persists in your cluster even after all agents and MCP servers are deleted, allowing you to retain historical trace data for analysis. + +Transcripts may contain sensitive information from your tool inputs and outputs. Consider implementing appropriate glossterm:ACL[access control lists (ACLs)] for the `redpanda.otel_traces` topic, and review the data in transcripts before sharing or exporting to external systems. + +== Transcripts compared to audit logs + +Transcripts are designed for observability and debugging, not audit logging or compliance. 
+ +Transcripts provide: + +* Hierarchical view of request flow through your system (parent-child span relationships) +* Detailed timing information for performance analysis +* Ability to reconstruct execution paths and identify bottlenecks +* Insights into how operations flow through distributed systems + +Transcripts are not: + +* Immutable audit records for compliance purposes +* Designed for "who did what" accountability tracking + +For compliance and audit requirements, use the session and task topics for agents, which provide records of agent conversations and execution. + +== Next steps + +* xref:ai-agents:observability/view-transcripts.adoc[] +* xref:ai-agents:agents/monitor-agents.adoc[] +* xref:ai-agents:mcp/remote/monitor-mcp-servers.adoc[] diff --git a/modules/ai-agents/pages/observability/index.adoc b/modules/ai-agents/pages/observability/index.adoc new file mode 100644 index 000000000..cb4b7ea73 --- /dev/null +++ b/modules/ai-agents/pages/observability/index.adoc @@ -0,0 +1,6 @@ += Transcripts +:page-layout: index +:description: Monitor agent and MCP server execution using complete OpenTelemetry traces captured by Redpanda. + +{description} + diff --git a/modules/ai-agents/pages/observability/ingest-custom-traces.adoc b/modules/ai-agents/pages/observability/ingest-custom-traces.adoc new file mode 100644 index 000000000..96a8656bd --- /dev/null +++ b/modules/ai-agents/pages/observability/ingest-custom-traces.adoc @@ -0,0 +1,457 @@ += Ingest OpenTelemetry Traces from Custom Agents +:description: Configure a Redpanda Connect pipeline to ingest OTEL traces from custom agents into Redpanda for unified observability. 
+:page-topic-type: how-to
+:learning-objective-1: Configure a Redpanda Connect pipeline to receive OpenTelemetry traces from custom agents via HTTP and publish them to redpanda.otel_traces
+:learning-objective-2: Validate trace data format and compatibility with existing MCP server traces
+:learning-objective-3: Secure the ingestion endpoint using authentication mechanisms
+
+When you build custom agents or instrument applications outside of Remote MCP servers and declarative agents, you can send OpenTelemetry (OTEL) traces to Redpanda for centralized observability. Deploy a Redpanda Connect pipeline as an HTTP ingestion endpoint to collect and publish traces to the `redpanda.otel_traces` topic.
+
+After reading this page, you will be able to:
+
+* [ ] {learning-objective-1}
+* [ ] {learning-objective-2}
+* [ ] {learning-objective-3}
+
+== Prerequisites
+
+* A BYOC cluster
+* Ability to manage secrets in Redpanda Cloud
+* The latest version of `rpk` installed
+* Custom agent or application instrumented with OpenTelemetry SDK
+* Basic understanding of the https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-agent-spans/[OpenTelemetry span format^] and https://opentelemetry.io/docs/specs/otlp/[OpenTelemetry Protocol (OTLP)^]
+
+== Quickstart for LangChain users
+
+If you're using LangChain with OpenTelemetry tracing, you can send traces to Redpanda's `redpanda.otel_traces` glossterm:topic[] to view them in the Transcripts view.
+
+. Configure LangChain's OpenTelemetry integration by following the https://docs.langchain.com/langsmith/trace-with-opentelemetry[LangChain documentation^].
+
+. Deploy a Redpanda Connect pipeline using the `otlp_http` input to receive OTLP traces over HTTP. Create the pipeline in the *Connect* page of your cluster, or see the <<configure-the-ingestion-pipeline,Configure the ingestion pipeline>> section below for a sample configuration.
+
+. 
Configure your OTEL exporter to send traces to your Redpanda Connect pipeline using environment variables:
+
+[,bash]
+----
+# Configure LangChain OTEL integration
+export LANGSMITH_OTEL_ENABLED=true
+export LANGSMITH_TRACING=true
+
+# Send traces to Redpanda Connect pipeline
+export OTEL_EXPORTER_OTLP_ENDPOINT="https://<your-pipeline-endpoint>:4318"
+export OTEL_EXPORTER_OTLP_HEADERS="Authorization=Bearer <your-token>"
+----
+
+By default, traces are sent to both LangSmith and your Redpanda Connect pipeline. If you want to send traces only to Redpanda (not LangSmith), set:
+
+[,bash]
+----
+export LANGSMITH_OTEL_ONLY="true"
+----
+
+Your LangChain application will send traces to the `redpanda.otel_traces` topic, making them visible in the Transcripts view in your cluster alongside Remote MCP server and declarative agent traces.
+
+For non-LangChain applications or custom instrumentation, continue with the sections below.
+
+== About custom trace ingestion
+
+Custom agents include applications you build with OpenTelemetry instrumentation that operate independently of Redpanda's Remote MCP servers or declarative agents. Examples include:
+
+* Custom AI agents built with LangChain, CrewAI, or other frameworks
+* Applications with manual OpenTelemetry instrumentation
+* Services that integrate with third-party AI platforms
+
+When these applications send traces to Redpanda's `redpanda.otel_traces` glossterm:topic[], you gain unified observability across all agentic components in your system. Custom agent transcripts appear alongside Remote MCP server and declarative agent transcripts in the Transcripts view, creating xref:ai-agents:observability/concepts.adoc#cross-service-transcripts[cross-service transcripts] that allow you to correlate operations and analyze end-to-end request flows.
+
+=== Trace format requirements
+
+Custom agents must emit traces in OTLP format. The `otlp_http` input accepts both OTLP Protobuf (`application/x-protobuf`) and JSON (`application/json`) payloads. For <<use-grpc,gRPC transport>>, use the `otlp_grpc` input.
+
+Each trace must follow the OTLP specification with these required fields:
+
+[cols="1,3", options="header"]
+|===
+| Field | Description
+
+| `traceId`
+| Hex-encoded unique identifier for the entire trace
+
+| `spanId`
+| Hex-encoded unique identifier for this span
+
+| `name`
+| Descriptive operation name
+
+| `startTimeUnixNano` and `endTimeUnixNano`
+| Timing information in nanoseconds
+
+| `instrumentationScope`
+| Identifies the library that created the span
+
+| `status`
+| Operation status with code (0 = OK, 2 = ERROR)
+|===
+
+Optional but recommended fields:
+
+* `parentSpanId` for hierarchical traces
+* `attributes` for contextual information
+
+For complete trace structure details, see xref:ai-agents:observability/concepts.adoc#understand-the-transcript-structure[Understand the transcript structure].
+
+== Configure the ingestion pipeline
+
+Create a Redpanda Connect pipeline that receives HTTP requests containing OTLP traces and publishes them to the `redpanda.otel_traces` topic. The pipeline uses the `otlp_http` input component, which is specifically designed to receive OpenTelemetry Protocol data.
+
+=== Create the pipeline configuration
+
+Create a pipeline configuration file that defines the OTLP HTTP ingestion endpoint. 
+
+The `otlp_http` input component:
+
+* Exposes an OpenTelemetry Collector HTTP receiver
+* Accepts traces at the standard `/v1/traces` endpoint
+* Listens on port 4318 by default (standard OTLP/HTTP port)
+* Converts incoming OTLP data into individual Redpanda OTEL v1 Protobuf messages and publishes them to the `redpanda.otel_traces` topic
+
+Create a file named `trace-ingestion.yaml`:
+
+[,yaml]
+----
+input:
+  otlp_http:
+    address: "0.0.0.0:4318"
+    auth_token: "${secrets.TRACE_AUTH_TOKEN}"
+    max_body_size: 4194304 # 4MB default
+    read_timeout: "10s"
+    write_timeout: "10s"
+
+output:
+  redpanda:
+    seed_brokers: ["${REDPANDA_BROKERS}"]
+    topic: "redpanda.otel_traces"
+    compression: snappy
+    max_in_flight: 10
+----
+
+The `otlp_http` input automatically handles format conversion, so no processors are needed for basic trace ingestion. Each span becomes a separate message in the `redpanda.otel_traces` topic.
+
+[[use-grpc]]
+==== Alternative: Use gRPC instead of HTTP
+
+If your custom agent requires gRPC transport, use the `otlp_grpc` input instead:
+
+[,yaml]
+----
+input:
+  otlp_grpc:
+    address: "0.0.0.0:4317" # Standard OTLP/gRPC port
+    auth_token: "${secrets.TRACE_AUTH_TOKEN}"
+    max_recv_msg_size: 4194304
+
+output:
+  redpanda:
+    seed_brokers: ["${REDPANDA_BROKERS}"]
+    topic: "redpanda.otel_traces"
+    compression: snappy
+    max_in_flight: 10
+----
+
+The gRPC input works identically to HTTP but uses Protobuf encoding over gRPC. Clients must include the authentication token in gRPC metadata as `authorization: Bearer <token>`.
+
+=== Deploy the pipeline in Redpanda Cloud
+
+. In the *Connect* page of your Redpanda Cloud cluster, click *Create Pipeline*.
+. For the input, select the *otlp_http* (or *otlp_grpc*) component.
+. Skip to *Add a topic* and select `redpanda.otel_traces` from the list of existing topics. Leave the default advanced settings.
+. In the *Add permissions* step, you can create a service account with write access to the `redpanda.otel_traces` topic.
+. 
In the *Create pipeline* step, enter a name for your ingestion pipeline and paste your `trace-ingestion.yaml` configuration. Ensure that you've created the TRACE_AUTH_TOKEN secret you're referencing in the configuration. + +== Send traces from your custom agent + +Configure your custom agent to send OpenTelemetry traces to the ingestion endpoint. The endpoint accepts traces in OTLP format via HTTP on port 4318 at the `/v1/traces` path. + +=== Configure your OTEL exporter + +Install the OpenTelemetry SDK for your language and configure the OTLP exporter to target your Redpanda Connect pipeline endpoint. + +The exporter configuration requires: + +* **Endpoint**: Your pipeline's URL including the `/v1/traces` path +* **Headers**: Authorization header with your bearer token +* **Protocol**: HTTP to match the `otlp_http` input (or gRPC for `otlp_grpc`) + +.Python example for OTLP HTTP exporter +[,python] +---- +from opentelemetry import trace +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor +from opentelemetry.sdk.resources import Resource + +# Configure resource attributes to identify your agent +resource = Resource(attributes={ + "service.name": "my-custom-agent", + "service.version": "1.0.0" +}) + +# Configure the OTLP HTTP exporter +exporter = OTLPSpanExporter( + endpoint=":4318/v1/traces", + headers={"Authorization": "Bearer YOUR_TOKEN"} +) + +# Set up tracing with batch processing +provider = TracerProvider(resource=resource) +processor = BatchSpanProcessor(exporter) +provider.add_span_processor(processor) +trace.set_tracer_provider(provider) + +# Use the tracer with GenAI semantic conventions +tracer = trace.get_tracer(__name__) +with tracer.start_as_current_span( + "invoke_agent my-assistant", + kind=trace.SpanKind.INTERNAL +) as span: + # Set GenAI semantic convention attributes + 
span.set_attribute("gen_ai.operation.name", "invoke_agent") + span.set_attribute("gen_ai.agent.name", "my-assistant") + span.set_attribute("gen_ai.provider.name", "openai") + span.set_attribute("gen_ai.request.model", "gpt-4") + + # Your agent logic here + result = process_request() + + # Set token usage if available + span.set_attribute("gen_ai.usage.input_tokens", 150) + span.set_attribute("gen_ai.usage.output_tokens", 75) +---- + +.Node.js example for OTLP HTTP exporter +[,javascript] +---- +const { NodeTracerProvider } = require('@opentelemetry/sdk-trace-node'); +const { OTLPTraceExporter } = require('@opentelemetry/exporter-trace-otlp-http'); +const { BatchSpanProcessor } = require('@opentelemetry/sdk-trace-base'); +const { Resource } = require('@opentelemetry/resources'); +const { trace, SpanKind } = require('@opentelemetry/api'); + +// Configure resource +const resource = new Resource({ + 'service.name': 'my-custom-agent', + 'service.version': '1.0.0' +}); + +// Configure OTLP HTTP exporter +const exporter = new OTLPTraceExporter({ + url: 'https://your-pipeline-endpoint.redpanda.cloud:4318/v1/traces', + headers: { + 'Authorization': 'Bearer YOUR_TOKEN' + } +}); + +// Set up provider +const provider = new NodeTracerProvider({ resource }); +provider.addSpanProcessor(new BatchSpanProcessor(exporter)); +provider.register(); + +// Use the tracer with GenAI semantic conventions +const tracer = trace.getTracer('my-agent'); +const span = tracer.startSpan('invoke_agent my-assistant', { + kind: SpanKind.INTERNAL +}); + +// Set GenAI semantic convention attributes +span.setAttribute('gen_ai.operation.name', 'invoke_agent'); +span.setAttribute('gen_ai.agent.name', 'my-assistant'); +span.setAttribute('gen_ai.provider.name', 'openai'); +span.setAttribute('gen_ai.request.model', 'gpt-4'); + +// Your agent logic +processRequest().then(result => { + // Set token usage if available + span.setAttribute('gen_ai.usage.input_tokens', 150); + 
span.setAttribute('gen_ai.usage.output_tokens', 75); + span.end(); +}); +---- + +TIP: Use environment variables for the endpoint URL and authentication token to keep credentials out of your code. + +=== Use recommended semantic conventions + +The Transcripts view recognizes https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-agent-spans/[OpenTelemetry semantic conventions for GenAI operations^]. Following these conventions ensures your traces display correctly with proper attribution, token usage, and operation identification. + +==== Required attributes for agent operations + +Following the OpenTelemetry semantic conventions, agent spans should include these attributes: + +* Operation identification: +** `gen_ai.operation.name` - Set to `"invoke_agent"` for agent execution spans +** `gen_ai.agent.name` - Human-readable name of your agent (displayed in Transcripts view) +* LLM provider details: +** `gen_ai.provider.name` - LLM provider identifier (e.g., `"openai"`, `"anthropic"`, `"gcp.vertex_ai"`) +** `gen_ai.request.model` - Model name (e.g., `"gpt-4"`, `"claude-sonnet-4"`) +* Token usage (for cost tracking): +** `gen_ai.usage.input_tokens` - Number of input tokens consumed +** `gen_ai.usage.output_tokens` - Number of output tokens generated +* Session correlation: +** `gen_ai.conversation.id` - Identifier linking related agent invocations in the same conversation + +==== Example with semantic conventions + +.Python example with GenAI semantic conventions +[,python] +---- +from opentelemetry import trace + +tracer = trace.get_tracer(__name__) + +# Create an agent invocation span +with tracer.start_as_current_span( + "invoke_agent my-assistant", + kind=trace.SpanKind.INTERNAL +) as span: + # Set required attributes + span.set_attribute("gen_ai.operation.name", "invoke_agent") + span.set_attribute("gen_ai.agent.name", "my-assistant") + span.set_attribute("gen_ai.provider.name", "openai") + span.set_attribute("gen_ai.request.model", "gpt-4") + 
span.set_attribute("gen_ai.conversation.id", "session-abc-123") + + # Your agent logic here + response = process_agent_request(user_input) + + # Set token usage after completion + span.set_attribute("gen_ai.usage.input_tokens", response.usage.input_tokens) + span.set_attribute("gen_ai.usage.output_tokens", response.usage.output_tokens) +---- + +.Node.js example with GenAI semantic conventions +[,javascript] +---- +const { trace } = require('@opentelemetry/api'); + +const tracer = trace.getTracer('my-agent'); + +const span = tracer.startSpan('invoke_agent my-assistant', { + kind: SpanKind.INTERNAL +}); + +// Set required attributes +span.setAttribute('gen_ai.operation.name', 'invoke_agent'); +span.setAttribute('gen_ai.agent.name', 'my-assistant'); +span.setAttribute('gen_ai.provider.name', 'openai'); +span.setAttribute('gen_ai.request.model', 'gpt-4'); +span.setAttribute('gen_ai.conversation.id', 'session-abc-123'); + +// Your agent logic +const response = await processAgentRequest(userInput); + +// Set token usage +span.setAttribute('gen_ai.usage.input_tokens', response.usage.inputTokens); +span.setAttribute('gen_ai.usage.output_tokens', response.usage.outputTokens); + +span.end(); +---- + +=== Validate trace format + +Before deploying to production, verify your traces match the expected format. + +//// + +* How to validate trace format against schema +* Common format issues and solutions +* Tools for format validation +==== + +//// + +Test your agent locally and inspect the traces it produces: + +[,bash] +---- +# Example validation steps + +---- + +== Verify trace ingestion + +After deploying your pipeline and configuring your custom agent, verify traces are flowing correctly. 
+ +=== Consume traces from the topic + +Check that traces are being published to the `redpanda.otel_traces` topic: + +[,bash] +---- +rpk topic consume redpanda.otel_traces --offset end -n 10 +---- + +You can also view the `redpanda.otel_traces` topic in the *Topics* page of Redpanda Cloud UI. + +Look for spans with your custom `instrumentationScope.name` to identify traces from your agent. + +=== View traces in Transcripts + +After your custom agent sends traces through the pipeline, they appear in your cluster's *Agentic AI > Transcripts* view alongside traces from Remote MCP servers and declarative agents. + +==== Identify custom agent transcripts + +Custom agent transcripts are identified by the `service.name` resource attribute, which differs from Redpanda's built-in services (`ai-agent` for declarative agents, `mcp-{server-id}` for MCP servers). See xref:ai-agents:observability/concepts.adoc#cross-service-transcripts[Cross-service transcripts] to understand how the `service.name` attribute identifies transcript sources. + +Your custom agent transcripts display with: + +* **Service name** in the service filter dropdown (from your `service.name` resource attribute) +* **Agent name** in span details (from the `gen_ai.agent.name` attribute) +* **Operation names** like `"invoke_agent my-assistant"` indicating agent executions + +For detailed instructions on filtering, searching, and navigating transcripts in the UI, see xref:ai-agents:observability/view-transcripts.adoc[View Transcripts]. + +==== Token usage tracking + +If your spans include the recommended token usage attributes (`gen_ai.usage.input_tokens` and `gen_ai.usage.output_tokens`), they display in the summary panel's token usage section. This enables cost tracking alongside Remote MCP server and declarative agent transcripts. 
+
+== Troubleshooting
+
+////
+* Common issues and solutions
+* How to monitor pipeline health
+* Log locations and debugging techniques
+* Failure modes and diagnostics
+
+////
+
+=== Pipeline not receiving requests
+
+If your custom agent cannot reach the ingestion endpoint:
+
+. Verify the endpoint URL includes the correct port and path:
+ * HTTP: `https://your-endpoint:4318/v1/traces`
+ * gRPC: `https://your-endpoint:4317`
+. Check network connectivity and firewall rules.
+. Ensure authentication tokens are valid and properly formatted in the `Authorization: Bearer <token>` header (HTTP) or `authorization` metadata field (gRPC).
+. Verify the `Content-Type` header matches your data format (`application/x-protobuf` or `application/json`).
+. Review pipeline logs for connection errors or authentication failures.
+
+=== Traces not appearing in topic
+
+If requests succeed but traces do not appear in `redpanda.otel_traces`:
+
+. Check pipeline output configuration.
+. Verify topic permissions.
+. Validate trace format matches OTLP specification.
+
+== Limitations
+
+* The `otlp_http` and `otlp_grpc` inputs accept only traces, logs, and metrics, not profiles.
+* Only traces are published to the `redpanda.otel_traces` topic.
+* Exceeded rate limits return HTTP 429 (HTTP) or ResourceExhausted status (gRPC).
+ +== Next steps + +* xref:ai-agents:observability/view-transcripts.adoc[] +* xref:ai-agents:agents/monitor-agents.adoc[Observability for declarative agents] +* https://docs.redpanda.com/redpanda-connect/components/inputs/otlp_http/[OTLP HTTP input reference^] - Complete configuration options for the `otlp_http` component +* https://docs.redpanda.com/redpanda-connect/components/inputs/otlp_grpc/[OTLP gRPC input reference^] - Alternative gRPC-based trace ingestion diff --git a/modules/ai-agents/pages/observability/view-transcripts.adoc b/modules/ai-agents/pages/observability/view-transcripts.adoc new file mode 100644 index 000000000..851c9f9bc --- /dev/null +++ b/modules/ai-agents/pages/observability/view-transcripts.adoc @@ -0,0 +1,104 @@ += View Transcripts +:description: Learn how to filter and navigate the Transcripts interface to investigate agent execution traces using multiple detail views and interactive timeline navigation. +:page-topic-type: how-to +:personas: agent_developer, platform_admin +:learning-objective-1: Filter transcripts to find specific execution traces +:learning-objective-2: Navigate between detail views to inspect span information at different levels +:learning-objective-3: Use the timeline interactively to navigate to specific time periods + +The Transcripts view provides filtering and navigation capabilities for investigating agent, MCP server, and AI Gateway execution glossterm:transcript[transcripts]. Use this view to quickly locate specific operations, analyze performance patterns, and debug issues across glossterm:tool[] invocations, LLM calls, and glossterm:agent[] reasoning steps. 
+ +After reading this page, you will be able to: + +* [ ] {learning-objective-1} +* [ ] {learning-objective-2} +* [ ] {learning-objective-3} + +For basic orientation on monitoring each Redpanda Agentic Data Plane component, see: + +* xref:ai-agents:ai-gateway/observability-metrics.adoc[] +* xref:ai-agents:agents/monitor-agents.adoc[] +* xref:ai-agents:mcp/remote/monitor-mcp-servers.adoc[] + +For conceptual background on what transcripts capture and how glossterm:span[spans] are organized hierarchically, see xref:ai-agents:observability/concepts.adoc[]. + +== Prerequisites + +* xref:ai-agents:agents/create-agent.adoc[Running agent] or xref:ai-agents:mcp/remote/quickstart.adoc[MCP server] with at least one execution +* Access to the Transcripts view (requires appropriate permissions to read the `redpanda.otel_traces` topic) + +== Navigate the Transcripts interface + +=== Use the interactive timeline + +Use the timeline visualization to quickly identify when errors began or patterns changed, and navigate directly to transcripts from particular timestamps. + +When viewing time periods with many transcripts (hundreds or thousands), the timeline displays a subset of the data to maintain performance and usability. The timeline bar indicates the actual time range of currently visible data, which may be narrower than your <>. + +TIP: See xref:ai-agents:agents/monitor-agents.adoc[] and xref:ai-agents:mcp/remote/monitor-mcp-servers.adoc[] to learn basic execution patterns and health indicators to investigate. + +=== Filter transcripts + +Use filters to narrow down transcripts and quickly locate specific executions. When you use any of the filters, the transcript list updates to show only matching results. You can toggle *Full transcript* on to see the complete execution context, in grayed-out text, for the filtered transcripts. 
+ +==== Filter by attribute + +// Add details when available + +==== Adjust time range + +Use the time range selector to focus on specific time periods (from the last five minutes up to the last 24 hours): + +* View recent executions (for example, over the last hour) to monitor real-time activity +* Expand to longer periods for trend analysis over the last day +* Narrow to specific time windows when investigating issues that occurred at known times + +== Inspect span details + +Each row in the transcript table represents a high-level agent or MCP server request flow. Expand each parent glossterm:span[] to see the xref:ai-agents:observability/concepts.adoc#agent-transcript-hierarchy[hierarchical structure] of nested operations, including tool calls, LLM interactions, and internal processing steps. Parent-child spans show how operations relate: for example, an agent invocation (parent) triggers LLM calls and tool executions (children). + +When agents invoke remote MCP servers, transcripts fold together under a tree structure to provide a unified view of the complete operation across service boundaries. The glossterm:trace ID[] originates at the initial request touchpoint and propagates across all involved services, linking spans from both the agent and MCP server under a single transcript. Use the tree view to follow the trace flow across multiple services and understand the complete request lifecycle. + +If you use external agents that directly invoke MCP servers in the Redpanda Agentic Data Plane, you may only see MCP-level parent transcripts, unless you have configured the agents to also emit traces to the Redpanda glossterm:OpenTelemetry[OTEL] ingestion pipeline. 
+ +Selected spans display detailed information at multiple levels, from high-level summaries to complete raw data: + +* Start with summary view for quick assessment +* Inspect attributes for detailed investigation +* Use raw data when you need complete information + +=== Summary view + +The summary panel provides high-level span information: + +* Total nested operations (span count) and execution time +* Token usage for LLM operations +* Counts of LLM calls and tool calls + +Click on an individual span to drill down into the execution context: + +* View the full conversation history saved for that session, including user prompts, configured xref:ai-agents:agents/create-agent.adoc#write-the-system-prompt[system prompts] to guide agent behavior, and LLM outputs +* Inspect individual tool calls made by the agent and any of its sub-agents, including request arguments and responses + +TIP: Expand the summary panel to full view to easily read long conversations. + +=== Detailed attributes view + +The attributes view shows structured metadata for each transcript span. Use this view to inspect span attributes and understand the context of each operation. See xref:ai-agents:observability/concepts.adoc#key-attributes-by-layer[Transcripts and AI Observability] for details on standard attributes by instrumentation layer. + +=== Raw data view + +The raw data view provides the complete span structure: + +* Full OpenTelemetry span in JSON format +* All fields including those not displayed in summary or attributes views +* Structured data suitable for export or programmatic access + +You can also view the raw transcript data in the `redpanda.otel_traces` topic. 
+ +== Next steps + +* xref:ai-agents:agents/monitor-agents.adoc[] +* xref:ai-agents:mcp/remote/monitor-mcp-servers.adoc[] +* xref:ai-agents:observability/concepts.adoc[] +* xref:ai-agents:agents/troubleshooting.adoc[] diff --git a/modules/ai-agents/partials/ai-gateway-byoc-note.adoc b/modules/ai-agents/partials/ai-gateway-byoc-note.adoc new file mode 100644 index 000000000..86fdf86a6 --- /dev/null +++ b/modules/ai-agents/partials/ai-gateway-byoc-note.adoc @@ -0,0 +1 @@ +NOTE: The Agentic Data Plane is supported on BYOC clusters running with AWS and Redpanda version 25.3 and later. diff --git a/modules/ai-agents/partials/migration-guide.adoc b/modules/ai-agents/partials/migration-guide.adoc new file mode 100644 index 000000000..3c5c228d4 --- /dev/null +++ b/modules/ai-agents/partials/migration-guide.adoc @@ -0,0 +1,929 @@ += Migrate to AI Gateway +:description: Step-by-step migration guide to transition existing applications from direct LLM provider integrations to Redpanda AI Gateway with minimal disruption. +:page-topic-type: how-to +:personas: app_developer, platform_admin +:learning-objective-1: Migrate LLM integrations to AI Gateway with zero downtime using feature flags +:learning-objective-2: Verify gateway connectivity and compare performance metrics +:learning-objective-3: Roll back to direct integration if issues arise during migration + +include::ai-agents:partial$ai-gateway-byoc-note.adoc[] + +This guide helps you migrate existing applications from direct LLM provider integrations (OpenAI, Anthropic, and others) to Redpanda AI Gateway. Design the migration to be incremental and reversible, allowing you to test thoroughly before fully committing. 
+ +**Downtime required:** None (supports parallel operation) + +**Rollback difficulty:** Easy (feature flag or environment variable) + +== Prerequisites + +Before migrating, ensure you have: + +* AI Gateway configured in your Redpanda Cloud account +* Enabled providers and models in AI Gateway +* Created gateway with appropriate policies +* Your gateway ID (`rp-aigw-id` header value) +* Your gateway endpoint URL + +//// +PLACEHOLDER: Add verification command, for example: + +[source,bash] +---- +curl https://{GATEWAY_ENDPOINT}/v1/models \ + -H "Authorization: Bearer {YOUR_TOKEN}" \ + -H "rp-aigw-id: {GATEWAY_ID}" +---- + +Expected output: List of enabled models +//// + +== Migration strategy + +=== Recommended approach: Parallel operation + +Run both direct and gateway-routed requests simultaneously to validate behavior before full cutover. + +[source,text] +---- +┌─────────────────┐ +│ Application │ +└────────┬────────┘ + │ + ┌────▼─────┐ + │ Feature │ + │ Flag │ + └────┬─────┘ + │ + ┌────▼──────────────┐ + │ │ +┌───▼─────┐ ┌─────▼─────┐ +│ Direct │ │ Gateway │ +│Provider │ │ Route │ +└─────────┘ └───────────┘ +---- + + +Benefits: + +* No downtime +* Easy rollback +* Compare results side-by-side +* Gradual traffic shift + +== Step-by-step migration + +=== Add environment variables + +Add gateway configuration to your environment without removing existing provider keys (yet). + +*.env (or equivalent)* +[source,bash] +---- +# Existing (keep these for now) +OPENAI_API_KEY=sk-... +ANTHROPIC_API_KEY=sk-ant-... 
+ +# New gateway configuration +REDPANDA_AI_GATEWAY_URL=https://{GATEWAY_ENDPOINT} +REDPANDA_AI_GATEWAY_ID={GATEWAY_ID} +REDPANDA_AI_GATEWAY_TOKEN={YOUR_TOKEN} + +# Feature flag (start with gateway disabled) +USE_AI_GATEWAY=false +---- + + +=== Update your code + +==== Option A: OpenAI SDK (recommended for most use cases) + +Before (Direct OpenAI) + +[source,python] +---- +from openai import OpenAI + +client = OpenAI( + api_key=os.getenv("OPENAI_API_KEY") +) + +response = client.chat.completions.create( + model="gpt-4o", + messages=[{"role": "user", "content": "Hello"}] +) +---- + + +After (Gateway-routed with feature flag) + +[source,python] +---- +from openai import OpenAI +import os + +# Feature flag determines which client to use +use_gateway = os.getenv("USE_AI_GATEWAY", "false").lower() == "true" + +if use_gateway: + client = OpenAI( + base_url=os.getenv("REDPANDA_AI_GATEWAY_URL"), + api_key=os.getenv("REDPANDA_AI_GATEWAY_TOKEN"), + default_headers={"rp-aigw-id": os.getenv("REDPANDA_AI_GATEWAY_ID")} + ) + model = "openai/gpt-4o" # Add vendor prefix +else: + client = OpenAI( + api_key=os.getenv("OPENAI_API_KEY") + ) + model = "gpt-4o" # Original model name + +response = client.chat.completions.create( + model=model, + messages=[{"role": "user", "content": "Hello"}] +) +---- + + +Better: Abstraction function + +[source,python] +---- +from openai import OpenAI +import os + +def get_llm_client(): + """Returns configured OpenAI client (direct or gateway-routed)""" + use_gateway = os.getenv("USE_AI_GATEWAY", "false").lower() == "true" + + if use_gateway: + return OpenAI( + base_url=os.getenv("REDPANDA_AI_GATEWAY_URL"), + api_key=os.getenv("REDPANDA_AI_GATEWAY_TOKEN"), + default_headers={"rp-aigw-id": os.getenv("REDPANDA_AI_GATEWAY_ID")} + ) + else: + return OpenAI(api_key=os.getenv("OPENAI_API_KEY")) + +def get_model_name(base_model: str) -> str: + """Returns model name with vendor prefix if using gateway""" + use_gateway = os.getenv("USE_AI_GATEWAY", 
"false").lower() == "true" + return f"openai/{base_model}" if use_gateway else base_model + +# Usage +client = get_llm_client() +response = client.chat.completions.create( + model=get_model_name("gpt-4o"), + messages=[{"role": "user", "content": "Hello"}] +) +---- + + +==== Option B: Anthropic SDK + +Before (Direct Anthropic) + +[source,python] +---- +from anthropic import Anthropic + +client = Anthropic( + api_key=os.getenv("ANTHROPIC_API_KEY") +) + +response = client.messages.create( + model="claude-sonnet-3.5", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] +) +---- + + +After (Gateway via OpenAI-compatible wrapper) + +Because AI Gateway provides an OpenAI-compatible endpoint, we recommend migrating Anthropic SDK usage to OpenAI SDK for consistency: + +[source,python] +---- +from openai import OpenAI +import os + +use_gateway = os.getenv("USE_AI_GATEWAY", "false").lower() == "true" + +if use_gateway: + # Use OpenAI SDK with gateway + client = OpenAI( + base_url=os.getenv("REDPANDA_AI_GATEWAY_URL"), + api_key=os.getenv("REDPANDA_AI_GATEWAY_TOKEN"), + default_headers={"rp-aigw-id": os.getenv("REDPANDA_AI_GATEWAY_ID")} + ) + + response = client.chat.completions.create( + model="anthropic/claude-sonnet-3.5", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] + ) +else: + # Keep existing Anthropic SDK + from anthropic import Anthropic + client = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) + + response = client.messages.create( + model="claude-sonnet-3.5", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] + ) +---- + + +Alternative: Use OpenAI client for OpenAI-compatible gateway + +[source,python] +---- +from openai import OpenAI + +use_gateway = os.getenv("USE_AI_GATEWAY", "false").lower() == "true" + +if use_gateway: + client = OpenAI( + base_url=os.getenv("REDPANDA_AI_GATEWAY_URL"), + api_key=os.getenv("REDPANDA_AI_GATEWAY_TOKEN"), + default_headers={"rp-aigw-id": os.getenv("REDPANDA_AI_GATEWAY_ID")} 
+ ) +else: + from anthropic import Anthropic + client = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) +---- + + +==== Option C: Multiple providers + +Before (Separate SDKs) + +[source,python] +---- +from openai import OpenAI +from anthropic import Anthropic + +openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) +anthropic_client = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) + +# Different code paths +if use_openai: + response = openai_client.chat.completions.create(...) +else: + response = anthropic_client.messages.create(...) +---- + + +After (Unified via Gateway) + +[source,python] +---- +from openai import OpenAI + +# Single client for all providers +client = OpenAI( + base_url=os.getenv("REDPANDA_AI_GATEWAY_URL"), + api_key=os.getenv("REDPANDA_AI_GATEWAY_TOKEN"), + default_headers={"rp-aigw-id": os.getenv("REDPANDA_AI_GATEWAY_ID")} +) + +# Same code, different models +if use_openai: + response = client.chat.completions.create( + model="openai/gpt-4o", + messages=[...] + ) +else: + response = client.chat.completions.create( + model="anthropic/claude-sonnet-3.5", + messages=[...] 
+    )
+----
+
+
+=== Test gateway connection
+
+Before changing the feature flag, verify gateway connectivity:
+
+Python Test Script
+
+[source,python]
+----
+from openai import OpenAI
+import os
+
+def test_gateway_connection():
+    client = OpenAI(
+        base_url=os.getenv("REDPANDA_AI_GATEWAY_URL"),
+        api_key=os.getenv("REDPANDA_AI_GATEWAY_TOKEN"),
+        default_headers={"rp-aigw-id": os.getenv("REDPANDA_AI_GATEWAY_ID")}
+    )
+
+    try:
+        response = client.chat.completions.create(
+            model="openai/gpt-4o-mini",  # Use cheap model for testing
+            messages=[{"role": "user", "content": "Test"}],
+            max_tokens=10
+        )
+        print("✅ Gateway connection successful")
+        print(f"Response: {response.choices[0].message.content}")
+        return True
+    except Exception as e:
+        print(f"❌ Gateway connection failed: {e}")
+        return False
+
+if __name__ == "__main__":
+    test_gateway_connection()
+----
+
+
+Expected output (the response text varies by model):
+
+[source,text]
+----
+✅ Gateway connection successful
+Response: Hello
+----
+
+
+Common issues:
+
+* `401 Unauthorized` → Check `REDPANDA_AI_GATEWAY_TOKEN`
+* `404 Not Found` → Check `REDPANDA_AI_GATEWAY_URL` (use the gateway base URL; the OpenAI SDK appends the `/chat/completions` path itself)
+* `Model not found` → Ensure model is enabled in gateway configuration
+* No `rp-aigw-id` header → Verify header is set in `default_headers`
+
+=== Verify in observability dashboard
+
+After successful test:
+
+1. Open AI Gateway observability dashboard
+2. Navigate to // PLACEHOLDER: specific UI path, for example, "Gateways → {GATEWAY_NAME} → Logs"
+3. Verify your test request appears
+4. Check fields:
+   * Model: `openai/gpt-4o-mini`
+   * Provider: OpenAI
+   * Status: 200
+   * Token count: ~10 prompt + ~10 completion
+   * Cost: // PLACEHOLDER: expected cost
+
+*If request doesn't appear*: Verify gateway ID and authentication token are correct.
+
+=== Enable gateway for subset of traffic
+
+Gradually roll out gateway usage:
+
+Staged rollout strategy:
+
+1. *Week 1*: Internal testing only (dev team accounts)
+2. 
*Week 2*: 10% of production traffic +3. *Week 3*: 50% of production traffic +4. *Week 4*: 100% of production traffic + +Implementation options: + +Option A: Environment-based + +[source,python] +---- +# Enable gateway in staging first +use_gateway = os.getenv("ENVIRONMENT") in ["staging", "production"] +---- + + +Option B: Percentage-based + +[source,python] +---- +import random + +# Route 10% of traffic through gateway +use_gateway = random.random() < 0.10 +---- + + +Option C: User-based + +[source,python] +---- +# Enable for internal users first +use_gateway = user.email.endswith("@yourcompany.com") +---- + + +Option D: Feature flag service (recommended) + +[source,python] +---- +# LaunchDarkly, Split.io, etc. +use_gateway = feature_flags.is_enabled("ai-gateway", user_context) +---- + + +=== Monitor and compare + +During parallel operation, compare metrics: + +Metrics to monitor: + +[cols="2,1,1,3"] +|=== +| Metric | Direct | Gateway | Notes + +| Success rate +| // track +| // track +| Should be identical + +| Latency p50 +| // track +| // track +| Gateway adds ~// PLACEHOLDER: Xms + +| Latency p99 +| // track +| // track +| Watch for outliers + +| Error rate +| // track +| // track +| Should be identical + +| Cost per 1K requests +| // track +| // track +| Compare estimated costs +|=== + +Monitoring code example: + +[source,python] +---- +import time + +def call_llm_with_metrics(use_gateway: bool, model: str, messages: list): + start_time = time.time() + + try: + client = get_llm_client(use_gateway) + response = client.chat.completions.create( + model=model, + messages=messages + ) + + latency = time.time() - start_time + + # Log metrics + metrics.record("llm.request.success", 1, tags={ + "routing": "gateway" if use_gateway else "direct", + "model": model + }) + metrics.record("llm.request.latency", latency, tags={ + "routing": "gateway" if use_gateway else "direct" + }) + + return response + + except Exception as e: + metrics.record("llm.request.error", 1, 
tags={ + "routing": "gateway" if use_gateway else "direct", + "error": str(e) + }) + raise +---- + + +=== Full cutover + +Once metrics confirm gateway reliability: + +1. Set feature flag to 100%: ++ +[source,bash] +---- +USE_AI_GATEWAY=true +---- + +2. Deploy updated configuration + +3. Monitor for 24-48 hours + +4. Remove direct provider credentials (optional, for security): ++ +[source,bash] +---- +# .env +# OPENAI_API_KEY=sk-... # Remove after confirming gateway stability +# ANTHROPIC_API_KEY=sk-ant-... # Remove after confirming gateway stability + +REDPANDA_AI_GATEWAY_URL=https://{GATEWAY_ENDPOINT} +REDPANDA_AI_GATEWAY_ID={GATEWAY_ID} +REDPANDA_AI_GATEWAY_TOKEN={YOUR_TOKEN} +---- + +5. Remove direct integration code (optional, for cleanup): ++ +[source,python] +---- +# Remove feature flag logic, keep only gateway path +client = OpenAI( + base_url=os.getenv("REDPANDA_AI_GATEWAY_URL"), + api_key=os.getenv("REDPANDA_AI_GATEWAY_TOKEN"), + default_headers={"rp-aigw-id": os.getenv("REDPANDA_AI_GATEWAY_ID")} +) +---- + +== Rollback procedure + +If issues arise, rollback is simple: + +Emergency rollback (< 1 minute): + +[source,bash] +---- +# Set feature flag back to false +USE_AI_GATEWAY=false + +# Restart application (if needed) +---- + + +Gradual rollback: + +[source,python] +---- +# Reduce gateway traffic percentage +use_gateway = random.random() < 0.50 # Back to 50% +use_gateway = random.random() < 0.10 # Back to 10% +use_gateway = False # Back to 0% +---- + + +*Keep direct provider credentials until you're confident in gateway stability.* + +== Framework-specific migration + +[tabs] +====== +LangChain:: ++ +-- +Before + +[source,python] +---- +from langchain_openai import ChatOpenAI + +llm = ChatOpenAI( + model="gpt-4o", + api_key=os.getenv("OPENAI_API_KEY") +) +---- + +After + +[source,python] +---- +from langchain_openai import ChatOpenAI + +use_gateway = os.getenv("USE_AI_GATEWAY", "false").lower() == "true" + +if use_gateway: + llm = ChatOpenAI( + 
model="openai/gpt-4o",
+        base_url=os.getenv("REDPANDA_AI_GATEWAY_URL"),
+        api_key=os.getenv("REDPANDA_AI_GATEWAY_TOKEN"),
+        default_headers={"rp-aigw-id": os.getenv("REDPANDA_AI_GATEWAY_ID")}
+    )
+else:
+    llm = ChatOpenAI(
+        model="gpt-4o",
+        api_key=os.getenv("OPENAI_API_KEY")
+    )
+----
+--
+
+LlamaIndex::
++
+--
+Before
+
+[source,python]
+----
+from llama_index.llms.openai import OpenAI
+
+llm = OpenAI(model="gpt-4o")
+----
+
+After
+
+[source,python]
+----
+from llama_index.llms.openai import OpenAI
+
+use_gateway = os.getenv("USE_AI_GATEWAY", "false").lower() == "true"
+
+if use_gateway:
+    llm = OpenAI(
+        model="openai/gpt-4o",
+        api_base=os.getenv("REDPANDA_AI_GATEWAY_URL"),
+        api_key=os.getenv("REDPANDA_AI_GATEWAY_TOKEN"),
+        default_headers={"rp-aigw-id": os.getenv("REDPANDA_AI_GATEWAY_ID")}
+    )
+else:
+    llm = OpenAI(model="gpt-4o")
+----
+--
+
+Vercel AI SDK::
++
+--
+Before
+
+[source,typescript]
+----
+import { openai } from '@ai-sdk/openai';
+
+const model = openai('gpt-4o');
+----
+
+After
+
+[source,typescript]
+----
+import { createOpenAI, openai } from '@ai-sdk/openai';
+
+const useGateway = process.env.USE_AI_GATEWAY === 'true';
+
+const model = useGateway
+  ? 
createOpenAI({ + baseURL: process.env.REDPANDA_AI_GATEWAY_URL, + apiKey: process.env.REDPANDA_AI_GATEWAY_TOKEN, + headers: { + 'rp-aigw-id': process.env.REDPANDA_AI_GATEWAY_ID, + }, + })('openai/gpt-4o') + : openai('gpt-4o'); +---- +-- +====== + +== Migration checklist + +Use this checklist to track your migration: + +*Prerequisites* + + * [ ] Gateway configured and tested + * [ ] Providers enabled + * [ ] Models enabled + * [ ] Gateway ID and endpoint URL obtained + +*Code Changes* + + * [ ] Environment variables added + * [ ] Feature flag implemented + * [ ] Client initialization updated + * [ ] Model name prefix added (vendor/model_id) + * [ ] Headers added (rp-aigw-id) + +*Testing* + + * [ ] Gateway connection test passes + * [ ] Test request visible in observability dashboard + * [ ] Integration tests pass with gateway + * [ ] End-to-end tests pass with gateway + +*Staged rollout* + + * [ ] Week 1: Internal testing (dev team only) + * [ ] Week 2: 10% production traffic + * [ ] Week 3: 50% production traffic + * [ ] Week 4: 100% production traffic + +*Monitoring* + + * [ ] Success rate comparison (direct vs gateway) + * [ ] Latency comparison (direct vs gateway) + * [ ] Error rate comparison (direct vs gateway) + * [ ] Cost comparison (direct vs gateway) + +*Cleanup* (optional, after 30 days stable) + + * [ ] Remove direct provider credentials + * [ ] Remove feature flag logic + * [ ] Update documentation + * [ ] Archive direct integration code + +== Common migration issues + +=== "Model not found" error + +Symptom: +[source,text] +---- +Error: Model 'openai/gpt-4o' not found +---- + + +Causes: + +1. Model not enabled in gateway configuration +2. Wrong model name format (missing vendor prefix) +3. Typo in model name + +Solution: + +1. Verify model is enabled: // PLACEHOLDER: UI path or CLI command +2. Confirm format: `vendor/model_id` (for example, `openai/gpt-4o`, not `gpt-4o`) +3. 
Check supported models: // PLACEHOLDER: link to model catalog
+
+=== Missing `rp-aigw-id` header
+
+Symptom:
+
+[source,text]
+----
+Error: Missing required header 'rp-aigw-id'
+----
+
+
+Solution:
+
+[source,python]
+----
+# Ensure header is set in default_headers
+client = OpenAI(
+    base_url=os.getenv("REDPANDA_AI_GATEWAY_URL"),
+    api_key=os.getenv("REDPANDA_AI_GATEWAY_TOKEN"),
+    default_headers={"rp-aigw-id": os.getenv("REDPANDA_AI_GATEWAY_ID")}  # ← Required
+)
+----
+
+
+=== Higher latency than expected
+
+Expected gateway overhead: // PLACEHOLDER: Xms p50, Yms p99
+
+If latency is significantly higher:
+
+1. Check geographic routing (gateway → provider region)
+2. Verify provider pool configuration (no unnecessary fallbacks)
+3. Review CEL routing complexity
+4. Check for rate limiting (adds retry latency)
+
+Solution: Work through the checklist above in order; geographic routing and provider pool misconfiguration are the most common causes.
+
+=== Requests not appearing in dashboard
+
+Causes:
+
+1. Wrong gateway ID
+2. Request failed before reaching gateway
+3. UI delay (logs may take // PLACEHOLDER: Xs to appear)
+
+Solution: Verify the gateway ID, check client-side logs for requests that failed before reaching the gateway, and allow for log-ingestion delay (logs may take a few seconds to appear).
+ +=== Different response format + +Symptom: Response structure differs between direct and gateway + +// PLACEHOLDER: Confirm if response format is identical to OpenAI API or if there are differences + +Solution: + +* AI Gateway should return OpenAI-compatible responses +* If differences exist, file a support ticket with request ID from logs + +== Advanced migration scenarios + +=== Custom request timeouts + +Before + +[source,python] +---- +client = OpenAI(api_key=..., timeout=30.0) +---- + + +After + +[source,python] +---- +client = OpenAI( + base_url=os.getenv("REDPANDA_AI_GATEWAY_URL"), + api_key=os.getenv("REDPANDA_AI_GATEWAY_TOKEN"), + default_headers={"rp-aigw-id": os.getenv("REDPANDA_AI_GATEWAY_ID")}, + timeout=30.0 # Still supported +) +---- + + +=== Streaming responses + +// PLACEHOLDER: Verify streaming support + +Before + +[source,python] +---- +stream = client.chat.completions.create( + model="gpt-4o", + messages=[...], + stream=True +) + +for chunk in stream: + print(chunk.choices[0].delta.content, end="") +---- + + +After + +[source,python] +---- +stream = client.chat.completions.create( + model="openai/gpt-4o", # Add vendor prefix + messages=[...], + stream=True +) + +for chunk in stream: + print(chunk.choices[0].delta.content, end="") +---- + + +=== Custom headers (for example, user tracking) + +Before + +[source,python] +---- +response = client.chat.completions.create( + model="gpt-4o", + messages=[...], + extra_headers={"X-User-ID": user.id} +) +---- + + +After + +[source,python] +---- +response = client.chat.completions.create( + model="openai/gpt-4o", + messages=[...], + extra_headers={ + "X-User-ID": user.id, # Custom headers still supported + "rp-aigw-id": os.getenv("REDPANDA_AI_GATEWAY_ID") # Required gateway header + } +) +---- + + +NOTE: Gateway may use custom headers for routing (for example, CEL expressions can reference `request.headers["X-User-ID"]`) + +== Post-migration benefits + +After successful migration, you gain: + +Simplified 
provider management + +[source,python] +---- +# Switch providers with one config change (no code changes) +model = "anthropic/claude-sonnet-3.5" # Was openai/gpt-4o +---- + +Unified observability + +* All requests in one dashboard +* Cross-provider cost comparison +* Session reconstruction across models + +Automatic failover + +* Configure once, benefit everywhere +* No application-level retry logic needed + +Cost controls + +* Enforce budgets centrally +* Rate limit per team/customer +* No surprises in cloud bills + +A/B testing + +* Test new models without code changes +* Compare quality/cost/latency +* Gradual rollout via routing policies + +== Next steps + +* xref:ai-agents:ai-gateway/cel-routing-cookbook.adoc[]: Configure advanced routing policies. +* xref:ai-agents:ai-gateway/mcp-aggregation-guide.adoc[]: Explore MCP aggregation. diff --git a/modules/ai-agents/partials/observability-logs.adoc b/modules/ai-agents/partials/observability-logs.adoc new file mode 100644 index 000000000..0d1ad5455 --- /dev/null +++ b/modules/ai-agents/partials/observability-logs.adoc @@ -0,0 +1,774 @@ += Observability: Logs +:description: Guide to AI Gateway request logs, including where to find logs, log fields, filtering, searching, inspecting requests, common analysis tasks, log retention, export options, privacy/security, and troubleshooting. +:page-topic-type: reference +:personas: platform_admin, app_developer +:learning-objective-1: Locate and filter request logs to debug failures or reconstruct conversations +:learning-objective-2: Interpret log fields to diagnose performance and cost issues +:learning-objective-3: Export logs for compliance auditing or long-term analysis + +include::ai-agents:partial$ai-gateway-byoc-note.adoc[] + +AI Gateway logs every LLM request that passes through it, capturing the full request/response history, token usage, cost, latency, and routing decisions. This page explains how to find, filter, and interpret request logs. 
+ +== Before you begin + +* You have an active AI Gateway with at least one request processed. +* You have access to the Redpanda Cloud Console. +* You have the appropriate permissions to view gateway logs. + +Use logs for: + +* Debugging specific failed requests +* Reconstructing user conversation sessions +* Auditing what prompts were sent and responses received +* Understanding which provider handled a request +* Investigating latency spikes or errors for specific users + +Use metrics for: Aggregate analytics, trends, cost tracking across time. See xref:ai-agents:ai-gateway/observability-metrics.adoc[]. + +== Where to find logs + +// PLACEHOLDER: Add exact UI navigation path + +1. Navigate to logs view: + * Console → AI Gateway → // PLACEHOLDER: exact path + * Or: Gateway detail page → Logs tab + +2. Select gateway: + * Filter by specific gateway, or view all gateways + * // PLACEHOLDER: screenshot of gateway selector + +3. Set time range: + * Default: Last 1 hour + * Options: Last 5 minutes, 1 hour, 24 hours, 7 days, 30 days, Custom + * // PLACEHOLDER: screenshot of time range picker + +== Request log fields + +Each log entry contains: + +=== Core request info + +[cols="1,2,2"] +|=== +| Field | Description | Example + +| *Request ID* +| Unique identifier for this request +| `req_abc123...` + +| *Timestamp* +| When request was received (UTC) +| `2025-01-11T14:32:10.123Z` + +| *Gateway ID* +| Which gateway handled this request +| `gw_abc123...` + +| *Gateway Name* +| Human-readable gateway name +| `production-gateway` + +| *Status* +| HTTP status code +| `200`, `400`, `429`, `500` + +| *Latency* +| Total request duration (ms) +| `1250ms` +|=== + +=== Model and provider info + +[cols="1,2,2"] +|=== +| Field | Description | Example + +| *Requested Model* +| Model specified in request +| `openai/gpt-4o` + +| *Actual Model* +| Model that handled request (may differ due to routing) +| `anthropic/claude-sonnet-3.5` + +| *Provider* +| Which provider handled the request 
+| `OpenAI`, `Anthropic` + +| *Provider Pool* +| Pool used (primary/fallback) +| `primary`, `fallback` + +| *Fallback Triggered* +| Whether fallback was used +| `true`/`false` + +| *Fallback Reason* +| Why fallback occurred +| `rate_limit`, `timeout`, `5xx_error` +|=== + +=== Token and cost info + +[cols="1,2,2"] +|=== +| Field | Description | Example + +| *Prompt Tokens* +| Input tokens consumed +| `523` + +| *Completion Tokens* +| Output tokens generated +| `187` + +| *Total Tokens* +| Prompt + completion +| `710` + +| *Estimated Cost* +| Calculated cost for this request +| `$0.0142` + +| *Cost Breakdown* +| Per-token costs +| `Prompt: $0.005, Completion: $0.0092` +|=== + +=== Request content (expandable) + +[cols="1,2,2"] +|=== +| Field | Description | Notes + +| *Request Headers* +| All headers sent +| Includes `rp-aigw-id`, custom headers + +| *Request Body* +| Full request payload +| Includes messages, parameters + +| *Response Headers* +| Headers returned +| // PLACEHOLDER: Any gateway-specific headers? + +| *Response Body* +| Full response payload +| Includes message content, metadata +|=== + +=== Routing and policy info + +[cols="1,2,2"] +|=== +| Field | Description | Example + +| *CEL Expression* +| Routing rule applied (if any) +| `request.headers["tier"] == "premium" ? 
...` + +| *CEL Result* +| Model selected by CEL +| `openai/gpt-4o` + +| *Rate Limit Status* +| Whether rate limited +| `allowed`, `throttled`, `blocked` + +| *Spend Limit Status* +| Whether budget exceeded +| `allowed`, `blocked` + +| *Policy Stage* +| Where request was processed/blocked +| `rate_limit`, `routing`, `execution` +|=== + +=== Error info (if applicable) + +[cols="1,2,2"] +|=== +| Field | Description | Example + +| *Error Code* +| Gateway or provider error code +| `RATE_LIMIT_EXCEEDED`, `MODEL_NOT_FOUND` + +| *Error Message* +| Human-readable error +| `Request rate limit exceeded for gateway` + +| *Provider Error* +| Upstream provider error +| `OpenAI API returned 429: Rate limit exceeded` +|=== + +== Filter logs + +=== By gateway + +// PLACEHOLDER: Screenshot of gateway filter dropdown + +[source,text] +---- +Filter: Gateway = "production-gateway" +---- + + +Shows only requests for the selected gateway. + +Use case: Isolate production traffic from staging + +=== By model + +// PLACEHOLDER: Screenshot of model filter + +[source,text] +---- +Filter: Model = "openai/gpt-4o" +---- + + +Shows only requests for specific model. + +Use case: Compare quality/cost between models + +=== By provider + +[source,text] +---- +Filter: Provider = "OpenAI" +---- + + +Shows only requests handled by specific provider. + +Use case: Investigate provider-specific issues + +=== By status + +[source,text] +---- +Filter: Status = "429" +---- + + +Shows only requests with specific HTTP status. 
+ +Common filters: + +* `200`: Successful requests +* `400`: Bad requests (client errors) +* `401`: Authentication errors +* `429`: Rate limited requests +* `500`: Server errors +* `5xx`: All server errors + +Use case: Find all failed requests + +=== By time range + +[source,text] +---- +Filter: Timestamp >= "2025-01-11T14:00:00Z" AND Timestamp <= "2025-01-11T15:00:00Z" +---- + + +Use case: Investigate incident during specific time window + +=== By custom header + +[source,text] +---- +Filter: request.headers["x-user-id"] = "user_123" +---- + + +Shows only requests for specific user. + +Use case: Debug user-reported issue + +=== By token range + +[source,text] +---- +Filter: Total Tokens > 10000 +---- + + +Shows only high-token requests. + +Use case: Find expensive requests + +=== By latency + +[source,text] +---- +Filter: Latency > 5000ms +---- + + +Shows only slow requests. + +Use case: Investigate performance issues + +=== Combined filters + +[source,text] +---- +Gateway = "production-gateway" +AND Status >= 500 +AND Timestamp >= "last 24 hours" +---- + + +Shows production server errors in last 24 hours. + +// PLACEHOLDER: Screenshot of multiple filters applied + +== Search logs + +=== Full-text search (if supported) + +// PLACEHOLDER: Confirm if full-text search is available + +[source,text] +---- +Search: "specific error message" +---- + + +Searches across all text fields (error messages, request/response content). + +=== Search by request content + +[source,text] +---- +Search in Request Body: "user's actual question" +---- + + +Find requests containing specific prompt text. + +Use case: "A user said the AI gave a wrong answer about X" → Search for "X" in prompts + +=== Search by response content + +[source,text] +---- +Search in Response Body: "specific AI response phrase" +---- + + +Find responses containing specific text. 
+ +Use case: Find all requests where AI mentioned a competitor name + +== Inspect individual requests + +Click any log entry to expand full details. + +// PLACEHOLDER: Screenshot of expanded log entry + +=== Request details tab + +Shows: + +* Full request headers +* Full request body (formatted JSON) +* All parameters (temperature, max_tokens, etc.) +* Custom headers used for routing + +Example: + +[source,json] +---- +{ + "model": "openai/gpt-4o", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "What is Redpanda?" + } + ], + "temperature": 0.7, + "max_tokens": 500 +} +---- + + +=== Response details tab + +Shows: + +* Full response headers +* Full response body (formatted JSON) +* Finish reason (`stop`, `length`, `content_filter`) +* Response metadata + +Example: + +[source,json] +---- +{ + "id": "chatcmpl-...", + "choices": [ + { + "message": { + "role": "assistant", + "content": "Redpanda is a streaming data platform..." + }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 24, + "completion_tokens": 87, + "total_tokens": 111 + } +} +---- + + +=== Routing details tab + +Shows: + +* CEL expression evaluated (if any) +* CEL result (which model was selected) +* Provider pool used (primary/fallback) +* Fallback trigger reason (if applicable) +* Rate limit evaluation (allowed/blocked) +* Spend limit evaluation (allowed/blocked) + +Example: + +[source,yaml] +---- +CEL Expression: | + request.headers["x-user-tier"] == "premium" + ? 
"openai/gpt-4o" + : "openai/gpt-4o-mini" + +CEL Result: "openai/gpt-4o" + +Provider Pool: primary +Fallback Triggered: false + +Rate Limit: allowed (45/100 requests used) +Spend Limit: allowed ($1,234 / $50,000 budget used) +---- + + +=== Performance details tab + +Shows: + +* Total latency breakdown + * Gateway processing time: // PLACEHOLDER: Xms + * Provider API call time: // PLACEHOLDER: Xms + * Network time: // PLACEHOLDER: Xms +* Token generation rate (tokens/second) +* Time to first token (for streaming, if supported) + +Example: + +[source,text] +---- +Total Latency: 1,250ms +├─ Gateway Processing: 12ms +├─ Provider API Call: 1,215ms +└─ Network Overhead: 23ms + +Token Generation Rate: 71 tokens/second +---- + + +== Common log analysis tasks + +=== Task 1: "Why did this request fail?" + +1. Find the request: + + * Filter by timestamp (when user reported issue) + * Or search by request content + * Or filter by custom header (user ID) + +2. Check status: + + * `400` → Client error (bad request format, invalid parameters) + * `401` → Authentication issue + * `404` → Model not found + * `429` → Rate limited + * `500`/`5xx` → Provider or gateway error + +3. Check error message: + + * Gateway error: Issue with configuration, rate limits, etc. + * Provider error: Issue with upstream API (OpenAI, Anthropic, etc.) + +4. Check routing: + * Was fallback triggered? (May indicate primary provider issue) + * Was CEL rule applied correctly? + +Common causes: + +* Model not enabled in gateway +* Rate limit exceeded +* Monthly budget exceeded +* Invalid API key for provider +* Provider outage/rate limit +* Malformed request + +=== Task 2: "Reconstruct a user's conversation" + +1. *Filter by user*: ++ +[source,text] +---- +Filter: request.headers["x-user-id"] = "user_123" +---- + +2. *Sort by timestamp* (ascending) + +3. 
*Review conversation flow*: + + * Each request shows prompt + * Each response shows AI reply + * Reconstruct full conversation thread + +Use case: User says "the AI contradicted itself" → View full conversation history + +=== Task 3: "Why is latency high for this user?" + +1. *Find user's requests*: ++ +[source,text] +---- +Filter: request.headers["x-user-id"] = "user_123" +AND Latency > 3000ms +---- + +2. *Check Performance Details*: + + * Is gateway processing slow? (Likely CEL complexity) + * Is provider API slow? (Upstream latency) + * Is token generation rate normal? (Tokens/second) + +3. *Compare to other requests*: + + * Filter for same model + * Compare latency percentiles + * Identify if issue is user-specific or model-wide + +Common causes: + +* Complex CEL routing rules +* Provider performance degradation +* Large context windows (high token count) +* Network issues + +=== Task 4: "Which requests used the fallback provider?" + +1. *Filter by fallback*: ++ +[source,text] +---- +Filter: Fallback Triggered = true +---- + +2. *Group by Fallback Reason*: + + * Rate limit exceeded (primary provider throttled) + * Timeout (primary provider slow) + * 5xx error (primary provider error) + +3. *Analyze pattern*: + + * Is fallback happening frequently? (May indicate primary provider issue) + * Is fallback successful? (Check status of fallback requests) + +Use case: Verify failover is working as expected + +=== Task 5: "What did we spend on this customer today?" + +1. *Filter by customer*: ++ +[source,text] +---- +Filter: request.headers["x-customer-id"] = "customer_abc" +AND Timestamp >= "today" +---- + +2. *Sum estimated costs* (if UI supports): + + // PLACEHOLDER: Does UI have cost aggregation for filtered results? + * Total: $X.XX + * Breakdown by model + +3. *Export to CSV* (if supported): + + // PLACEHOLDER: Is CSV export available? 
+ * For detailed billing analysis + +Use case: Chargeback/showback to customers + +== Log retention + +// PLACEHOLDER: Confirm log retention policy + +Retention period: // PLACEHOLDER: e.g., 30 days, 90 days, configurable + +After retention period: + +* Logs are deleted automatically +* Aggregate metrics retained longer (see xref:ai-agents:ai-gateway/observability-metrics.adoc[]) + +Export logs (if needed for longer retention): + +// PLACEHOLDER: Is log export available? Via API? CSV? + +== Log export + +// PLACEHOLDER: Confirm export capabilities + +=== Export to CSV + +// PLACEHOLDER: Add UI path for export, or indicate not available + +1. Apply filters for desired logs +2. Click "Export to CSV" +3. Download includes all filtered logs with full fields + +=== Export via API + +// PLACEHOLDER: If API is available for log export + +[source,bash] +---- +curl https://{CLUSTER_ID}.cloud.redpanda.com/api/ai-gateway/logs \ + -H "Authorization: Bearer ${REDPANDA_CLOUD_TOKEN}" \ + -G \ + --data-urlencode "gateway_id=gw_abc123" \ + --data-urlencode "start_time=2025-01-11T00:00:00Z" \ + --data-urlencode "end_time=2025-01-11T23:59:59Z" +---- + + +=== Integration with observability platforms + +// PLACEHOLDER: Are there integrations with external platforms? + +Supported integrations (if any): + +* OpenTelemetry export → Send logs to Jaeger, Datadog, New Relic +* CloudWatch Logs → For AWS deployments +* // PLACEHOLDER: Others? + + +== Privacy and security + +=== What is logged + +// PLACEHOLDER: Confirm what is logged by default + +AI Gateway logs by default: + +* Request headers (including custom headers) +* Request body (full prompt content) +* Response body (full AI response) +* Token usage, cost, latency +* Routing decisions, policy evaluations + +AI Gateway does not log (if applicable): + +* // PLACEHOLDER: Anything redacted? API keys? Specific headers? + +=== Redaction options + +// PLACEHOLDER: Are there options to redact PII or sensitive data? 
+ +If redaction is supported: + +* Configure redaction rules for specific fields +* Mask PII (email addresses, phone numbers, etc.) +* Redact custom header values + +Example: + +[source,yaml] +---- +# PLACEHOLDER: Actual configuration format +redaction: + - field: request.headers.x-api-key + action: mask + - field: request.body.messages[].content + pattern: "\\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,}\\b" # Email regex + action: replace + replacement: "[REDACTED_EMAIL]" +---- + + +=== Access control + +// PLACEHOLDER: Who can view logs? RBAC? + +Permissions required: + +* View logs: // PLACEHOLDER: role/permission name +* Export logs: // PLACEHOLDER: role/permission name + +Audit trail: + +* Log access is audited (who viewed which logs, when) +* // PLACEHOLDER: Where to find audit trail? + +== Troubleshoot log issues + +=== Issue: "Logs not appearing for my request" + +Possible causes: + +1. Log ingestion delay (wait // PLACEHOLDER: Xs) +2. Wrong gateway ID filter +3. Request failed before reaching gateway (authentication error) +4. Time range filter too narrow + +Solution: + +1. Wait a moment and refresh +2. Remove all filters, search by timestamp +3. Check client-side error logs +4. Expand time range to "Last 1 hour" + +=== Issue: "Missing request/response content" + +Possible causes: + +1. Payload too large (// PLACEHOLDER: size limit?) +2. Redaction rules applied +3. // PLACEHOLDER: Other reasons? + +Solution: + +// PLACEHOLDER: How to retrieve full content if truncated? + +=== Issue: "Cost estimate incorrect" + +Possible causes: + +1. Cost estimate based on public pricing (may differ from your contract) +2. Provider changed pricing +3. // PLACEHOLDER: Other reasons? + +Note: Cost estimates are approximate. Use provider invoices for billing. + +== Next steps + +* xref:ai-agents:ai-gateway/observability-metrics.adoc[]: Aggregate analytics and cost tracking. 
\ No newline at end of file diff --git a/modules/ai-agents/partials/observability-metrics.adoc b/modules/ai-agents/partials/observability-metrics.adoc new file mode 100644 index 000000000..4ce3512c9 --- /dev/null +++ b/modules/ai-agents/partials/observability-metrics.adoc @@ -0,0 +1,867 @@ += Observability: Metrics and Analytics +:description: Guide to AI Gateway metrics and analytics, including where to find metrics, key metrics explained, dashboard views, filtering/grouping, alerting, exporting, common analysis tasks, retention, API access, best practices, and troubleshooting. +:page-topic-type: reference +:personas: platform_admin, app_developer +:learning-objective-1: Monitor aggregate metrics to track usage patterns and budget adherence +:learning-objective-2: Compare model and provider performance using latency and cost metrics +:learning-objective-3: Configure alerts for budget thresholds and performance degradation + +include::ai-agents:partial$ai-gateway-byoc-note.adoc[] + +AI Gateway provides aggregate metrics and analytics dashboards to help you understand usage patterns, costs, performance, and errors across all your LLM traffic. + +== Before you begin + +* You have an active AI Gateway with at least one request processed. +* You have access to the Redpanda Cloud Console. +* You have the appropriate permissions to view gateway metrics. + +Use metrics for: + +* Cost tracking and budget management +* Usage trends over time +* Performance monitoring (latency, error rates) +* Capacity planning +* Model/provider comparison + +Use logs for: Debugging specific requests, viewing full prompts/responses. See xref:ai-agents:ai-gateway/observability-logs.adoc[]. + +== Where to find metrics + +// PLACEHOLDER: Add exact UI navigation path + +1. Navigate to analytics dashboard: + * Console → AI Gateway → // PLACEHOLDER: exact path + * Or: Gateway detail page → Analytics tab + +2. 
Select gateway (optional): + * View all gateways (org-wide metrics) + * Or filter to specific gateway + +3. Set time range: + * Default: Last 7 days + * Options: Last 24 hours, 7 days, 30 days, 90 days, Custom + * // PLACEHOLDER: screenshot of time range picker + +== Key metrics + +=== Request volume + +What it shows: Total number of requests over time + +// PLACEHOLDER: Screenshot of request volume graph + +Graph type: Time series line chart + +Filters: + +* By gateway +* By model +* By provider +* By status (success/error) + +Use cases: + +* Identify usage patterns (peak hours, days of week) +* Detect traffic spikes or drops +* Capacity planning + +Example insights: + +* "Traffic doubles every Monday morning at 9am" → Scale infrastructure +* "Staging gateway has more traffic than prod" → Investigate runaway testing + +=== Token usage + +What it shows: Prompt, completion, and total tokens consumed + +// PLACEHOLDER: Screenshot of token usage graph + +Graph type: Stacked area chart (prompt vs completion tokens) + +Metrics: + +* Total tokens +* Prompt tokens (input) +* Completion tokens (output) +* Tokens per request (average) + +Breakdowns: + +* By gateway +* By model +* By provider + +Use cases: + +* Understand cost drivers (prompt vs completion tokens) +* Identify verbose prompts or responses +* Optimize token usage + +Example insights: + +* "90% of tokens are completion tokens" → Responses are verbose, optimize max_tokens +* "Staging uses 10x more tokens than prod" → Investigate test suite + +=== Estimated spend + +What it shows: Calculated cost based on token usage and public pricing + +// PLACEHOLDER: Screenshot of cost tracking dashboard + +Graph type: Time series line chart with cost breakdown + +Metrics: + +* Total estimated spend +* Spend by model +* Spend by provider +* Spend by gateway +* Cost per 1K requests +* Cost per 1M tokens + +Breakdowns: + +* By gateway (for chargeback/showback) +* By model (for cost optimization) +* By provider (for negotiation 
leverage) +* By custom header (if configured, e.g., `x-customer-id`) + +Use cases: + +* Budget tracking ("Are we staying under $50K/month?") +* Cost attribution ("Which team spent the most?") +* Model comparison ("Is Claude cheaper than GPT-4 for our use case?") +* Forecasting ("At this rate, we'll spend $X next month") + +Important notes: + +* *Estimates based on public pricing* (may differ from your contract) +* *Not a substitute for provider invoices* (use for approximation only) +* Update frequency: // PLACEHOLDER: Real-time? Hourly? Daily? + +Example insights: + +* "Customer A accounts for 60% of spend" → Consider rate limits or tiered pricing +* "GPT-4o is 3x more expensive than Claude Sonnet for similar quality" → Optimize routing + +=== Latency + +What it shows: Request duration from gateway to provider and back + +// PLACEHOLDER: Screenshot of latency histogram + +Metrics: + +* p50 (median) latency +* p95 latency +* p99 latency +* Min/max latency +* Average latency + +Breakdowns: + +* By gateway +* By model +* By provider +* By token range (longer responses = higher latency) + +Use cases: + +* Identify slow models or providers +* Set SLO targets (e.g., "p95 < 2 seconds") +* Detect performance regressions + +Example insights: + +* "GPT-4o p99 latency spiked to 10 seconds yesterday" → Investigate provider issue +* "Claude Sonnet is 30% faster than GPT-4o for same prompts" → Optimize for latency + +Latency components (if available): + +// PLACEHOLDER: Does gateway show latency breakdown? +* Gateway processing time +* Provider API time +* Network time + +=== Error rate + +What it shows: Percentage of failed requests over time + +// PLACEHOLDER: Screenshot of error rate graph + +Metrics: + +* Total error rate (%) +* Errors by status code (400, 401, 429, 500, etc.) 
+* Errors by model +* Errors by provider + +Graph type: Time series line chart with error percentage + +Breakdowns: + +* By error type: + * Client errors (4xx) + * Rate limits (429) + * Server errors (5xx) + * Provider errors + * Gateway errors + +Use cases: + +* Detect provider outages +* Identify configuration issues (e.g., model not enabled) +* Monitor rate limit breaches + +Example insights: + +* "Error rate spiked to 15% at 2pm" → OpenAI outage, fallback to Anthropic worked +* "10% of requests fail with 'model not found'" → Model not enabled in gateway + +=== Success rate + +What it shows: Percentage of successful (200) requests over time + +Metric: `Success Rate = (Successful Requests / Total Requests) × 100` + +Target: Typically 99%+ for production workloads + +Use cases: + +* Monitor overall health +* Set up alerts (e.g., "Alert if success rate < 95%") + +=== Fallback rate + +What it shows: Percentage of requests that used fallback provider + +// PLACEHOLDER: Screenshot of fallback rate graph + +Metric: `Fallback Rate = (Fallback Requests / Total Requests) × 100` + +Breakdowns: + +* By fallback reason: + * Rate limit exceeded + * Timeout + * 5xx error + +Use cases: + +* Monitor primary provider reliability +* Verify fallback is working +* Identify when to renegotiate rate limits + +Example insights: + +* "Fallback rate increased to 20% yesterday" → OpenAI hit rate limits, time to increase quota +* "Zero fallbacks in 30 days" → Fallback config may not be working, or primary provider is very reliable + +== Dashboard views + +=== Overview dashboard + +Shows: High-level metrics across all gateways + +// PLACEHOLDER: Screenshot of overview dashboard + +Widgets: + +* Total requests (last 24h, 7d, 30d) +* Total spend (last 24h, 7d, 30d) +* Success rate (current) +* Average latency (current) +* Top 5 models by request volume +* Top 5 gateways by spend + +Use case: Executive view, health at a glance + +=== Gateway dashboard + +Shows: Metrics for a specific gateway + 
+// PLACEHOLDER: Screenshot of gateway dashboard + +Widgets: + +* Request volume (time series) +* Token usage (time series) +* Estimated spend (time series) +* Latency percentiles (histogram) +* Error rate (time series) +* Model breakdown (pie chart) +* Provider breakdown (pie chart) + +Use case: Team-specific monitoring, gateway optimization + +=== Model comparison dashboard + +Shows: Side-by-side comparison of models + +// PLACEHOLDER: Screenshot of model comparison + +Metrics per model: + +* Request count +* Total tokens +* Estimated cost +* Cost per 1K requests +* Average latency +* Error rate + +Use case: Evaluate whether to switch models (cost vs performance) + +Example: + +[cols="2,1,1,1,1"] +|=== +| Model | Requests | Avg Latency | Cost per 1K | Error Rate + +| openai/gpt-4o +| 10,000 +| 1.2s +| $5.00 +| 0.5% + +| anthropic/claude-sonnet-3.5 +| 5,000 +| 0.9s +| $3.50 +| 0.3% + +| openai/gpt-4o-mini +| 20,000 +| 0.7s +| $0.50 +| 1.0% +|=== + +Insight: Claude Sonnet is 25% faster and 30% cheaper than GPT-4o with better reliability + +=== Provider comparison dashboard + +Shows: Side-by-side comparison of providers + +Metrics per provider: + +* Request count +* Total spend +* Average latency +* Error rate +* Fallback trigger rate + +Use case: Evaluate provider reliability, negotiate contracts + +=== Cost breakdown dashboard + +Shows: Detailed cost analysis + +// PLACEHOLDER: Screenshot of cost breakdown + +Widgets: + +* Spend by gateway (stacked bar chart) +* Spend by model (pie chart) +* Spend by provider (pie chart) +* Spend by custom dimension (if configured, e.g., customer ID) +* Spend trend (time series with forecast) +* Budget utilization (progress bar: $X / $Y monthly limit) + +Use case: FinOps, budget management, chargeback/showback + +== Filter and group + +=== Filter by gateway + +[source,text] +---- +Filter: Gateway = "production-gateway" +---- + + +Shows metrics for specific gateway only. 
+ +Use case: Isolate prod from staging metrics + +=== Filter by model + +[source,text] +---- +Filter: Model = "openai/gpt-4o" +---- + + +Shows metrics for specific model only. + +Use case: Evaluate model performance in isolation + +=== Filter by provider + +[source,text] +---- +Filter: Provider = "OpenAI" +---- + + +Shows metrics for specific provider only. + +Use case: Evaluate provider reliability + +=== Filter by status + +[source,text] +---- +Filter: Status = "200" // Only successful requests +Filter: Status >= "500" // Only server errors +---- + + +Use case: Focus on errors, or calculate success rate + +=== Filter by custom dimension + +// PLACEHOLDER: Confirm if custom dimensions are supported for filtering + +[source,text] +---- +Filter: request.headers["x-customer-id"] = "customer_abc" +---- + + +Shows metrics for specific customer. + +Use case: Customer-specific cost tracking for chargeback + +=== Group by dimension + +Common groupings: + +* Group by Gateway +* Group by Model +* Group by Provider +* Group by Status +* Group by Hour/Day/Week/Month (time aggregation) + +Example: "Show me spend grouped by model, for production gateway, over last 30 days" + +== Alerting + +// PLACEHOLDER: Confirm if alerting is supported + +If alerting is supported: + +=== Alert types + +Budget alerts: + +* Alert when spend exceeds X% of monthly budget +* Alert when spend grows Y% week-over-week + +Performance alerts: + +* Alert when error rate > X% +* Alert when p99 latency > Xms +* Alert when success rate < X% + +Usage alerts: + +* Alert when request volume drops (potential outage) +* Alert when fallback rate > X% (primary provider issue) + +=== Alert channels + +// PLACEHOLDER: Supported notification channels +* Email +* Slack +* PagerDuty +* Webhook +* // PLACEHOLDER: Others? 
+ +=== Example alert configuration + +[source,yaml] +---- +# PLACEHOLDER: Actual alert configuration format +alerts: + - name: "High Error Rate" + condition: error_rate > 5% + duration: 5 minutes + channels: [slack, email] + + - name: "Budget Threshold" + condition: monthly_spend > 80% of budget + channels: [email] + + - name: "Latency Spike" + condition: p99_latency > 5000ms + duration: 10 minutes + channels: [pagerduty] +---- + +== Export metrics + +// PLACEHOLDER: Confirm export capabilities + +=== Export to CSV + +1. Apply filters for desired metrics +2. Click "Export to CSV" +3. Download includes time series data + +Use case: Import into spreadsheet for analysis, reporting + +=== Export via API + +// PLACEHOLDER: If API is available for metrics + +[source,bash] +---- +curl https://{CLUSTER_ID}.cloud.redpanda.com/api/ai-gateway/metrics \ + -H "Authorization: Bearer ${REDPANDA_CLOUD_TOKEN}" \ + -G \ + --data-urlencode "gateway_id=gw_abc123" \ + --data-urlencode "start_time=2025-01-01T00:00:00Z" \ + --data-urlencode "end_time=2025-01-31T23:59:59Z" \ + --data-urlencode "metric=requests,tokens,cost" +---- + + +Response: + +[source,json] +---- +{ + "gateway_id": "gw_abc123", + "start_time": "2025-01-01T00:00:00Z", + "end_time": "2025-01-31T23:59:59Z", + "metrics": { + "requests": 1000000, + "tokens": 500000000, + "estimated_cost": 2500.00 + } +} +---- + + +=== Integration with observability platforms + +// PLACEHOLDER: OpenTelemetry support? Other integrations? + +Supported integrations (if any): + +* Prometheus: Metrics endpoint for scraping +* OpenTelemetry: Export metrics to OTel collector +* Datadog: Direct integration +* Grafana: Pre-built dashboards +* // PLACEHOLDER: Others? + +== Common analysis tasks + +=== "Are we staying within budget?" + +1. View cost breakdown dashboard +2. Check budget utilization widget: + * Current spend: $X + * Monthly budget: $Y + * Utilization: X% + * Days remaining in month: Z +3. 
Forecast:
+ * At current rate: $X × (30 / days_elapsed)
+ * On track to exceed budget? Yes/No
+
+Action:
+
+* If approaching limit: Adjust rate limits, optimize models, pause non-prod usage
+* If well under budget: Opportunity to test more expensive models
+
+=== "Which team is using the most resources?"
+
+1. Filter by gateway (assuming one gateway per team)
+2. *Sort by Spend* (descending)
+3. View table:
+
+[cols="2,1,1,1,1"]
+|===
+| Gateway | Requests | Tokens | Spend | % of Total
+
+| team-ml
+| 500K
+| 250M
+| $1,250
+| 50%
+
+| team-product
+| 300K
+| 150M
+| $750
+| 30%
+
+| team-eng
+| 200K
+| 100M
+| $500
+| 20%
+|===
+
+Action: Chargeback costs to teams, or investigate high-usage teams
+
+=== "Is this model worth the extra cost?"
+
+1. *Open Model Comparison Dashboard*
+2. Select models to compare:
+ * Expensive model: `openai/gpt-4o`
+ * Cheap model: `openai/gpt-4o-mini`
+3. Compare metrics:
+
+[cols="2,1,1,2"]
+|===
+| Metric | GPT-4o | GPT-4o-mini | Difference
+
+| Cost per 1K requests
+| $5.00
+| $0.50
+| *10x*
+
+| Avg Latency
+| 1.2s
+| 0.7s
+| 42% *faster* (mini)
+
+| Error Rate
+| 0.5%
+| 1.0%
+| 2x errors (mini)
+|===
+
+Decision: If mini's error rate is acceptable, save 10x on costs
+
+=== "Why did costs spike yesterday?"
+
+1. View cost trend graph
+2. Identify spike (e.g., Jan 10th: $500 vs usual $100)
+3. Drill down:
+ * By gateway: Which gateway caused the spike?
+ * By model: Did someone switch to expensive model?
+ * By hour: What time did spike occur?
+4. Cross-reference with logs:
+ * Filter logs to spike timeframe
+ * Check for unusual request patterns
+ * Identify custom header (user ID, customer ID) if present
+
+Common causes:
+
+* Test suite running against prod gateway
+* A/B test routing all traffic to expensive model
+* User error (wrong model in config)
+* Runaway loop in application code
+
+=== "Is provider X more reliable than provider Y?"
+
+1. Open provider comparison dashboard
+2. 
Compare error rates: + +[cols="2,1,1,2"] +|=== +| Provider | Requests | Error Rate | Fallback Triggers + +| OpenAI +| 500K +| 0.8% +| 50 (rate limits) + +| Anthropic +| 300K +| 0.3% +| 5 (timeouts) +|=== + +Insight: Anthropic has 62% lower error rate + +3. Compare latencies: + +[cols="2,1,1"] +|=== +| Provider | p50 Latency | p99 Latency + +| OpenAI +| 1.0s +| 3.5s + +| Anthropic +| 0.8s +| 2.5s +|=== + +Insight: Anthropic is 20% faster at p50, 28% faster at p99 + +Decision: Prioritize Anthropic in routing pools + +== Metrics retention + +// PLACEHOLDER: Confirm metrics retention policy + +Retention period: + +* *High-resolution* (1-minute granularity): // PLACEHOLDER: for example, 7 days +* *Medium-resolution* (1-hour granularity): // PLACEHOLDER: for example, 30 days +* *Low-resolution* (1-day granularity): // PLACEHOLDER: for example, 1 year + +Note: Aggregate metrics retained longer than individual request logs + +== API access to metrics + +// PLACEHOLDER: Document metrics API if available + +=== List available metrics + +[source,bash] +---- +curl https://{CLUSTER_ID}.cloud.redpanda.com/api/ai-gateway/metrics/list \ + -H "Authorization: Bearer ${REDPANDA_CLOUD_TOKEN}" +---- + + +Response: + +[source,json] +---- +{ + "metrics": [ + "requests", + "tokens.prompt", + "tokens.completion", + "tokens.total", + "cost.estimated", + "latency.p50", + "latency.p95", + "latency.p99", + "errors.rate", + "success.rate", + "fallback.rate" + ] +} +---- + + +=== Query specific metric + +[source,bash] +---- +curl https://{CLUSTER_ID}.cloud.redpanda.com/api/ai-gateway/metrics/query \ + -H "Authorization: Bearer ${REDPANDA_CLOUD_TOKEN}" \ + -H "Content-Type: application/json" \ + -d '{ + "metric": "requests", + "gateway_id": "gw_abc123", + "start_time": "2025-01-01T00:00:00Z", + "end_time": "2025-01-31T23:59:59Z", + "granularity": "1d", + "group_by": ["model"] + }' +---- + + +Response: + +[source,json] +---- +{ + "metric": "requests", + "granularity": "1d", + "data": [ + { + 
"timestamp": "2025-01-01T00:00:00Z", + "model": "openai/gpt-4o", + "value": 10000 + }, + { + "timestamp": "2025-01-01T00:00:00Z", + "model": "anthropic/claude-sonnet-3.5", + "value": 5000 + }, + ... + ] +} +---- + + +== Best practices + +Set up budget alerts early + +* Don't wait for surprise bills +* Alert at 50%, 80%, 90% of budget +* Include multiple stakeholders (eng, finance) + +Create team dashboards + +* One dashboard per team showing their gateway(s) +* Empowers teams to self-optimize +* Reduces central ops burden + +Monitor fallback rate + +* Low fallback rate (0-5%): Normal, failover working +* High fallback rate (>20%): Investigate primary provider issues +* Zero fallback rate: Verify fallback config is correct + +Compare models regularly + +* Run A/B tests with metrics +* Reassess as pricing and models change +* Don't assume expensive = better quality for your use case + +Track trends, not point-in-time + +* Day-to-day variance is normal +* Look for week-over-week and month-over-month trends +* Seasonal patterns (e.g., more usage on weekdays) + +== Troubleshoot metrics issues + +=== Issue: "Metrics don't match my provider invoice" + +Possible causes: + +1. Metrics are estimates based on public pricing +2. Your contract has custom pricing +3. Provider changed pricing mid-month + +Solution: + +* Use metrics for trends and optimization decisions +* Use provider invoices for actual billing +* // PLACEHOLDER: Can users configure custom pricing in gateway? + +=== Issue: "Metrics are delayed or missing" + +Possible causes: + +1. Metrics aggregation has delay (// PLACEHOLDER: typical delay?) +2. Time range outside retention period +3. No requests in selected time range (empty data) + +Solution: + +1. Wait and refresh (// PLACEHOLDER: Xminutes typical delay) +2. Check retention policy +3. Verify requests were sent (check logs) + +=== Issue: "Dashboard shows 'no data'" + +Possible causes: + +1. Filters too restrictive (no matching requests) +2. 
Gateway has no traffic yet +3. Permissions issue (can't access this gateway's metrics) + +Solution: + +1. Remove filters, widen time range +2. Send test request (see xref:ai-agents:ai-gateway/gateway-quickstart.adoc[]) +3. Check permissions with admin + +== Next steps + +* xref:ai-agents:ai-gateway/observability-logs.adoc[]: View individual requests and debug issues. diff --git a/modules/ai-agents/partials/transcripts-ui-guide.adoc b/modules/ai-agents/partials/transcripts-ui-guide.adoc new file mode 100644 index 000000000..5d7d00604 --- /dev/null +++ b/modules/ai-agents/partials/transcripts-ui-guide.adoc @@ -0,0 +1,89 @@ +// ============================================================================= +// PARTIAL: transcripts-ui-guide.adoc +// ============================================================================= +// +// PURPOSE: +// Documents the Transcripts UI interface for both AI agents and MCP servers. +// Single-sources UI navigation and component descriptions that are identical +// across both contexts. +// +// INCLUDED BY: +// - cloud-docs: modules/ai-agents/pages/agents/monitor-agents.adoc +// - cloud-docs: modules/ai-agents/pages/mcp/remote/monitor-mcp-servers.adoc +// +// INCLUDE SYNTAX: +// :context: agent +// include::partial$transcripts-ui-guide.adoc[] +// +// :context: mcp +// include::partial$transcripts-ui-guide.adoc[] +// +// ATTRIBUTES USED: +// - context: Controls agent-specific vs MCP-specific content +// Valid values: "agent" | "mcp" +// +// DEPENDENCIES: +// - xref:ai-agents:observability/concepts.adoc#agent-trace-hierarchy[] +// - xref:ai-agents:observability/concepts.adoc#mcp-server-trace-hierarchy[] +// +// CONTENT TYPE: +// UI navigation and interface explanation (procedural context for how-to pages) +// +// ============================================================================= + +=== Navigate the transcripts view + +// Navigation is identical for both contexts +. In the left navigation panel, click *Transcripts*. 
+ifeval::["{context}" == "agent"] +. Select a recent transcript from your agent executions. +endif::[] +ifeval::["{context}" == "mcp"] +. Select a recent transcript from your MCP server tool invocations. +endif::[] + +The transcripts view displays: + +* *Timeline* (top): Visual history of recent executions with success/error indicators +* *Trace list* (middle): Hierarchical view of traces and spans +* *Summary panel* (right): Detailed metrics when you select a transcript + +// UI component descriptions +==== Timeline visualization + +The timeline at the top shows execution patterns over time: + +* Green bars: Successful executions +* Red bars: Failed executions with errors +* Gray bars: Incomplete traces or traces still loading +* Time range: Displays the last few hours by default + +Use the timeline to spot patterns like error clusters, performance degradation over time, or gaps indicating downtime. + +==== Trace hierarchy + +The trace list shows nested operations with visual duration bars indicating how long each operation took. Click the expand arrows (▶) to drill into nested spans and see the complete execution flow. + +// Link to appropriate concepts section based on context +ifeval::["{context}" == "agent"] +For details on span types, see xref:ai-agents:observability/concepts.adoc#agent-trace-hierarchy[Agent trace hierarchy]. +endif::[] +ifeval::["{context}" == "mcp"] +For details on span types, see xref:ai-agents:observability/concepts.adoc#mcp-server-trace-hierarchy[MCP server trace hierarchy]. 
+endif::[] + +==== Summary panel + +When you select a transcript, the right panel shows: + +* Duration: Total execution time for this request +* Total Spans: Number of operations in the trace +ifeval::["{context}" == "agent"] +* Token Usage: Input tokens, output tokens, and total (critical for cost tracking) +* LLM Calls: How many times the agent called the language model +* Service: The agent identifier +* Conversation ID: Links to session data topics +endif::[] +ifeval::["{context}" == "mcp"] +* Service: The MCP server identifier +endif::[] diff --git a/modules/billing/pages/billing.adoc b/modules/billing/pages/billing.adoc index f51b7b39d..cf6f32e72 100644 --- a/modules/billing/pages/billing.adoc +++ b/modules/billing/pages/billing.adoc @@ -140,7 +140,7 @@ Pricing per MCP server depends on the compute units you allocate. The cost of a NOTE: Compute units for Remote MCP use the same definition and rates as those for Redpanda Connect. -MCP servers automatically emit OpenTelemetry traces to the xref:ai-agents:mcp/remote/monitor-activity.adoc#opentelemetry-traces-topic[`redpanda.otel_traces` topic]. For Serverless clusters, usage of this system-managed traces topic is not billed. You will not incur ingress, egress, storage, or partition charges for trace data. For Dedicated and BYOC clusters, standard billing metrics apply to the traces topic. +MCP servers automatically emit OpenTelemetry traces to the xref:ai-agents:observability/concepts.adoc#opentelemetry-traces-topic[`redpanda.otel_traces` topic]. For Serverless clusters, usage of this system-managed traces topic is not billed. You will not incur ingress, egress, storage, or partition charges for trace data. For Dedicated and BYOC clusters, standard billing metrics apply to the traces topic. 
== Support plans diff --git a/modules/develop/examples/cookbooks/jira/create-issue.yaml b/modules/develop/examples/cookbooks/jira/create-issue.yaml new file mode 100644 index 000000000..1ed695933 --- /dev/null +++ b/modules/develop/examples/cookbooks/jira/create-issue.yaml @@ -0,0 +1,28 @@ +# tag::config[] +input: + generate: + count: 1 + mapping: | + root.fields = { + "project": {"key": "MYPROJECT"}, + "summary": "Issue created from Redpanda Connect", + "description": { + "type": "doc", + "version": 1, + "content": [{"type": "paragraph", "content": [{"type": "text", "text": "Created via API"}]}] + }, + "issuetype": {"name": "Task"} + } + +pipeline: + processors: + - http: + url: "${secrets.JIRA_BASE_URL}/rest/api/3/issue" + verb: POST + headers: + Content-Type: application/json + Authorization: "Basic ${secrets.JIRA_AUTH_TOKEN}" + +output: + stdout: {} +# end::config[] diff --git a/modules/develop/examples/cookbooks/jira/input-once.yaml b/modules/develop/examples/cookbooks/jira/input-once.yaml new file mode 100644 index 000000000..b8f13b5ca --- /dev/null +++ b/modules/develop/examples/cookbooks/jira/input-once.yaml @@ -0,0 +1,18 @@ +# tag::config[] +input: + generate: + count: 1 + mapping: | + root.jql = "project = MYPROJECT AND status = Open" + root.maxResults = 100 + +pipeline: + processors: + - jira: + base_url: "${secrets.JIRA_BASE_URL}" + username: "${secrets.JIRA_USERNAME}" + api_token: "${secrets.JIRA_API_TOKEN}" + +output: + stdout: {} +# end::config[] diff --git a/modules/develop/examples/cookbooks/jira/input-periodic.yaml b/modules/develop/examples/cookbooks/jira/input-periodic.yaml new file mode 100644 index 000000000..991a40b0d --- /dev/null +++ b/modules/develop/examples/cookbooks/jira/input-periodic.yaml @@ -0,0 +1,19 @@ +# tag::config[] +input: + generate: + interval: 30s + mapping: | + root.jql = "project = MYPROJECT AND updated >= -1h ORDER BY updated DESC" + root.maxResults = 50 + root.fields = ["key", "summary", "status", "assignee", "priority"] 
+ +pipeline: + processors: + - jira: + base_url: "${secrets.JIRA_BASE_URL}" + username: "${secrets.JIRA_USERNAME}" + api_token: "${secrets.JIRA_API_TOKEN}" + +output: + stdout: {} +# end::config[] diff --git a/modules/develop/examples/cookbooks/jira/test-jira-examples.sh b/modules/develop/examples/cookbooks/jira/test-jira-examples.sh new file mode 100755 index 000000000..c795f76ca --- /dev/null +++ b/modules/develop/examples/cookbooks/jira/test-jira-examples.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# +# Test script for Jira cookbook examples +# +# This script validates YAML syntax using `rpk connect lint` +# +# Usage: +# ./test-jira-examples.sh + +set -euo pipefail + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +NC='\033[0m' + +# Get script directory +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" + +echo "Testing Jira cookbook examples..." +echo "" + +TOTAL=0 +PASSED=0 +FAILED=0 + +for file in *.yaml; do + if [[ -f "$file" ]]; then + TOTAL=$((TOTAL + 1)) + echo -n " $file... 
" + + if output=$(rpk connect lint --skip-env-var-check "$file" 2>&1); then + echo -e "${GREEN}PASSED${NC}" + PASSED=$((PASSED + 1)) + else + echo -e "${RED}FAILED${NC}" + echo "$output" | sed 's/^/ /' + FAILED=$((FAILED + 1)) + fi + fi +done + +echo "" +echo "Results: $PASSED/$TOTAL passed" + +if [[ $FAILED -gt 0 ]]; then + echo -e "${RED}Some tests failed${NC}" + exit 1 +else + echo -e "${GREEN}All tests passed${NC}" + exit 0 +fi diff --git a/modules/develop/pages/connect/components/inputs/http_server.adoc b/modules/develop/pages/connect/components/inputs/http_server.adoc new file mode 100644 index 000000000..48b7a3343 --- /dev/null +++ b/modules/develop/pages/connect/components/inputs/http_server.adoc @@ -0,0 +1,3 @@ += http_server +:page-aliases: components:inputs/http_server.adoc +include::redpanda-connect:components:inputs/http_server.adoc[tag=single-source] diff --git a/modules/develop/pages/connect/components/processors/a2a_message.adoc b/modules/develop/pages/connect/components/processors/a2a_message.adoc new file mode 100644 index 000000000..b4067524b --- /dev/null +++ b/modules/develop/pages/connect/components/processors/a2a_message.adoc @@ -0,0 +1,3 @@ += a2a_message +:page-aliases: components:processors/a2a_message.adoc +include::redpanda-connect:components:partial$components/cloud-only/processors/a2a_message.adoc[tag=single-source] diff --git a/modules/develop/pages/connect/configuration/resource-management.adoc b/modules/develop/pages/connect/configuration/resource-management.adoc index 802e1d8fb..c48a3e58e 100644 --- a/modules/develop/pages/connect/configuration/resource-management.adoc +++ b/modules/develop/pages/connect/configuration/resource-management.adoc @@ -125,7 +125,7 @@ To view resources already allocated to a data pipeline: [tabs] ===== -Cloud Console:: +Cloud UI:: + -- . Log in to https://cloud.redpanda.com[Redpanda Cloud^]. @@ -152,7 +152,7 @@ To scale the resources for a pipeline: [tabs] ===== -Cloud Console:: +Cloud UI:: + -- . 
Log in to https://cloud.redpanda.com[Redpanda Cloud^]. diff --git a/modules/develop/pages/connect/configuration/secret-management.adoc b/modules/develop/pages/connect/configuration/secret-management.adoc index 026616216..ac0fee981 100644 --- a/modules/develop/pages/connect/configuration/secret-management.adoc +++ b/modules/develop/pages/connect/configuration/secret-management.adoc @@ -15,7 +15,7 @@ You can create a secret and reference it in multiple data pipelines on the same [tabs] ===== -Cloud Console:: +Cloud UI:: + -- . Log in to https://cloud.redpanda.com[Redpanda Cloud^]. @@ -71,7 +71,7 @@ NOTE: Changes to secret values do not take effect until a pipeline is restarted. [tabs] ===== -Cloud Console:: +Cloud UI:: + -- . Log in to https://cloud.redpanda.com[Redpanda Cloud^]. @@ -122,7 +122,7 @@ NOTE: Changes do not affect pipelines that are already running. [tabs] ===== -Cloud Console:: +Cloud UI:: + -- . Log in to https://cloud.redpanda.com[Redpanda Cloud^]. @@ -158,7 +158,7 @@ You must include the following values: [tabs] ===== -Cloud Console:: +Cloud UI:: + -- . Go to the **Connect** page, and create a pipeline (or open an existing pipeline to edit). diff --git a/modules/develop/pages/connect/cookbooks/jira.adoc b/modules/develop/pages/connect/cookbooks/jira.adoc new file mode 100644 index 000000000..f8015ae1d --- /dev/null +++ b/modules/develop/pages/connect/cookbooks/jira.adoc @@ -0,0 +1,159 @@ += Work with Jira Issues +:description: Learn how to query, filter, and create Jira issues using Redpanda Connect pipelines. 
+:page-aliases: cookbooks:jira.adoc +:page-topic-type: cookbook +:personas: streaming_developer, data_engineer +:learning-objective-1: Query Jira issues using JQL patterns with the Jira processor +:learning-objective-2: Combine generate input with Jira processor for scheduled queries +:learning-objective-3: Create Jira issues using the HTTP processor and REST API + +The Jira processor enables querying Jira issues using JQL (Jira Query Language) and returning structured data. It’s a processor, so you can use it in pipelines for input-style flows (pair with `generate`) or output-style flows (pair with `drop`). + + +Use this cookbook to: + +* [ ] Query Jira issues on a schedule or on-demand +* [ ] Filter issues using JQL patterns +* [ ] Create Jira issues using the HTTP processor + +== Prerequisites + +The examples in this cookbook use the Secrets Store for Jira credentials. This keeps sensitive credentials secure and separate from your pipeline configuration. + +. link:https://id.atlassian.com/manage-profile/security/api-tokens[Generate a Jira API token^]. + +. Add your Jira credentials to the xref:develop:connect/configuration/secret-management.adoc[Secrets Store]: ++ +- `JIRA_BASE_URL`: Your Jira instance URL (for example, `\https://your-domain.atlassian.net`) +- `JIRA_USERNAME`: Your Jira account email address +- `JIRA_API_TOKEN`: The API token generated from your Atlassian account +- `JIRA_AUTH_TOKEN` (optional, for creating issues): Base64-encoded `username:api_token` string + +== Use Jira as an input + +To use Jira as an input, combine the `generate` input with the Jira processor. This pattern triggers Jira queries at regular intervals or on-demand. + +TIP: Replace `MYPROJECT` in the examples with your actual Jira project key. 
+
+=== Query Jira periodically
+
+This example queries Jira every 30 seconds for recent issues:
+
+[source,yaml]
+----
+include::develop:example$cookbooks/jira/input-periodic.yaml[tag=config,indent=0]
+----
+
+=== One-time query
+
+For a single query, use `count` instead of `interval`:
+
+[source,yaml]
+----
+include::develop:example$cookbooks/jira/input-once.yaml[tag=config,indent=0]
+----
+
+== Input message format
+
+The Jira processor expects input messages containing valid Jira queries in JSON format:
+
+[source,json]
+----
+{
+  "jql": "project = MYPROJECT AND status = Open",
+  "maxResults": 50,
+  "fields": ["key", "summary", "status", "assignee"]
+}
+----
+
+=== Required fields
+
+- `jql`: The JQL (Jira Query Language) query string
+
+=== Optional fields
+
+- `maxResults`: Maximum number of results to return (default: 50)
+- `fields`: Array of field names to include in the response
+
+== JQL query patterns
+
+Here are common JQL patterns for filtering issues:
+
+=== Recent issues by project
+
+[source,jql]
+----
+project = MYPROJECT AND created >= -7d ORDER BY created DESC
+----
+
+=== Issues assigned to current user
+
+[source,jql]
+----
+assignee = currentUser() AND status != Done
+----
+
+=== Issues by status
+
+[source,jql]
+----
+project = MYPROJECT AND status IN (Open, 'In Progress', 'To Do')
+----
+
+=== Issues by priority
+
+[source,jql]
+----
+project = MYPROJECT AND priority = High ORDER BY created DESC
+----
+
+== Output message format
+
+The Jira processor returns individual issue messages, rather than a response object with an `issues` array.
+
+Each message output by the Jira processor represents a single issue:
+
+[source,json]
+----
+{
+  "id": "12345",
+  "key": "DOC-123",
+  "fields": {
+    "summary": "Example issue",
+    "status": {
+      "name": "In Progress"
+    },
+    "assignee": {
+      "displayName": "John Doe"
+    }
+  }
+}
+----
+
+== Pagination handling
+
+The Jira processor automatically handles pagination internally. The processor:
+
+. Makes the initial request with `startAt=0`.
+. Checks if more results are available. +. Automatically fetches subsequent pages until all results are retrieved. +. Outputs each issue as an individual message. + +You don't need to handle pagination manually. + +== Create and update Jira issues + +The Jira processor is read-only and only supports querying. To create or update Jira issues, use the xref:develop:connect/components/processors/http.adoc[`http` processor] with the Jira REST API. + +=== Create a Jira issue + +[source,yaml] +---- +include::develop:example$cookbooks/jira/create-issue.yaml[tag=config,indent=0] +---- + +== See also + +- xref:develop:connect/components/processors/jira.adoc[Jira processor reference] +- https://developer.atlassian.com/cloud/jira/platform/rest/v3/intro/[Jira REST API documentation^] +- https://www.atlassian.com/software/jira/guides/jql[JQL query guide^] diff --git a/modules/get-started/pages/cloud-overview.adoc b/modules/get-started/pages/cloud-overview.adoc index 9e03a35b8..39d583615 100644 --- a/modules/get-started/pages/cloud-overview.adoc +++ b/modules/get-started/pages/cloud-overview.adoc @@ -1,14 +1,47 @@ = Redpanda Cloud Overview -:description: Learn about Redpanda Serverless, Bring Your Own Cloud (BYOC), and Dedicated clusters. +:description: Learn about the Redpanda Agentic Data Plane (ADP) and deployment options including BYOC, Dedicated, and Serverless clusters. :page-aliases: cloud:dedicated-byoc.adoc, deploy:deployment-option/cloud/dedicated-byoc.adoc, deploy:deployment-option/cloud/cloud-overview.adoc -Redpanda Cloud is a complete data streaming platform delivered as a fully-managed service. It provides automated upgrades and patching, data balancing, and support while continuously monitoring your clusters and underlying infrastructure to meet strict performance, availability, reliability, and security requirements. 
All Redpanda Cloud clusters are deployed with an integrated glossterm:Redpanda Console[], and all clusters have access to unlimited retention and 300+ data connectors with xref:develop:connect/about.adoc[Redpanda Connect]. +Redpanda Cloud is a complete data streaming and agentic data plane platform delivered as a fully-managed service. It provides automated upgrades and patching, data balancing, and support while continuously monitoring your clusters and underlying infrastructure to meet strict performance, availability, reliability, and security requirements. All Redpanda Cloud clusters are deployed with an integrated glossterm:Redpanda Console[], and all clusters have access to unlimited retention and 300+ data connectors with xref:develop:connect/about.adoc[Redpanda Connect]. -TIP: For more detailed information about the Redpanda platform, see xref:get-started:intro-to-events.adoc[] and xref:get-started:architecture.adoc[]. +== Redpanda Agentic Data Plane (ADP) -== Redpanda Cloud cluster types +Redpanda ADP is an enterprise-grade infrastructure for building, deploying, and managing AI agents at scale. Redpanda ADP provides unified governance, observability, and security for agentic applications while leveraging Redpanda's streaming and analytical capabilities as the foundational data fabric. -Redpanda Cloud offers three fully-managed cloud deployment options, each designed for different use cases: +NOTE: The Agentic Data Plane is supported on BYOC clusters running with AWS and Redpanda version 25.3 and later. + +Redpanda ADP is built on open standards and protocols, allowing you to pick and choose components that fit your needs. Integrate with existing agent frameworks, data processing systems, or custom code. It includes the following key components: + +* **AI agents**: Deploy declarative agents or bring your own agent frameworks (LangChain, LlamaIndex, and others). 
Build multi-agent systems where specialized sub-agents handle specific responsibilities, following single-responsibility principles. +* **MCP servers**: Build lightweight data and action interfaces using a low-code framework based on xref:develop:connect/about.adoc[Redpanda Connect]. Connect to hundreds of data sources (databases, cloud storage, APIs) and enforce fine-grained policies that programmatically prevent prompt injection and SQL injection attacks. MCP servers are extremely lightweight—run dozens on minimal resources—with OIDC-based access control and real-time debugging capabilities. +* **AI Gateway**: Manage LLM provider access with cost controls, rate limiting, intelligent routing, and failover support across multiple providers. +//* **Catalog**: Maintain a centralized repository of agents, MCP servers, tools, and policies. Share components across teams and enforce organization-wide standards. + +=== Enterprise capabilities for AI agents + +Redpanda ADP addresses critical enterprise requirements for AI agent deployments: + +*Security by design*: MCP servers enforce policies at the tool level, programmatically preventing prompt injection, SQL injection, and other agent-based attacks. Policy enforcement is deterministic and controlled. Agents cannot bypass security constraints even through creative prompting. + +*Unified authorization*: All components use OIDC-based authentication with an "on-behalf-of" authorization model. When a user invokes an agent, the agent inherits the intersection of its own permissions and the user's permissions, ensuring proper data access scoping. + +*Complete observability*: Redpanda ADP provides two levels of inspection: execution logs (transcripts) capture every agent action with 100% sampling using OpenTelemetry standards, while real-time debugging tools allow inspection of individual MCP server calls. Traces span across services and go as deep as needed, down to individual tool invocations with full timing data. 
View detailed agent actions in Redpanda Console and replay data for agent evaluations. + +*Compliance and audit*: For industries requiring multi-year audit trails, Redpanda ADP records every agent action and data source used in decision-making. Execution logs are stored in Redpanda topics and can be materialized to Iceberg tables for long-term retention and analysis. + +=== Use cases + +Organizations can use Redpanda ADP to: + +* *Automate operational workflows*: Create specialized agents for building management, infrastructure monitoring, compliance reporting, and other domain-specific tasks +* *Monitor manufacturing and operations*: Deploy multi-agent systems that analyze factory machine telemetry in real-time, detect anomalies, search equipment manuals, and create maintenance tickets automatically. Use data aggregation patterns (like tumbling windows) to process high-volume sensor data before sending insights to agents. +* *Extend enterprise productivity tools*: Integrate Microsoft Copilot or other workplace agents with internal data sources and systems that are otherwise inaccessible + +See also: xref:ai-agents:index.adoc[]. 
+ +== Redpanda Cloud deployment options + +Redpanda Cloud applications are supported by three fully-managed deployment options: * **<>**: Fastest way to get started with automatic scaling * **<>**: Production clusters in Redpanda's cloud with enhanced isolation @@ -33,6 +66,11 @@ Redpanda Cloud offers three fully-managed cloud deployment options, each designe | Redpanda's cloud (AWS/Azure/GCP) | Your cloud account (AWS/Azure/GCP) +| *Redpanda ADP* +| ✗ +| ✗ +| ✓ + | *Tenancy* | Multi-tenant | Single-tenant @@ -59,7 +97,7 @@ Redpanda Cloud offers three fully-managed cloud deployment options, each designe | 20 (default), 32 (max) | *Private networking* -| ✗ +| ✓ | ✓ | ✓ @@ -128,8 +166,7 @@ include::get-started:partial$get-started-serverless.adoc[] === Dedicated -With Dedicated clusters, you host your data on Redpanda Cloud resources (AWS, GCP, or Azure), and Redpanda handles provisioning, operations, and maintenance. Dedicated clusters are single-tenant deployments that support private networking (for example, VPC peering to talk over private IPs) for better data isolation. -When you create a Dedicated cluster, you select the supported xref:reference:tiers/dedicated-tiers.adoc[tier] that meets your compute and storage needs. +With Dedicated clusters, you host your data on Redpanda Cloud resources (AWS, GCP, or Azure), and Redpanda handles provisioning, operations, and maintenance. When you create a Dedicated cluster, you select the supported xref:reference:tiers/dedicated-tiers.adoc[tier] that meets your compute and storage needs. ==== Sign up for Dedicated @@ -153,6 +190,8 @@ Redpanda creates a cloud organization for you and sends you a welcome email. === Bring Your Own Cloud (BYOC) +With BYOC clusters, the Redpanda data plane (including Redpanda ADP components and Redpanda brokers) deploys into your existing VPC or VNet, ensuring all data remains in your environment. 
+ With BYOC clusters, you deploy the Redpanda glossterm:data plane[] into your existing VPC (for AWS and GCP) or VNet (for Azure), and all data is contained in your own environment. This provides an additional layer of security and isolation. (See xref:get-started:byoc-arch.adoc[].) Redpanda manages provisioning, monitoring, upgrades, and security policies, and it manages required resources in your VPC or VNet, including subnets (subnetworks in GCP), IAM roles, and object storage resources (for example, S3 buckets or Azure Storage accounts). @@ -162,6 +201,8 @@ With BYOVPC/BYOVNet clusters, you take full control of the networking lifecycle. The BYOC infrastructure that Redpanda manages should not be used to deploy any other workloads. +For details about the control plane - data plane framework in BYOC, see xref:get-started:byoc-arch.adoc[BYOC architecture]. + ==== Sign up for BYOC To start using BYOC, contact https://redpanda.com/try-redpanda?section=enterprise-trial[Redpanda sales^] to request a private offer with possible discounts. You are billed directly or through Google Cloud Marketplace or AWS Marketplace. @@ -177,7 +218,7 @@ Serverless clusters are a good fit for the following use cases: Consider BYOC or Dedicated if you need more control over the deployment or if you have workloads with consistently-high throughput. BYOC and Dedicated clusters offer the following features: -* Private networking +* Redpanda Agentic Data Plane (ADP): BYOC only * Multiple availability zones (AZs). A multi-AZ cluster provides higher resiliency in the event of a failure in one of the zones. * Role-based access control (RBAC) in the data plane * Kafka Connect @@ -187,7 +228,11 @@ Consider BYOC or Dedicated if you need more control over the deployment or if yo When you sign up for a Redpanda account, Redpanda creates an organization for you. Your organization contains all your Redpanda resources, including your clusters and networks. 
Within your organization, Redpanda creates a default resource group to contain your resources. You can rename this resource group, and you can create more resource groups. For example, you may want different resource groups for production and testing. -For details about the control plane - data plane framework in BYOC, see xref:get-started:byoc-arch.adoc[BYOC architecture]. +[TIP] +==== + +For more detailed information about the Redpanda platform, see xref:get-started:intro-to-events.adoc[] and xref:get-started:architecture.adoc[]. +==== == Shared responsibility model @@ -382,7 +427,6 @@ Features in limited availability are production-ready and are covered by Redpand The following features are currently in limited availability in Redpanda Cloud: -* Serverless * Dedicated for Azure == Features in beta @@ -391,14 +435,14 @@ Features in beta are available for testing and feedback. They are not covered by The following features are currently in beta in Redpanda Cloud: -* Serverless on GCP * BYOVPC for AWS * BYOVNet for Azure * Secrets Management for BYOVPC on GCP and AWS -* xref:ai-agents:index.adoc[AI agents with MCP servers] +* xref:ai-agents:index.adoc[Redpanda ADP] including AI agents, MCP servers, AI Gateway, and Transcripts * Several Redpanda Connect components == Next steps +* xref:ai-agents:index.adoc[Build AI agents with Redpanda ADP] * xref:manage:maintenance.adoc[Learn about upgrades and maintenance] * xref:get-started:cluster-types/serverless.adoc[Create a Serverless cluster] * xref:get-started:cluster-types/byoc/index.adoc[Create a BYOC cluster] diff --git a/modules/get-started/pages/whats-new-cloud.adoc b/modules/get-started/pages/whats-new-cloud.adoc index db4d78403..049eae167 100644 --- a/modules/get-started/pages/whats-new-cloud.adoc +++ b/modules/get-started/pages/whats-new-cloud.adoc @@ -6,6 +6,20 @@ This page lists new features added to Redpanda Cloud. 
+== February 2026 + +=== Agentic Data Plane (ADP) + +Redpanda ADP is an enterprise-grade infrastructure for building, deploying, and managing AI agents at scale. Redpanda ADP provides unified governance, observability, and security for agentic applications while leveraging Redpanda's streaming and analytical capabilities as the foundational data fabric. + +NOTE: The Agentic Data Plane is supported on BYOC clusters running with AWS and Redpanda version 25.3 and later. + +Redpanda ADP is built on open standards and protocols, allowing you to pick and choose components that fit your needs. Integrate with existing agent frameworks, data processing systems, or custom code. It includes the following key components: + +* **AI agents**: Deploy declarative agents or bring your own agent frameworks (LangChain, LlamaIndex, and others). Build multi-agent systems where specialized sub-agents handle specific responsibilities, following single-responsibility principles. +* **MCP servers**: Build lightweight data and action interfaces using a low-code framework based on xref:develop:connect/about.adoc[Redpanda Connect]. Connect to hundreds of data sources (databases, cloud storage, APIs) and enforce fine-grained policies that programmatically prevent prompt injection and SQL injection attacks. MCP servers are extremely lightweight—run dozens on minimal resources—with OIDC-based access control and real-time debugging capabilities. +* **AI Gateway**: Manage LLM provider access with cost controls, rate limiting, intelligent routing, and failover support across multiple providers. + == January 2026 === Redpanda Connect updates