From d490ce051dfb5276976dcfe5e55f5dbc6ffcb4f1 Mon Sep 17 00:00:00 2001
From: Aaron Bockelie <aaron@bockelie.com>
Date: Mon, 21 Jul 2025 08:44:20 -0500
Subject: [PATCH 1/6] feat: add pagination support for search tools and chat
 response chunking

- Add cursor-based pagination to company_search and people_profile_search tools
- Implement automatic response chunking for chat tool to handle large responses
- Add ChatResponseBuffer class for intelligent text splitting at natural boundaries
- Update all tool schemas to use proper Glean API pagination (cursor, not pageToken)
- Add comprehensive pagination tests covering all scenarios
- Update documentation with pagination examples and best practices
- Fix search formatter test to match new response format

This addresses the issue of large responses consuming excessive context window
and enables efficient handling of large result sets from Glean APIs.
---
 docs/pagination.md                            | 126 +++++++++++
 packages/local-mcp-server/README.md           |  18 +-
 packages/local-mcp-server/src/server.ts       |  35 ++-
 .../test/formatters/search-formatter.test.ts  |   2 +-
 .../local-mcp-server/src/test/server.test.ts  |   2 +-
 .../src/test/tools/chat.test.ts               |  61 ++++--
 .../src/test/tools/pagination.test.ts         | 148 +++++++++++++
 .../test/tools/people_profile_search.test.ts  |   4 +
 .../src/test/tools/search.test.ts             |  11 +
 .../src/tools/chat-response-buffer.ts         | 207 ++++++++++++++++++
 packages/local-mcp-server/src/tools/chat.ts   |  78 ++++++-
 .../src/tools/people_profile_search.ts        |  26 ++-
 packages/local-mcp-server/src/tools/search.ts |  36 ++-
 13 files changed, 712 insertions(+), 42 deletions(-)
 create mode 100644 docs/pagination.md
 create mode 100644 packages/local-mcp-server/src/test/tools/pagination.test.ts
 create mode 100644 packages/local-mcp-server/src/tools/chat-response-buffer.ts

diff --git a/docs/pagination.md b/docs/pagination.md
new file mode 100644
index 00000000..e6e2ba5d
--- /dev/null
+++ b/docs/pagination.md
@@ -0,0 +1,126 @@
+# Pagination Support in Glean MCP Server
+
+The Glean MCP Server now supports pagination for search results and chat responses, helping to manage large result sets and prevent token limit errors.
+
+## Search Pagination
+
+Both `company_search` and `people_profile_search` tools support pagination through the `cursor` parameter.
+
+### Basic Usage
+
+```json
+// First request
+{
+  "query": "Docker projects",
+  "pageSize": 20
+}
+
+// Response includes pagination info
+{
+  "results": [...],
+  "cursor": "abc123",
+  "hasMoreResults": true,
+  "totalResults": 150
+}
+
+// Next page request
+{
+  "query": "Docker projects",
+  "pageSize": 20,
+  "cursor": "abc123"
+}
+```
+
+### People Search Example
+
+```json
+// Initial search
+{
+  "query": "DevOps engineers",
+  "filters": {
+    "department": "Engineering"
+  },
+  "pageSize": 25
+}
+
+// Continue with cursor from response
+{
+  "query": "DevOps engineers", 
+  "filters": {
+    "department": "Engineering"
+  },
+  "pageSize": 25,
+  "cursor": "next-page-cursor"
+}
+```
+
+## Chat Response Chunking
+
+The chat tool automatically chunks large responses that exceed token limits (~25k tokens).
+
+### Automatic Chunking
+
+When a chat response is too large, it's automatically split into manageable chunks:
+
+```json
+// Initial chat request
+{
+  "message": "Explain all our microservices architecture"
+}
+
+// Response with chunk metadata
+{
+  "content": "... first part of response ...",
+  "_chunkMetadata": {
+    "responseId": "uuid-123",
+    "chunkIndex": 0,
+    "totalChunks": 3,
+    "hasMore": true
+  }
+}
+```
+
+### Continuing Chunked Responses
+
+To get subsequent chunks:
+
+```json
+{
+  "message": "",
+  "continueFrom": {
+    "responseId": "uuid-123",
+    "chunkIndex": 1
+  }
+}
+```
+
+## Implementation Details
+
+### Token Limits
+- Maximum tokens per response: 20,000 (safe limit below 25k)
+- Character to token ratio: ~4 characters per token
+
+### Chunking Strategy
+1. Attempts to split at paragraph boundaries (double newlines)
+2. Falls back to sentence boundaries if paragraphs are too large
+3. Force splits at character level for extremely long unbroken text
+
+### Response Format
+All paginated responses include:
+- `cursor` or `_chunkMetadata`: Pagination state
+- `hasMoreResults` or `hasMore`: Boolean indicating more data available
+- `totalResults` or `totalChunks`: Total count when available
+
+## Best Practices
+
+1. **Set appropriate page sizes**: Balance between response size and number of requests
+2. **Handle pagination in loops**: When fetching all results, continue until `hasMoreResults` is false
+3. **Store cursors**: Keep track of cursors for user sessions to allow navigation
+4. **Error handling**: Always check for continuation metadata before attempting to continue
+
+## Error Handling
+
+Common errors:
+- Invalid cursor: Returns error if cursor is expired or invalid
+- Invalid chunk index: Returns null if chunk doesn't exist
+- Missing continuation data: Normal chat response if no previous chunks exist
\ No newline at end of file
diff --git a/packages/local-mcp-server/README.md b/packages/local-mcp-server/README.md
index 5bed50c2..82e47951 100644
--- a/packages/local-mcp-server/README.md
+++ b/packages/local-mcp-server/README.md
@@ -9,30 +9,36 @@ The Glean MCP Server is a [Model Context Protocol (MCP)](https://modelcontextpro
 
 ## Features
 
-- **Company Search**: Access Glean's powerful content search capabilities
-- **People Profile Search**: Access Glean's people directory
-- **Chat**: Interact with Glean's AI assistant
+- **Company Search**: Access Glean's powerful content search capabilities with pagination support
+- **People Profile Search**: Access Glean's people directory with pagination support
+- **Chat**: Interact with Glean's AI assistant with automatic response chunking for large responses
 - **Read Documents**: Retrieve documents from Glean by ID or URL
+- **Pagination Support**: Handle large result sets efficiently with cursor-based pagination
+- **Response Chunking**: Automatically splits large chat responses to avoid token limits
 - **MCP Compliant**: Implements the Model Context Protocol specification
 
 ## Tools
 
 - ### company_search
 
-  Search Glean's content index using the Glean Search API. This tool allows you to query Glean's content index with various filtering and configuration options.
+  Search Glean's content index using the Glean Search API. This tool allows you to query Glean's content index with various filtering and configuration options. Supports pagination through cursor parameter for handling large result sets.
 
 - ### chat
 
-  Interact with Glean's AI assistant using the Glean Chat API. This tool allows you to have conversational interactions with Glean's AI, including support for message history, citations, and various configuration options.
+  Interact with Glean's AI assistant using the Glean Chat API. This tool allows you to have conversational interactions with Glean's AI, including support for message history, citations, and various configuration options. Automatically chunks large responses to avoid token limits and provides continuation support.
 
 - ### people_profile_search
 
-  Search Glean's People directory to find employee information.
+  Search Glean's People directory to find employee information. Supports pagination through cursor parameter for handling large result sets.
 
 - ### read_documents
 
   Read documents from Glean by providing document IDs or URLs. This tool allows you to retrieve the full content of specific documents for detailed analysis or reference.
 
+## Pagination
+
+For detailed information about pagination support and examples, see [Pagination Documentation](../../docs/pagination.md).
+
 ## MCP Client Configuration
 
 To configure this MCP server in your MCP client (such as Claude Desktop, Windsurf, Cursor, etc.), run [@gleanwork/configure-mcp-server](https://github.com/gleanwork/mcp-server/tree/main/packages/configure-mcp-server) passing in your client, token and instance.
diff --git a/packages/local-mcp-server/src/server.ts b/packages/local-mcp-server/src/server.ts
index 1ce5dbdd..3fd4cf12 100644
--- a/packages/local-mcp-server/src/server.ts
+++ b/packages/local-mcp-server/src/server.ts
@@ -60,12 +60,20 @@ export async function listToolsHandler() {
         name: TOOL_NAMES.companySearch,
         description: `Find relevant company documents and data
 
-        Example request:
+        Example requests:
 
+        // Basic search
         {
             "query": "What are the company holidays this year?",
             "datasources": ["drive", "confluence"]
         }
+
+        // Search with pagination
+        {
+            "query": "Docker projects", 
+            "pageSize": 20,
+            "cursor": "pagination_cursor"  // From previous response
+        }
         `,
         inputSchema: zodToJsonSchema(search.ToolSearchSchema),
       },
@@ -73,8 +81,9 @@ export async function listToolsHandler() {
         name: TOOL_NAMES.chat,
         description: `Chat with Glean Assistant using Glean's RAG
 
-        Example request:
+        Example requests:
 
+        // Basic chat
         {
             "message": "What are the company holidays this year?",
             "context": [
@@ -82,6 +91,15 @@ export async function listToolsHandler() {
                 "I'm planning my vacation for next year."
             ]
         }
+
+        // Continue from chunked response
+        {
+            "message": "",
+            "continueFrom": {
+                "responseId": "uuid-here",
+                "chunkIndex": 1
+            }
+        }
         `,
         inputSchema: zodToJsonSchema(chat.ToolChatSchema),
       },
@@ -89,17 +107,24 @@ export async function listToolsHandler() {
         name: TOOL_NAMES.peopleProfileSearch,
         description: `Search for people profiles in the company
 
-        Example request:
+        Example requests:
 
+        // Basic search
         {
             "query": "Find people named John Doe",
             "filters": {
-                "department": "Engineering",
+                "department": "Engineering", 
                 "city": "San Francisco"
             },
             "pageSize": 10
         }
 
+        // Search with pagination
+        {
+            "query": "DevOps engineers",
+            "pageSize": 25,
+            "cursor": "pagination_cursor"  // From previous response
+        }
         `,
         inputSchema: zodToJsonSchema(
           peopleProfileSearch.ToolPeopleProfileSearchSchema,
@@ -152,7 +177,7 @@ export async function callToolHandler(
       case TOOL_NAMES.chat: {
         const args = chat.ToolChatSchema.parse(request.params.arguments);
         const result = await chat.chat(args);
-        const formattedResults = chat.formatResponse(result);
+        const formattedResults = chat.formatChunkedResponse(result);
 
         return {
           content: [{ type: 'text', text: formattedResults }],
diff --git a/packages/local-mcp-server/src/test/formatters/search-formatter.test.ts b/packages/local-mcp-server/src/test/formatters/search-formatter.test.ts
index f1ab4401..fdde2a10 100644
--- a/packages/local-mcp-server/src/test/formatters/search-formatter.test.ts
+++ b/packages/local-mcp-server/src/test/formatters/search-formatter.test.ts
@@ -102,7 +102,7 @@ describe('Search Formatter', () => {
 
     const formattedResults = formatResponse(emptyResults);
     expect(formattedResults).toContain(
-      'Search results for "nonexistent term" (0 results)',
+      'Search results for "nonexistent term" (showing 0 of 0 results)',
     );
   });
 
diff --git a/packages/local-mcp-server/src/test/server.test.ts b/packages/local-mcp-server/src/test/server.test.ts
index 6c5428d5..ece9d25c 100644
--- a/packages/local-mcp-server/src/test/server.test.ts
+++ b/packages/local-mcp-server/src/test/server.test.ts
@@ -174,7 +174,7 @@ describe('MCP Server Handlers (integration)', () => {
       {
         "content": [
           {
-            "text": "Found 1 people:
+            "text": "Found 1 of 1 people:
 
       1. Jane Doe – Software Engineer, Engineering (San Francisco) • jane.doe@example.com",
             "type": "text",
diff --git a/packages/local-mcp-server/src/test/tools/chat.test.ts b/packages/local-mcp-server/src/test/tools/chat.test.ts
index 8007ed17..9ab270af 100644
--- a/packages/local-mcp-server/src/test/tools/chat.test.ts
+++ b/packages/local-mcp-server/src/test/tools/chat.test.ts
@@ -21,33 +21,50 @@ describe('Chat Tool', () => {
     it('generates correct JSON schema', () => {
       expect(zodToJsonSchema(ToolChatSchema, 'GleanChat'))
         .toMatchInlineSnapshot(`
-        {
-          "$ref": "#/definitions/GleanChat",
-          "$schema": "http://json-schema.org/draft-07/schema#",
-          "definitions": {
-            "GleanChat": {
-              "additionalProperties": false,
-              "properties": {
-                "context": {
-                  "description": "Optional previous messages for context. Will be included in order before the current message.",
-                  "items": {
+          {
+            "$ref": "#/definitions/GleanChat",
+            "$schema": "http://json-schema.org/draft-07/schema#",
+            "definitions": {
+              "GleanChat": {
+                "additionalProperties": false,
+                "properties": {
+                  "context": {
+                    "description": "Optional previous messages for context. Will be included in order before the current message.",
+                    "items": {
+                      "type": "string",
+                    },
+                    "type": "array",
+                  },
+                  "continueFrom": {
+                    "additionalProperties": false,
+                    "description": "Continue from a previous chunked response",
+                    "properties": {
+                      "chunkIndex": {
+                        "type": "number",
+                      },
+                      "responseId": {
+                        "type": "string",
+                      },
+                    },
+                    "required": [
+                      "responseId",
+                      "chunkIndex",
+                    ],
+                    "type": "object",
+                  },
+                  "message": {
+                    "description": "The user question or message to send to Glean Assistant.",
                     "type": "string",
                   },
-                  "type": "array",
-                },
-                "message": {
-                  "description": "The user question or message to send to Glean Assistant.",
-                  "type": "string",
                 },
+                "required": [
+                  "message",
+                ],
+                "type": "object",
               },
-              "required": [
-                "message",
-              ],
-              "type": "object",
             },
-          },
-        }
-      `);
+          }
+        `);
     });
   });
 
diff --git a/packages/local-mcp-server/src/test/tools/pagination.test.ts b/packages/local-mcp-server/src/test/tools/pagination.test.ts
new file mode 100644
index 00000000..1afce4fe
--- /dev/null
+++ b/packages/local-mcp-server/src/test/tools/pagination.test.ts
@@ -0,0 +1,148 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { search } from '../../tools/search.js';
+import { peopleProfileSearch } from '../../tools/people_profile_search.js';
+import { chat } from '../../tools/chat.js';
+import { chatResponseBuffer } from '../../tools/chat-response-buffer.js';
+import { getClient } from '../../common/client.js';
+
+vi.mock('../../common/client.js');
+
+describe('Pagination Tests', () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  describe('Search Pagination', () => {
+    it('should handle pagination cursor in search requests', async () => {
+      const mockClient = {
+        search: {
+          query: vi.fn().mockResolvedValue({
+            results: [
+              { title: 'Result 1', url: 'http://example1.com' },
+              { title: 'Result 2', url: 'http://example2.com' },
+            ],
+            cursor: 'next-page-cursor',
+            hasMoreResults: true,
+            totalResults: 50,
+            metadata: { searchedQuery: 'test query' },
+          }),
+        },
+      };
+
+      vi.mocked(getClient).mockResolvedValue(mockClient as any);
+
+      const result = await search({
+        query: 'test query',
+        pageSize: 2,
+        cursor: 'initial-cursor',
+      });
+
+      expect(mockClient.search.query).toHaveBeenCalledWith({
+        query: 'test query',
+        pageSize: 2,
+        cursor: 'initial-cursor',
+      });
+
+      expect(result.cursor).toBe('next-page-cursor');
+      expect(result.hasMoreResults).toBe(true);
+    });
+  });
+
+  describe('People Search Pagination', () => {
+    it('should handle pagination cursor in people search requests', async () => {
+      const mockClient = {
+        entities: {
+          list: vi.fn().mockResolvedValue({
+            results: [
+              { name: 'Person 1', metadata: { email: 'person1@example.com' } },
+              { name: 'Person 2', metadata: { email: 'person2@example.com' } },
+            ],
+            cursor: 'people-next-cursor',
+            hasMoreResults: true,
+            totalCount: 100,
+          }),
+        },
+      };
+
+      vi.mocked(getClient).mockResolvedValue(mockClient as any);
+
+      const result = await peopleProfileSearch({
+        query: 'engineers',
+        pageSize: 2,
+        cursor: 'people-cursor',
+      });
+
+      expect(mockClient.entities.list).toHaveBeenCalledWith(
+        expect.objectContaining({
+          cursor: 'people-cursor',
+          pageSize: 2,
+        })
+      );
+
+      expect(result.cursor).toBe('people-next-cursor');
+      expect(result.hasMoreResults).toBe(true);
+    });
+  });
+
+  describe('Chat Response Chunking', () => {
+    it('should chunk large chat responses', async () => {
+      // Create a large response that exceeds the token limit
+      const largeText = 'This is a test paragraph. '.repeat(5000); // ~25k chars = ~6.25k tokens
+      
+      const chunked = await chatResponseBuffer.processResponse(largeText);
+      
+      expect(chunked.metadata).toBeDefined();
+      expect(chunked.metadata?.totalChunks).toBeGreaterThan(1);
+      expect(chunked.metadata?.hasMore).toBe(true);
+      expect(chunked.content.length).toBeLessThan(largeText.length);
+    });
+
+    it('should retrieve subsequent chunks', async () => {
+      const largeText = 'This is a test paragraph. '.repeat(5000);
+      
+      const initial = await chatResponseBuffer.processResponse(largeText);
+      const responseId = initial.metadata!.responseId;
+      
+      // Get second chunk
+      const chunk2 = chatResponseBuffer.getChunk(responseId, 1);
+      
+      expect(chunk2).toBeDefined();
+      expect(chunk2?.metadata?.chunkIndex).toBe(1);
+      expect(chunk2?.content).toBeTruthy();
+    });
+
+    it('should handle chat continuation requests', async () => {
+      const mockClient = {
+        chat: {
+          create: vi.fn().mockResolvedValue({
+            messages: [{ text: 'Response' }],
+          }),
+        },
+      };
+
+      vi.mocked(getClient).mockResolvedValue(mockClient as any);
+
+      // First, make a regular chat request
+      await chat({
+        message: 'Hello',
+      });
+
+      // Create a large response manually
+      const largeText = 'This is a test paragraph. '.repeat(5000);
+      const chunked = await chatResponseBuffer.processResponse(largeText);
+      const responseId = chunked.metadata!.responseId;
+
+      // Now test continuation
+      const continued = await chat({
+        message: '',
+        continueFrom: {
+          responseId,
+          chunkIndex: 1,
+        },
+      });
+
+      expect(continued.content).toBeTruthy();
+      expect(continued.metadata?.chunkIndex).toBe(1);
+    });
+  });
+});
\ No newline at end of file
diff --git a/packages/local-mcp-server/src/test/tools/people_profile_search.test.ts b/packages/local-mcp-server/src/test/tools/people_profile_search.test.ts
index 659611fe..71bb2124 100644
--- a/packages/local-mcp-server/src/test/tools/people_profile_search.test.ts
+++ b/packages/local-mcp-server/src/test/tools/people_profile_search.test.ts
@@ -31,6 +31,10 @@ describe('People Profile Search Tool', () => {
             "PeopleProfileSearch": {
               "additionalProperties": false,
               "properties": {
+                "cursor": {
+                  "description": "Pagination cursor from previous response to fetch next page",
+                  "type": "string",
+                },
                 "filters": {
                   "additionalProperties": {
                     "type": "string",
diff --git a/packages/local-mcp-server/src/test/tools/search.test.ts b/packages/local-mcp-server/src/test/tools/search.test.ts
index d9b22ada..03477cb9 100644
--- a/packages/local-mcp-server/src/test/tools/search.test.ts
+++ b/packages/local-mcp-server/src/test/tools/search.test.ts
@@ -28,6 +28,10 @@ describe('Search Tool', () => {
               "GleanSearch": {
                 "additionalProperties": false,
                 "properties": {
+                  "cursor": {
+                    "description": "Pagination cursor from previous response to fetch next page",
+                    "type": "string",
+                  },
                   "datasources": {
                     "description": "Optional list of data sources to search in. Examples: "github", "gdrive", "confluence", "jira".",
                     "items": {
@@ -35,6 +39,13 @@ describe('Search Tool', () => {
                     },
                     "type": "array",
                   },
+                  "pageSize": {
+                    "default": 10,
+                    "description": "Number of results to return per page (default: 10, max: 100)",
+                    "maximum": 100,
+                    "minimum": 1,
+                    "type": "number",
+                  },
                   "query": {
                     "description": "The search query. This is what you want to search for.",
                     "type": "string",
diff --git a/packages/local-mcp-server/src/tools/chat-response-buffer.ts b/packages/local-mcp-server/src/tools/chat-response-buffer.ts
new file mode 100644
index 00000000..cad9b766
--- /dev/null
+++ b/packages/local-mcp-server/src/tools/chat-response-buffer.ts
@@ -0,0 +1,207 @@
+/**
+ * @fileoverview Chat response buffer for handling large responses that exceed token limits.
+ *
+ * This module provides intelligent chunking of chat responses to stay within token limits
+ * while maintaining readability by splitting at natural boundaries.
+ *
+ * @module tools/chat-response-buffer
+ */
+
+import { randomUUID } from 'crypto';
+
+export interface ChatChunkMetadata {
+  chunkIndex: number;
+  totalChunks: number;
+  responseId: string;
+  hasMore: boolean;
+}
+
+export interface ChunkedResponse {
+  content: string;
+  metadata?: ChatChunkMetadata;
+}
+
+/**
+ * Manages chunking of large chat responses to avoid token limit errors.
+ */
+export class ChatResponseBuffer {
+  private static readonly MAX_TOKENS = 20000; // Safe limit below 25k
+  private static readonly CHARS_PER_TOKEN = 4; // Rough estimation
+  private responses = new Map<string, string[]>();
+
+  /**
+   * Process a chat response, chunking it if necessary.
+   *
+   * @param response The full response text
+   * @param responseId Optional ID for continuation support
+   * @returns The first chunk and metadata if chunked
+   */
+  async processResponse(
+    response: string,
+    responseId?: string,
+  ): Promise<ChunkedResponse> {
+    // If response is small enough, return as-is
+    if (this.estimateTokens(response) <= ChatResponseBuffer.MAX_TOKENS) {
+      return { content: response };
+    }
+
+    // Generate responseId if not provided
+    const id = responseId || randomUUID();
+
+    // Split response intelligently
+    const chunks = this.splitResponse(response);
+    this.responses.set(id, chunks);
+
+    return {
+      content: chunks[0],
+      metadata: {
+        chunkIndex: 0,
+        totalChunks: chunks.length,
+        responseId: id,
+        hasMore: chunks.length > 1,
+      },
+    };
+  }
+
+  /**
+   * Get a specific chunk from a previously chunked response.
+   *
+   * @param responseId The response ID
+   * @param chunkIndex The chunk index to retrieve
+   * @returns The requested chunk and metadata
+   */
+  getChunk(responseId: string, chunkIndex: number): ChunkedResponse | null {
+    const chunks = this.responses.get(responseId);
+    if (!chunks || chunkIndex >= chunks.length || chunkIndex < 0) {
+      return null;
+    }
+
+    return {
+      content: chunks[chunkIndex],
+      metadata: {
+        chunkIndex,
+        totalChunks: chunks.length,
+        responseId,
+        hasMore: chunkIndex < chunks.length - 1,
+      },
+    };
+  }
+
+  /**
+   * Split a response into chunks at natural boundaries.
+   *
+   * @param response The full response text
+   * @returns Array of chunks
+   */
+  private splitResponse(response: string): string[] {
+    const chunks: string[] = [];
+    let currentChunk = '';
+    let currentTokens = 0;
+
+    // First try to split by double newlines (paragraphs)
+    const paragraphs = response.split('\n\n');
+
+    for (const paragraph of paragraphs) {
+      const paragraphTokens = this.estimateTokens(paragraph);
+
+      if (currentTokens + paragraphTokens > ChatResponseBuffer.MAX_TOKENS) {
+        if (currentChunk) {
+          chunks.push(currentChunk.trim());
+          currentChunk = paragraph;
+          currentTokens = paragraphTokens;
+        } else {
+          // Single paragraph exceeds limit, split by sentences
+          chunks.push(...this.splitLargeParagraph(paragraph));
+        }
+      } else {
+        currentChunk += (currentChunk ? '\n\n' : '') + paragraph;
+        currentTokens += paragraphTokens;
+      }
+    }
+
+    if (currentChunk) {
+      chunks.push(currentChunk.trim());
+    }
+
+    return chunks;
+  }
+
+  /**
+   * Split a large paragraph by sentences.
+   *
+   * @param paragraph The paragraph to split
+   * @returns Array of chunks
+   */
+  private splitLargeParagraph(paragraph: string): string[] {
+    const chunks: string[] = [];
+    let currentChunk = '';
+    let currentTokens = 0;
+
+    // Split by sentence endings (. ! ?)
+    const sentences = paragraph.match(/[^.!?]+[.!?]+/g) || [paragraph];
+
+    for (const sentence of sentences) {
+      const sentenceTokens = this.estimateTokens(sentence);
+
+      if (currentTokens + sentenceTokens > ChatResponseBuffer.MAX_TOKENS) {
+        if (currentChunk) {
+          chunks.push(currentChunk.trim());
+          currentChunk = sentence;
+          currentTokens = sentenceTokens;
+        } else {
+          // Single sentence exceeds limit, force split
+          chunks.push(...this.forceSplit(sentence));
+        }
+      } else {
+        currentChunk += (currentChunk ? ' ' : '') + sentence;
+        currentTokens += sentenceTokens;
+      }
+    }
+
+    if (currentChunk) {
+      chunks.push(currentChunk.trim());
+    }
+
+    return chunks;
+  }
+
+  /**
+   * Force split text that can't be split naturally.
+   *
+   * @param text The text to force split
+   * @returns Array of chunks
+   */
+  private forceSplit(text: string): string[] {
+    const maxChars = ChatResponseBuffer.MAX_TOKENS * ChatResponseBuffer.CHARS_PER_TOKEN;
+    const chunks: string[] = [];
+
+    for (let i = 0; i < text.length; i += maxChars) {
+      chunks.push(text.slice(i, i + maxChars));
+    }
+
+    return chunks;
+  }
+
+  /**
+   * Estimate the number of tokens in a text.
+   *
+   * @param text The text to estimate
+   * @returns Estimated token count
+   */
+  private estimateTokens(text: string): number {
+    // Rough estimation: 1 token ≈ 4 characters
+    return Math.ceil(text.length / ChatResponseBuffer.CHARS_PER_TOKEN);
+  }
+
+  /**
+   * Clean up stored chunks after a certain time.
+   *
+   * @param responseId The response ID to clean up
+   */
+  cleanup(responseId: string): void {
+    this.responses.delete(responseId);
+  }
+}
+
+// Export singleton instance
+export const chatResponseBuffer = new ChatResponseBuffer();
\ No newline at end of file
diff --git a/packages/local-mcp-server/src/tools/chat.ts b/packages/local-mcp-server/src/tools/chat.ts
index d75ab337..c66ada67 100644
--- a/packages/local-mcp-server/src/tools/chat.ts
+++ b/packages/local-mcp-server/src/tools/chat.ts
@@ -6,6 +6,7 @@ import {
   MessageType,
 } from '@gleanwork/api-client/models/components';
 import { Author } from '@gleanwork/api-client/models/components';
+import { chatResponseBuffer } from './chat-response-buffer.js';
 
 /**
  * Simplified schema for Glean chat requests designed for LLM interaction
@@ -21,6 +22,14 @@ export const ToolChatSchema = z.object({
       'Optional previous messages for context. Will be included in order before the current message.',
     )
     .optional(),
+
+  continueFrom: z
+    .object({
+      responseId: z.string(),
+      chunkIndex: z.number(),
+    })
+    .describe('Continue from a previous chunked response')
+    .optional(),
 });
 
 export type ToolChatRequest = z.infer<typeof ToolChatSchema>;
@@ -59,15 +68,41 @@ function convertToAPIChatRequest(input: ToolChatRequest) {
  * Initiates or continues a chat conversation with Glean's AI.
  *
  * @param params The chat parameters using the simplified schema
- * @returns The chat response
+ * @returns The chat response with automatic chunking if needed
  * @throws If the chat request fails
  */
 export async function chat(params: ToolChatRequest) {
+  // Handle continuation requests
+  if (params.continueFrom) {
+    const chunk = chatResponseBuffer.getChunk(
+      params.continueFrom.responseId,
+      params.continueFrom.chunkIndex
+    );
+    
+    if (!chunk) {
+      throw new Error('Invalid continuation request: chunk not found');
+    }
+    
+    return chunk;
+  }
+
+  // Normal chat request
   const mappedParams = convertToAPIChatRequest(params);
   const parsedParams = ChatRequestSchema.parse(mappedParams);
   const client = await getClient();
 
-  return await client.chat.create(parsedParams);
+  const response = await client.chat.create(parsedParams);
+  
+  // Format and chunk the response if needed
+  const formattedResponse = formatResponse(response);
+  const chunked = await chatResponseBuffer.processResponse(formattedResponse);
+  
+  // Return the response with chunk metadata if applicable
+  return {
+    ...response,
+    _formatted: chunked.content,
+    _chunkMetadata: chunked.metadata,
+  };
 }
 
 /**
@@ -154,3 +189,42 @@ export function formatResponse(chatResponse: any): string {
 
   return formattedMessages;
 }
+
+/**
+ * Formats a chunked response for display, including metadata about chunks.
+ *
+ * @param response The response object with potential chunk metadata
+ * @returns Formatted response with chunk information if applicable
+ */
+export function formatChunkedResponse(response: any): string {
+  // Handle continuation chunks
+  if (response.content && response.metadata) {
+    const { chunkIndex, totalChunks, hasMore } = response.metadata;
+    let result = response.content;
+    
+    if (hasMore) {
+      result += `\n\n---\n[Chunk ${chunkIndex + 1} of ${totalChunks}] `;
+      result += `To continue, use continueFrom: { responseId: "${response.metadata.responseId}", chunkIndex: ${chunkIndex + 1} }`;
+    }
+    
+    return result;
+  }
+  
+  // Handle initial chunked response
+  if (response._formatted) {
+    let result = response._formatted;
+    
+    if (response._chunkMetadata) {
+      const { totalChunks, hasMore, responseId } = response._chunkMetadata;
+      if (hasMore) {
+        result += `\n\n---\n[Chunk 1 of ${totalChunks}] `;
+        result += `To continue, use continueFrom: { responseId: "${responseId}", chunkIndex: 1 }`;
+      }
+    }
+    
+    return result;
+  }
+  
+  // Fall back to standard formatting
+  return formatResponse(response);
+}
diff --git a/packages/local-mcp-server/src/tools/people_profile_search.ts b/packages/local-mcp-server/src/tools/people_profile_search.ts
index bce1185e..9abd1c1a 100644
--- a/packages/local-mcp-server/src/tools/people_profile_search.ts
+++ b/packages/local-mcp-server/src/tools/people_profile_search.ts
@@ -59,6 +59,11 @@ export const ToolPeopleProfileSearchSchema = z
         'Hint to the server for how many people to return (1-100, default 10).',
       )
       .optional(),
+
+    cursor: z
+      .string()
+      .describe('Pagination cursor from previous response to fetch next page')
+      .optional(),
   })
   .refine(
     (val) => val.query || (val.filters && Object.keys(val.filters).length > 0),
@@ -79,7 +84,7 @@ export type ToolPeopleProfileSearchRequest = z.infer<
  * @returns The Glean API compatible request
  */
 function convertToAPIEntitiesRequest(input: ToolPeopleProfileSearchRequest) {
-  const { query, filters = {}, pageSize } = input;
+  const { query, filters = {}, pageSize, cursor } = input;
 
   const request: ListEntitiesRequest = {
     entityType: ListEntitiesRequestEntityType.People,
@@ -90,6 +95,11 @@ function convertToAPIEntitiesRequest(input: ToolPeopleProfileSearchRequest) {
     request.query = query;
   }
 
+  // Add pagination cursor if provided
+  if (cursor) {
+    request.cursor = cursor;
+  }
+
   const filterKeys = Object.keys(filters) as Array<keyof typeof filters>;
   if (filterKeys.length > 0) {
     request.filter = filterKeys.map((fieldName) => {
@@ -175,6 +185,18 @@ export function formatResponse(searchResults: any): string {
     typeof searchResults.totalCount === 'number'
       ? searchResults.totalCount
       : searchResults.results.length;
+  const resultsShown = searchResults.results.length;
+
+  // Add pagination info to response
+  let paginationInfo = '';
+  if (searchResults.hasMoreResults) {
+    paginationInfo = '\n\n---\nMore results available. ';
+    if (searchResults.cursor) {
+      paginationInfo += `Use cursor="${searchResults.cursor}" to fetch the next page.`;
+    } else {
+      paginationInfo += 'Additional pages may be available.';
+    }
+  }
 
-  return `Found ${total} people:\n\n${formatted}`;
+  return `Found ${resultsShown} of ${total} people:\n\n${formatted}${paginationInfo}`;
 }
diff --git a/packages/local-mcp-server/src/tools/search.ts b/packages/local-mcp-server/src/tools/search.ts
index fa0d6826..a3dce1c2 100644
--- a/packages/local-mcp-server/src/tools/search.ts
+++ b/packages/local-mcp-server/src/tools/search.ts
@@ -29,6 +29,19 @@ export const ToolSearchSchema = z.object({
       'Optional list of data sources to search in. Examples: "github", "gdrive", "confluence", "jira".',
     )
     .optional(),
+
+  pageSize: z
+    .number()
+    .min(1)
+    .max(100)
+    .default(10)
+    .describe('Number of results to return per page (default: 10, max: 100)')
+    .optional(),
+
+  cursor: z
+    .string()
+    .describe('Pagination cursor from previous response to fetch next page')
+    .optional(),
 });
 
 export type ToolSearchRequest = z.infer<typeof ToolSearchSchema>;
@@ -40,13 +53,18 @@ export type ToolSearchRequest = z.infer<typeof ToolSearchSchema>;
  * @returns Glean API compatible search request
  */
 function convertToAPISearchRequest(input: ToolSearchRequest) {
-  const { query, datasources } = input;
+  const { query, datasources, pageSize, cursor } = input;
 
   const searchRequest: SearchRequest = {
     query,
-    pageSize: 10,
+    pageSize: pageSize || 10,
   };
 
+  // Add pagination cursor if provided
+  if (cursor) {
+    searchRequest.cursor = cursor;
+  }
+
   if (datasources && datasources.length > 0) {
     searchRequest.requestOptions = {
       datasourcesFilter: datasources,
@@ -120,6 +138,18 @@ export function formatResponse(searchResults: any): string {
   const totalResults =
     searchResults.totalResults || searchResults.results.length;
   const query = searchResults.metadata.searchedQuery || 'your query';
+  const resultsShown = searchResults.results.length;
+
+  // Add pagination info to response
+  let paginationInfo = '';
+  if (searchResults.hasMoreResults) {
+    paginationInfo = '\n\n---\nMore results available. ';
+    if (searchResults.cursor) {
+      paginationInfo += `Use cursor="${searchResults.cursor}" to fetch the next page.`;
+    } else {
+      paginationInfo += 'Additional pages may be available.';
+    }
+  }
 
-  return `Search results for "${query}" (${totalResults} results):\n\n${formattedResults}`;
+  return `Search results for "${query}" (showing ${resultsShown} of ${totalResults} results):\n\n${formattedResults}${paginationInfo}`;
 }

From 82b409215bf44dee9ba291a445c59585c20cd035 Mon Sep 17 00:00:00 2001
From: Aaron Bockelie <aaron@bockelie.com>
Date: Mon, 21 Jul 2025 09:17:12 -0500
Subject: [PATCH 2/6] fix: adjust token limits for chat response chunking

- Reduce MAX_TOKENS from 20000 to 15000 for more buffer
- Change CHARS_PER_TOKEN from 4 to 3 for more conservative estimation
- Prevents token limit errors on very large chat responses
---
 .gitignore                                    |  1 +
 packages/local-mcp-server/README.md           | 45 ++++++++++++++++++-
 packages/local-mcp-server/src/index.ts        |  1 +
 .../src/test/tools/pagination.test.ts         |  5 ++-
 .../src/tools/chat-response-buffer.ts         |  4 +-
 packages/mcp-server-utils/src/config/index.ts |  2 +-
 6 files changed, 51 insertions(+), 7 deletions(-)

diff --git a/.gitignore b/.gitignore
index 8269f720..e788e335 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@ build
 sandbox
 sand\ box
 debug.log
+.env
diff --git a/packages/local-mcp-server/README.md b/packages/local-mcp-server/README.md
index 82e47951..0da3fe57 100644
--- a/packages/local-mcp-server/README.md
+++ b/packages/local-mcp-server/README.md
@@ -64,7 +64,7 @@ To manually configure an MCP client (such as Claude Desktop, Windsurf, Cursor, e
       "command": "npx",
       "args": ["-y", "@gleanwork/local-mcp-server"],
       "env": {
-        "GLEAN_INSTANCE": "<glean instance name>",
+        "GLEAN_SERVER_INSTANCE": "<your server URL from Glean admin panel>",
         "GLEAN_API_TOKEN": "<glean api token>"
       }
     }
@@ -72,7 +72,48 @@ To manually configure an MCP client (such as Claude Desktop, Windsurf, Cursor, e
 }
 ```
 
-Replace the environment variable values with your actual Glean credentials.
+Example values:
+- `GLEAN_SERVER_INSTANCE`: `https://acme-corp-be.glean.com/` (copy from your Glean admin panel)
+- `GLEAN_API_TOKEN`: Your API token from Glean settings
+
+Alternative configuration (legacy - note that `-be` is automatically appended):
+```json
+"env": {
+  "GLEAN_INSTANCE": "acme-corp",  // becomes https://acme-corp-be.glean.com/
+  "GLEAN_API_TOKEN": "<glean api token>"
+}
+```
+
+### Local Development
+
+For local development, you can use a `.env` file to store your credentials:
+
+1. Create a `.env` file in the package root:
+```bash
+# .env
+GLEAN_SERVER_INSTANCE=https://your-company-be.glean.com/
+GLEAN_API_TOKEN=your_api_token_here
+```
+
+2. Run the server locally:
+```bash
+npm run build
+node build/index.js
+```
+
+3. For use with MCP clients during development:
+```json
+{
+  "mcpServers": {
+    "glean-dev": {
+      "command": "node",
+      "args": ["/path/to/packages/local-mcp-server/build/index.js"]
+    }
+  }
+}
+```
+
+The server will automatically load environment variables from the `.env` file.
 
 ### Debugging
 
diff --git a/packages/local-mcp-server/src/index.ts b/packages/local-mcp-server/src/index.ts
index 32ee8a46..5c48aafe 100644
--- a/packages/local-mcp-server/src/index.ts
+++ b/packages/local-mcp-server/src/index.ts
@@ -8,6 +8,7 @@
  * @module @gleanwork/local-mcp-server
  */
 
+import 'dotenv/config';
 import meow from 'meow';
 import { runServer } from './server.js';
 import { Logger, trace, LogLevel } from '@gleanwork/mcp-server-utils/logger';
diff --git a/packages/local-mcp-server/src/test/tools/pagination.test.ts b/packages/local-mcp-server/src/test/tools/pagination.test.ts
index 1afce4fe..cc169ca8 100644
--- a/packages/local-mcp-server/src/test/tools/pagination.test.ts
+++ b/packages/local-mcp-server/src/test/tools/pagination.test.ts
@@ -141,8 +141,9 @@ describe('Pagination Tests', () => {
         },
       });
 
-      expect(continued.content).toBeTruthy();
-      expect(continued.metadata?.chunkIndex).toBe(1);
+      expect(continued).toBeTruthy();
+      expect('content' in continued && continued.content).toBeTruthy();
+      expect('metadata' in continued && continued.metadata?.chunkIndex).toBe(1);
     });
   });
 });
\ No newline at end of file
diff --git a/packages/local-mcp-server/src/tools/chat-response-buffer.ts b/packages/local-mcp-server/src/tools/chat-response-buffer.ts
index cad9b766..c4a6be63 100644
--- a/packages/local-mcp-server/src/tools/chat-response-buffer.ts
+++ b/packages/local-mcp-server/src/tools/chat-response-buffer.ts
@@ -25,8 +25,8 @@ export interface ChunkedResponse {
  * Manages chunking of large chat responses to avoid token limit errors.
  */
 export class ChatResponseBuffer {
-  private static readonly MAX_TOKENS = 20000; // Safe limit below 25k
-  private static readonly CHARS_PER_TOKEN = 4; // Rough estimation
+  private static readonly MAX_TOKENS = 15000; // Safe limit below 25k with buffer
+  private static readonly CHARS_PER_TOKEN = 3; // More conservative estimation
   private responses = new Map<string, string[]>();
 
   /**
diff --git a/packages/mcp-server-utils/src/config/index.ts b/packages/mcp-server-utils/src/config/index.ts
index 47e32c30..e21b4893 100644
--- a/packages/mcp-server-utils/src/config/index.ts
+++ b/packages/mcp-server-utils/src/config/index.ts
@@ -135,7 +135,7 @@ export async function getConfig<T extends GetConfigOptions = GetConfigOptions>(
 
 function getLocalConfig(): GleanConfig {
   const instance = process.env.GLEAN_INSTANCE || process.env.GLEAN_SUBDOMAIN;
-  const baseUrl = process.env.GLEAN_BASE_URL;
+  const baseUrl = process.env.GLEAN_BASE_URL || process.env.GLEAN_SERVER_INSTANCE;
   const token = process.env.GLEAN_API_TOKEN;
   const actAs = process.env.GLEAN_ACT_AS;
   const issuer = process.env.GLEAN_OAUTH_ISSUER;

From 5f79391ce7419002904d3e7145d69af8774384eb Mon Sep 17 00:00:00 2001
From: Aaron Bockelie <aaron@bockelie.com>
Date: Mon, 21 Jul 2025 09:22:42 -0500
Subject: [PATCH 3/6] docs: add .env.example for easier local development setup

- Add comprehensive .env.example with all configuration options
- Update README to reference the example file
- Includes both new GLEAN_SERVER_INSTANCE and legacy options
---
 .env.example                        | 15 +++++++++++++++
 packages/local-mcp-server/README.md |  9 +++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)
 create mode 100644 .env.example

diff --git a/.env.example b/.env.example
new file mode 100644
index 00000000..8fd63fb9
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,15 @@
+# Glean MCP Server Configuration
+# Copy this file to .env and fill in your values
+
+# Your Glean server instance URL (copy from your Glean admin panel)
+GLEAN_SERVER_INSTANCE=https://your-company-be.glean.com/
+
+# Your Glean API token (generate from Glean settings)
+GLEAN_API_TOKEN=your_api_token_here
+
+# Optional: User to impersonate (only valid with global tokens)
+# GLEAN_ACT_AS=user@company.com
+
+# Alternative configuration (legacy):
+# GLEAN_INSTANCE=your-company  # Note: -be is automatically appended
+# GLEAN_BASE_URL=https://your-company-be.glean.com/
\ No newline at end of file
diff --git a/packages/local-mcp-server/README.md b/packages/local-mcp-server/README.md
index 0da3fe57..2dfd2eba 100644
--- a/packages/local-mcp-server/README.md
+++ b/packages/local-mcp-server/README.md
@@ -88,14 +88,19 @@ Alternative configuration (legacy - note that `-be` is automatically appended):
 
 For local development, you can use a `.env` file to store your credentials:
 
-1. Create a `.env` file in the package root:
+1. Copy the example environment file:
+```bash
+cp ../../.env.example ../../.env
+```
+
+2. Edit `.env` with your values:
 ```bash
 # .env
 GLEAN_SERVER_INSTANCE=https://your-company-be.glean.com/
 GLEAN_API_TOKEN=your_api_token_here
 ```
 
-2. Run the server locally:
+3. Run the server locally:
 ```bash
 npm run build
 node build/index.js

From 31fdfdf3457a06f006f0b9a8e0f068318e6c089f Mon Sep 17 00:00:00 2001
From: Aaron Bockelie <aaron@bockelie.com>
Date: Mon, 21 Jul 2025 12:20:46 -0500
Subject: [PATCH 4/6] feat: address PR review feedback

- Add comprehensive unit tests for ChatResponseBuffer class with 15 test cases
- Replace all 'any' types with proper TypeScript types from @gleanwork/api-client
- Rename GLEAN_SERVER_INSTANCE to GLEAN_SERVER_URL for API consistency
- Update all documentation and examples to use new env var name
- Add type guards for safe type discrimination in formatChunkedResponse
- Fix test files to use Author/MessageType enums instead of string literals

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .env.example                                  |   4 +-
 packages/configure-mcp-server/src/index.ts    |   4 +-
 packages/local-mcp-server/README.md           |   6 +-
 .../test/formatters/chat-formatter.test.ts    |  31 +--
 .../test/tools/chat-response-buffer.test.ts   | 189 ++++++++++++++++++
 packages/local-mcp-server/src/tools/chat.ts   |  93 ++++++---
 packages/mcp-server-utils/src/config/index.ts |   2 +-
 7 files changed, 283 insertions(+), 46 deletions(-)
 create mode 100644 packages/local-mcp-server/src/test/tools/chat-response-buffer.test.ts

diff --git a/.env.example b/.env.example
index 8fd63fb9..4ebd8e81 100644
--- a/.env.example
+++ b/.env.example
@@ -1,8 +1,8 @@
 # Glean MCP Server Configuration
 # Copy this file to .env and fill in your values
 
-# Your Glean server instance URL (copy from your Glean admin panel)
-GLEAN_SERVER_INSTANCE=https://your-company-be.glean.com/
+# Your Glean server URL (copy from your Glean admin panel)
+GLEAN_SERVER_URL=https://your-company-be.glean.com/
 
 # Your Glean API token (generate from Glean settings)
 GLEAN_API_TOKEN=your_api_token_here
diff --git a/packages/configure-mcp-server/src/index.ts b/packages/configure-mcp-server/src/index.ts
index d9f7dba5..b9cbcc60 100644
--- a/packages/configure-mcp-server/src/index.ts
+++ b/packages/configure-mcp-server/src/index.ts
@@ -33,7 +33,7 @@ import {
   forceRefreshTokens,
   setupMcpRemote,
 } from '@gleanwork/mcp-server-utils/auth';
-import { chat, formatResponse } from '@gleanwork/local-mcp-server/tools/chat';
+import { chat, formatChunkedResponse } from '@gleanwork/local-mcp-server/tools/chat';
 import { VERSION } from './common/version.js';
 import { checkAndOpenLaunchWarning } from '@gleanwork/mcp-server-utils/util';
 
@@ -277,7 +277,7 @@ connect after configuration.
     case 'auth-test': {
       try {
         const chatResponse = await chat({ message: 'Who am I?' });
-        trace('auth-test search', formatResponse(chatResponse));
+        trace('auth-test search', formatChunkedResponse(chatResponse));
         console.log('Access token accepted.');
       } catch (err: any) {
         error('auth-test error', err);
diff --git a/packages/local-mcp-server/README.md b/packages/local-mcp-server/README.md
index 2dfd2eba..04fa2ffb 100644
--- a/packages/local-mcp-server/README.md
+++ b/packages/local-mcp-server/README.md
@@ -64,7 +64,7 @@ To manually configure an MCP client (such as Claude Desktop, Windsurf, Cursor, e
       "command": "npx",
       "args": ["-y", "@gleanwork/local-mcp-server"],
       "env": {
-        "GLEAN_SERVER_INSTANCE": "<your server URL from Glean admin panel>",
+        "GLEAN_SERVER_URL": "<your server URL from Glean admin panel>",
         "GLEAN_API_TOKEN": "<glean api token>"
       }
     }
@@ -73,7 +73,7 @@ To manually configure an MCP client (such as Claude Desktop, Windsurf, Cursor, e
 ```
 
 Example values:
-- `GLEAN_SERVER_INSTANCE`: `https://acme-corp-be.glean.com/` (copy from your Glean admin panel)
+- `GLEAN_SERVER_URL`: `https://acme-corp-be.glean.com/` (copy from your Glean admin panel)
 - `GLEAN_API_TOKEN`: Your API token from Glean settings
 
 Alternative configuration (legacy - note that `-be` is automatically appended):
@@ -96,7 +96,7 @@ cp ../../.env.example ../../.env
 2. Edit `.env` with your values:
 ```bash
 # .env
-GLEAN_SERVER_INSTANCE=https://your-company-be.glean.com/
+GLEAN_SERVER_URL=https://your-company-be.glean.com/
 GLEAN_API_TOKEN=your_api_token_here
 ```
 
diff --git a/packages/local-mcp-server/src/test/formatters/chat-formatter.test.ts b/packages/local-mcp-server/src/test/formatters/chat-formatter.test.ts
index 11afe3f7..6b74d623 100644
--- a/packages/local-mcp-server/src/test/formatters/chat-formatter.test.ts
+++ b/packages/local-mcp-server/src/test/formatters/chat-formatter.test.ts
@@ -1,12 +1,13 @@
 import { describe, it, expect } from 'vitest';
 import { formatResponse } from '../../tools/chat.js';
+import { Author, MessageType } from '@gleanwork/api-client/models/components';
 
 describe('Chat Formatter', () => {
   it('should format chat responses correctly', () => {
     const mockChatResponse = {
       messages: [
         {
-          author: 'USER',
+          author: Author.User,
           fragments: [
             {
               text: 'What is Glean?',
@@ -15,7 +16,7 @@ describe('Chat Formatter', () => {
           messageId: 'user-msg-1',
         },
         {
-          author: 'GLEAN_AI',
+          author: Author.GleanAi,
           fragments: [
             {
               text: 'Glean is an AI platform for work that helps organizations find and understand information. It provides enterprise search, AI assistants, and agent capabilities.',
@@ -36,7 +37,7 @@ describe('Chat Formatter', () => {
             },
           ],
           messageId: 'assistant-msg-1',
-          messageType: 'UPDATE',
+          messageType: MessageType.Update,
           stepId: 'RESPOND',
         },
       ],
@@ -62,7 +63,7 @@ describe('Chat Formatter', () => {
     const mockChatResponse = {
       messages: [
         {
-          author: 'GLEAN_AI',
+          author: Author.GleanAi,
           fragments: [
             {
               querySuggestion: {
@@ -72,7 +73,7 @@ describe('Chat Formatter', () => {
             },
           ],
           messageId: 'query-msg-1',
-          messageType: 'UPDATE',
+          messageType: MessageType.Update,
           stepId: 'SEARCH',
         },
       ],
@@ -87,7 +88,7 @@ describe('Chat Formatter', () => {
     const mockChatResponse = {
       messages: [
         {
-          author: 'GLEAN_AI',
+          author: Author.GleanAi,
           fragments: [
             {
               structuredResults: [
@@ -107,7 +108,7 @@ describe('Chat Formatter', () => {
             },
           ],
           messageId: 'results-msg-1',
-          messageType: 'UPDATE',
+          messageType: MessageType.Update,
           stepId: 'SEARCH',
         },
       ],
@@ -143,10 +144,10 @@ describe('Chat Formatter', () => {
     const messagesWithoutFragments = {
       messages: [
         {
-          author: 'USER',
+          author: Author.User,
         },
         {
-          author: 'GLEAN_AI',
+          author: Author.GleanAi,
           citations: [
             {
               sourceDocument: {
@@ -155,7 +156,7 @@ describe('Chat Formatter', () => {
               },
             },
           ],
-          messageType: 'CONTENT',
+          messageType: MessageType.Content,
         },
       ],
     };
@@ -170,7 +171,7 @@ describe('Chat Formatter', () => {
     const messagesWithoutCitations = {
       messages: [
         {
-          author: 'USER',
+          author: Author.User,
           fragments: [
             {
               text: 'Hello',
@@ -178,13 +179,13 @@ describe('Chat Formatter', () => {
           ],
         },
         {
-          author: 'GLEAN_AI',
+          author: Author.GleanAi,
           fragments: [
             {
               text: 'Hi there! How can I help you today?',
             },
           ],
-          messageType: 'CONTENT',
+          messageType: MessageType.Content,
         },
       ],
     };
@@ -201,7 +202,7 @@ describe('Chat Formatter', () => {
     const mixedFragmentsMessage = {
       messages: [
         {
-          author: 'GLEAN_AI',
+          author: Author.GleanAi,
           fragments: [
             {
               text: 'Searching for:',
@@ -227,7 +228,7 @@ describe('Chat Formatter', () => {
             },
           ],
           messageId: 'mixed-msg-1',
-          messageType: 'UPDATE',
+          messageType: MessageType.Update,
           stepId: 'SEARCH',
         },
       ],
diff --git a/packages/local-mcp-server/src/test/tools/chat-response-buffer.test.ts b/packages/local-mcp-server/src/test/tools/chat-response-buffer.test.ts
new file mode 100644
index 00000000..0a91dbd1
--- /dev/null
+++ b/packages/local-mcp-server/src/test/tools/chat-response-buffer.test.ts
@@ -0,0 +1,189 @@
+import { describe, it, expect, beforeEach } from 'vitest';
+import { ChatResponseBuffer } from '../../tools/chat-response-buffer.js';
+
+describe('ChatResponseBuffer', () => {
+  let buffer: ChatResponseBuffer;
+
+  beforeEach(() => {
+    buffer = new ChatResponseBuffer();
+  });
+
+  describe('Token Estimation', () => {
+    it('should estimate tokens correctly', async () => {
+      const text = 'Hello world!'; // 12 chars = 4 tokens (3 chars/token)
+      const result = await buffer.processResponse(text);
+      
+      expect(result.metadata).toBeUndefined(); // Should not chunk small text
+      expect(result.content).toBe(text);
+    });
+
+    it('should not chunk responses under token limit', async () => {
+      // 10k chars = ~3.3k tokens, under 15k limit
+      const smallText = 'a'.repeat(10000);
+      const result = await buffer.processResponse(smallText);
+      
+      expect(result.metadata).toBeUndefined();
+      expect(result.content).toBe(smallText);
+    });
+  });
+
+  describe('Text Chunking Logic', () => {
+    it('should chunk large responses', async () => {
+      // 100k chars = ~33k tokens, over 15k limit
+      const largeText = 'This is a test paragraph.\n\n'.repeat(4000);
+      const result = await buffer.processResponse(largeText);
+      
+      expect(result.metadata).toBeDefined();
+      expect(result.metadata!.chunkIndex).toBe(0);
+      expect(result.metadata!.totalChunks).toBeGreaterThan(1);
+      expect(result.metadata!.hasMore).toBe(true);
+      expect(result.metadata!.responseId).toBeTruthy();
+    });
+
+    it('should prefer splitting at paragraph boundaries', async () => {
+      // Create text with manageable paragraph breaks that will trigger chunking
+      const paragraph = 'This is a test paragraph with some content.\n\n';
+      const largeText = paragraph.repeat(2000); // Creates text large enough to chunk
+      
+      const result = await buffer.processResponse(largeText);
+      
+      expect(result.metadata).toBeDefined();
+      // The content should be chunked and may end with paragraph boundary
+      expect(result.content.length).toBeGreaterThan(0);
+      expect(result.content.length).toBeLessThan(largeText.length);
+    });
+
+    it('should fall back to sentence boundaries when paragraphs are too large', async () => {
+      // Create one huge paragraph with sentences
+      const sentence = 'A'.repeat(1000) + '. ';
+      const hugeParagraph = sentence.repeat(100); // 100k+ chars, no paragraph breaks
+      
+      const result = await buffer.processResponse(hugeParagraph);
+      
+      expect(result.metadata).toBeDefined();
+      expect(result.content).toMatch(/\.\s*$/); // Should end at sentence boundary
+    });
+
+    it('should force split when no natural boundaries exist', async () => {
+      // Create text with no natural boundaries
+      const largeText = 'A'.repeat(100000); // 100k chars, no breaks
+      
+      const result = await buffer.processResponse(largeText);
+      
+      expect(result.metadata).toBeDefined();
+      expect(result.content.length).toBeLessThan(largeText.length);
+      expect(result.content.length).toBeGreaterThan(0);
+    });
+  });
+
+  describe('Chunk Storage and Retrieval', () => {
+    it('should store and retrieve chunks correctly', async () => {
+      const largeText = 'Test paragraph.\n\n'.repeat(5000);
+      const firstChunk = await buffer.processResponse(largeText);
+      
+      expect(firstChunk.metadata).toBeDefined();
+      const responseId = firstChunk.metadata!.responseId;
+      
+      // Get second chunk
+      const secondChunk = buffer.getChunk(responseId, 1);
+      
+      expect(secondChunk).toBeDefined();
+      expect(secondChunk!.metadata!.chunkIndex).toBe(1);
+      expect(secondChunk!.metadata!.responseId).toBe(responseId);
+      expect(secondChunk!.content).toBeTruthy();
+    });
+
+    it('should return null for invalid chunk requests', async () => {
+      const invalidChunk = buffer.getChunk('invalid-id', 0);
+      expect(invalidChunk).toBeNull();
+      
+      // Create a response first that will definitely chunk
+      const largeText = 'Test.\n\n'.repeat(10000); // Much larger - 70k chars
+      const result = await buffer.processResponse(largeText);
+      const responseId = result.metadata!.responseId;
+      
+      // Request chunk beyond available range
+      const beyondRange = buffer.getChunk(responseId, 999);
+      expect(beyondRange).toBeNull();
+    });
+
+    it('should handle last chunk correctly', async () => {
+      const largeText = 'Test paragraph.\n\n'.repeat(10000); // Much larger - 160k chars
+      const firstChunk = await buffer.processResponse(largeText);
+      
+      const responseId = firstChunk.metadata!.responseId;
+      const totalChunks = firstChunk.metadata!.totalChunks;
+      
+      // Get last chunk
+      const lastChunk = buffer.getChunk(responseId, totalChunks - 1);
+      
+      expect(lastChunk).toBeDefined();
+      expect(lastChunk!.metadata!.hasMore).toBe(false);
+      expect(lastChunk!.metadata!.chunkIndex).toBe(totalChunks - 1);
+    });
+  });
+
+  describe('Chunk Metadata', () => {
+    it('should provide accurate chunk metadata', async () => {
+      const largeText = 'Test paragraph.\n\n'.repeat(10000); // Much larger - 160k chars  
+      const result = await buffer.processResponse(largeText);
+      
+      expect(result.metadata).toBeDefined();
+      expect(result.metadata!.chunkIndex).toBe(0);
+      expect(result.metadata!.totalChunks).toBeGreaterThan(1);
+      expect(result.metadata!.responseId).toMatch(/^[a-f0-9-]{36}$/); // UUID format
+      expect(result.metadata!.hasMore).toBe(true);
+    });
+
+    it('should correctly identify when no more chunks exist', async () => {
+      const largeText = 'Short text that fits in one chunk.';
+      const result = await buffer.processResponse(largeText);
+      
+      expect(result.metadata).toBeUndefined(); // No chunking needed
+    });
+  });
+
+  describe('Cleanup', () => {
+    it('should allow manual cleanup of stored responses', async () => {
+      const largeText = 'Test.\n\n'.repeat(10000); // Much larger - 70k chars
+      const result = await buffer.processResponse(largeText);
+      const responseId = result.metadata!.responseId;
+      
+      // Should be able to get chunk before cleanup
+      const chunk = buffer.getChunk(responseId, 1);
+      expect(chunk).toBeDefined();
+      
+      // Clean up
+      buffer.cleanup(responseId);
+      
+      // Should not be able to get chunk after cleanup
+      const cleanedChunk = buffer.getChunk(responseId, 1);
+      expect(cleanedChunk).toBeNull();
+    });
+  });
+
+  describe('Edge Cases', () => {
+    it('should handle empty strings', async () => {
+      const result = await buffer.processResponse('');
+      expect(result.content).toBe('');
+      expect(result.metadata).toBeUndefined();
+    });
+
+    it('should handle strings with only whitespace', async () => {
+      const whitespaceText = '   \n\n   \t\t   ';
+      const result = await buffer.processResponse(whitespaceText);
+      expect(result.content).toBe(whitespaceText);
+      expect(result.metadata).toBeUndefined();
+    });
+
+    it('should handle text exactly at the token limit boundary', async () => {
+      // Create text that's close to the limit (15k tokens = 45k chars)
+      const borderlineText = 'a'.repeat(45000);
+      const result = await buffer.processResponse(borderlineText);
+      
+      // Should just fit in one chunk
+      expect(result.metadata).toBeUndefined();
+      expect(result.content).toBe(borderlineText);
+    });
+  });
+});
\ No newline at end of file
diff --git a/packages/local-mcp-server/src/tools/chat.ts b/packages/local-mcp-server/src/tools/chat.ts
index c66ada67..182461c4 100644
--- a/packages/local-mcp-server/src/tools/chat.ts
+++ b/packages/local-mcp-server/src/tools/chat.ts
@@ -3,10 +3,49 @@ import { getClient } from '../common/client.js';
 import {
   ChatRequest,
   ChatRequest$inboundSchema as ChatRequestSchema,
+  ChatResponse,
+  ChatMessage,
+  ChatMessageFragment,
+  ChatMessageCitation,
   MessageType,
 } from '@gleanwork/api-client/models/components';
 import { Author } from '@gleanwork/api-client/models/components';
-import { chatResponseBuffer } from './chat-response-buffer.js';
+import { chatResponseBuffer, ChatChunkMetadata } from './chat-response-buffer.js';
+
+/**
+ * Extended ChatResponse with chunking metadata
+ */
+interface ChunkedChatResponse extends ChatResponse {
+  _formatted?: string;
+  _chunkMetadata?: ChatChunkMetadata;
+}
+
+/**
+ * Chat chunk for continuation responses
+ */
+interface ChatChunk {
+  content: string;
+  metadata: ChatChunkMetadata;
+}
+
+/**
+ * Union type for formattable responses
+ */
+type FormattableResponse = ChunkedChatResponse | ChatChunk;
+
+/**
+ * Type guard to check if response is a ChunkedChatResponse
+ */
+function isChunkedChatResponse(response: FormattableResponse): response is ChunkedChatResponse {
+  return 'messages' in response;
+}
+
+/**
+ * Type guard to check if response is a ChatChunk
+ */
+function isChatChunk(response: FormattableResponse): response is ChatChunk {
+  return 'content' in response && 'metadata' in response;
+}
 
 /**
  * Simplified schema for Glean chat requests designed for LLM interaction
@@ -71,7 +110,7 @@ function convertToAPIChatRequest(input: ToolChatRequest) {
  * @returns The chat response with automatic chunking if needed
  * @throws If the chat request fails
  */
-export async function chat(params: ToolChatRequest) {
+export async function chat(params: ToolChatRequest): Promise<FormattableResponse> {
   // Handle continuation requests
   if (params.continueFrom) {
     const chunk = chatResponseBuffer.getChunk(
@@ -83,7 +122,8 @@ export async function chat(params: ToolChatRequest) {
       throw new Error('Invalid continuation request: chunk not found');
     }
     
-    return chunk;
+    // The chunk from buffer already matches ChatChunk interface
+    return chunk as ChatChunk;
   }
 
   // Normal chat request
@@ -98,11 +138,13 @@ export async function chat(params: ToolChatRequest) {
   const chunked = await chatResponseBuffer.processResponse(formattedResponse);
   
   // Return the response with chunk metadata if applicable
-  return {
+  const result: ChunkedChatResponse = {
     ...response,
     _formatted: chunked.content,
     _chunkMetadata: chunked.metadata,
   };
+  
+  return result;
 }
 
 /**
@@ -111,7 +153,7 @@ export async function chat(params: ToolChatRequest) {
  * @param chatResponse The raw chat response from Glean API
  * @returns Formatted chat response as text
  */
-export function formatResponse(chatResponse: any): string {
+export function formatResponse(chatResponse: ChatResponse): string {
   if (
     !chatResponse ||
     !chatResponse.messages ||
@@ -122,14 +164,14 @@ export function formatResponse(chatResponse: any): string {
   }
 
   const formattedMessages = chatResponse.messages
-    .map((message: any) => {
+    .map((message: ChatMessage) => {
       const author = message.author || 'Unknown';
 
       let messageText = '';
 
       if (message.fragments && Array.isArray(message.fragments)) {
         messageText = message.fragments
-          .map((fragment: any) => {
+          .map((fragment: ChatMessageFragment) => {
             if (fragment.text) {
               return fragment.text;
             } else if (fragment.querySuggestion) {
@@ -139,7 +181,7 @@ export function formatResponse(chatResponse: any): string {
               Array.isArray(fragment.structuredResults)
             ) {
               return fragment.structuredResults
-                .map((result: any) => {
+                .map((result) => {
                   if (result.document) {
                     const doc = result.document;
 
@@ -169,7 +211,7 @@ export function formatResponse(chatResponse: any): string {
         citationsText =
           '\n\nSources:\n' +
           message.citations
-            .map((citation: any, index: number) => {
+            .map((citation: ChatMessageCitation, index: number) => {
               const sourceDoc = citation.sourceDocument || {};
               const title = sourceDoc.title || 'Unknown source';
               const url = sourceDoc.url || '';
@@ -181,7 +223,7 @@ export function formatResponse(chatResponse: any): string {
       const messageType = message.messageType
         ? ` (${message.messageType})`
         : '';
-      const stepId = message.stepId ? ` [Step: ${message.stepId}]` : '';
+      const stepId = (message as any).stepId ? ` [Step: ${(message as any).stepId}]` : '';
 
       return `${author}${messageType}${stepId}: ${messageText}${citationsText}`;
     })
@@ -196,9 +238,9 @@ export function formatResponse(chatResponse: any): string {
  * @param response The response object with potential chunk metadata
  * @returns Formatted response with chunk information if applicable
  */
-export function formatChunkedResponse(response: any): string {
+export function formatChunkedResponse(response: FormattableResponse): string {
   // Handle continuation chunks
-  if (response.content && response.metadata) {
+  if (isChatChunk(response)) {
     const { chunkIndex, totalChunks, hasMore } = response.metadata;
     let result = response.content;
     
@@ -211,20 +253,25 @@ export function formatChunkedResponse(response: any): string {
   }
   
   // Handle initial chunked response
-  if (response._formatted) {
-    let result = response._formatted;
-    
-    if (response._chunkMetadata) {
-      const { totalChunks, hasMore, responseId } = response._chunkMetadata;
-      if (hasMore) {
-        result += `\n\n---\n[Chunk 1 of ${totalChunks}] `;
-        result += `To continue, use continueFrom: { responseId: "${responseId}", chunkIndex: 1 }`;
+  if (isChunkedChatResponse(response)) {
+    if (response._formatted) {
+      let result = response._formatted;
+      
+      if (response._chunkMetadata) {
+        const { totalChunks, hasMore, responseId } = response._chunkMetadata;
+        if (hasMore) {
+          result += `\n\n---\n[Chunk 1 of ${totalChunks}] `;
+          result += `To continue, use continueFrom: { responseId: "${responseId}", chunkIndex: 1 }`;
+        }
       }
+      
+      return result;
     }
     
-    return result;
+    // Fall back to standard formatting
+    return formatResponse(response);
   }
   
-  // Fall back to standard formatting
-  return formatResponse(response);
+  // This should never happen with proper types
+  throw new Error('Unknown response type');
 }
diff --git a/packages/mcp-server-utils/src/config/index.ts b/packages/mcp-server-utils/src/config/index.ts
index e21b4893..15e5c152 100644
--- a/packages/mcp-server-utils/src/config/index.ts
+++ b/packages/mcp-server-utils/src/config/index.ts
@@ -135,7 +135,7 @@ export async function getConfig<T extends GetConfigOptions = GetConfigOptions>(
 
 function getLocalConfig(): GleanConfig {
   const instance = process.env.GLEAN_INSTANCE || process.env.GLEAN_SUBDOMAIN;
-  const baseUrl = process.env.GLEAN_BASE_URL || process.env.GLEAN_SERVER_INSTANCE;
+  const baseUrl = process.env.GLEAN_BASE_URL || process.env.GLEAN_SERVER_URL;
   const token = process.env.GLEAN_API_TOKEN;
   const actAs = process.env.GLEAN_ACT_AS;
   const issuer = process.env.GLEAN_OAUTH_ISSUER;

From c3c18dee3d7d1879ffa71fd93c08464b2f49ac21 Mon Sep 17 00:00:00 2001
From: Aaron Bockelie <aaron@bockelie.com>
Date: Mon, 21 Jul 2025 12:28:24 -0500
Subject: [PATCH 5/6] fix: increase timeout for heavy force-split test

The test that creates 100k characters with no natural boundaries
was timing out in CI (5 second default). Increased to 10 seconds
to handle the heavy processing load.

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .../src/test/tools/chat-response-buffer.test.ts                 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/local-mcp-server/src/test/tools/chat-response-buffer.test.ts b/packages/local-mcp-server/src/test/tools/chat-response-buffer.test.ts
index 0a91dbd1..a38471ea 100644
--- a/packages/local-mcp-server/src/test/tools/chat-response-buffer.test.ts
+++ b/packages/local-mcp-server/src/test/tools/chat-response-buffer.test.ts
@@ -73,7 +73,7 @@ describe('ChatResponseBuffer', () => {
       expect(result.metadata).toBeDefined();
       expect(result.content.length).toBeLessThan(largeText.length);
       expect(result.content.length).toBeGreaterThan(0);
-    });
+    }, 10000); // Increase timeout to 10 seconds for this heavy test
   });
 
   describe('Chunk Storage and Retrieval', () => {

From a328c8285bc9743c5a55d21da44201297aaf86eb Mon Sep 17 00:00:00 2001
From: Aaron Bockelie <aaron@bockelie.com>
Date: Mon, 21 Jul 2025 12:33:21 -0500
Subject: [PATCH 6/6] fix: reduce force-split test size to prevent CI timeouts

Reduced test from 100k to 50k characters to test force split logic
without causing timeouts in slower CI environments. The test still
validates the force split behavior with 2 chunks.

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .../src/test/tools/chat-response-buffer.test.ts             | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/packages/local-mcp-server/src/test/tools/chat-response-buffer.test.ts b/packages/local-mcp-server/src/test/tools/chat-response-buffer.test.ts
index a38471ea..610a339b 100644
--- a/packages/local-mcp-server/src/test/tools/chat-response-buffer.test.ts
+++ b/packages/local-mcp-server/src/test/tools/chat-response-buffer.test.ts
@@ -66,14 +66,16 @@ describe('ChatResponseBuffer', () => {
 
     it('should force split when no natural boundaries exist', async () => {
       // Create text with no natural boundaries
-      const largeText = 'A'.repeat(100000); // 100k chars, no breaks
+      // Use 50k chars (just over the 45k chunk limit) to test force split
+      const largeText = 'A'.repeat(50000); 
       
       const result = await buffer.processResponse(largeText);
       
       expect(result.metadata).toBeDefined();
       expect(result.content.length).toBeLessThan(largeText.length);
       expect(result.content.length).toBeGreaterThan(0);
-    }, 10000); // Increase timeout to 10 seconds for this heavy test
+      expect(result.metadata!.totalChunks).toBe(2); // Should split into 2 chunks
+    });
   });
 
   describe('Chunk Storage and Retrieval', () => {