From 236de38f4344d01e1706e13bf22313ca54a407e1 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 30 Jan 2026 18:01:23 +0000 Subject: [PATCH 1/3] fix(pdf-server): handle HTTP 200 fallback in remote range requests When a remote server ignores the Range header and returns HTTP 200 with the full body, readPdfRange previously passed the entire file (potentially 10MB+) through as a single chunk. This bypassed the 512KB limit because: 1. The error check `!response.ok && status !== 206` short-circuits on 200 (ok is true), so the full body is read via arrayBuffer() 2. No Content-Range header on a 200 response leaves totalBytes at 0 3. hasMore becomes `offset + fullSize < 0` = false, so the client stops after one oversized message Fix: detect HTTP 200, cache the full body in memory (to avoid re-downloading on every subsequent chunk request), then slice to the requested range. The 512KB per-message limit is now enforced for all remote URLs regardless of Range request support. --- examples/pdf-server/server.ts | 36 ++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/examples/pdf-server/server.ts b/examples/pdf-server/server.ts index 40de9aff..7225bdf3 100644 --- a/examples/pdf-server/server.ts +++ b/examples/pdf-server/server.ts @@ -129,6 +129,26 @@ export function validateUrl(url: string): { valid: boolean; error?: string } { // Range Request Helpers // ============================================================================= +/** + * Cache for remote PDFs from servers that don't support Range requests. + * When a server returns HTTP 200 (full body) instead of 206 (partial), + * we store the full response here so subsequent chunk requests don't + * re-download the entire file. + */ +const remoteFullBodyCache = new Map(); + +/** Slice a cached or freshly-fetched full body to the requested range. */ +function sliceToChunk( + fullData: Uint8Array, + offset: number, + clampedByteCount: number, +): { data: Uint8Array; totalBytes: number } { + const totalBytes = fullData.length; + const start = Math.min(offset, totalBytes); + const end = Math.min(start + clampedByteCount, totalBytes); + return { data: fullData.slice(start, end), totalBytes }; +} + export async function readPdfRange( url: string, offset: number, @@ -162,6 +182,12 @@ export async function readPdfRange( return { data: new Uint8Array(buffer), totalBytes }; } + // Serve from cache if we previously downloaded the full body + const cached = remoteFullBodyCache.get(normalized); + if (cached) { + return sliceToChunk(cached, offset, clampedByteCount); + } + // Remote URL - Range request const response = await fetch(normalized, { headers: { @@ -175,7 +201,15 @@ export async function readPdfRange( ); } - // Parse total size from Content-Range header + // HTTP 200 means the server ignored our Range header and sent the full body. + // Cache it so subsequent chunk requests don't re-download, then slice. + if (response.status === 200) { + const fullData = new Uint8Array(await response.arrayBuffer()); + remoteFullBodyCache.set(normalized, fullData); + return sliceToChunk(fullData, offset, clampedByteCount); + } + + // HTTP 206 Partial Content — parse total size from Content-Range header const contentRange = response.headers.get("content-range"); let totalBytes = 0; if (contentRange) { From 6050371adb33ce137f0c5c22807bfdce6fbc3070 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 30 Jan 2026 18:28:18 +0000 Subject: [PATCH 2/3] Add session-based PDF caching with timeout cleanup and size limits - Add dual timeout strategy for cache cleanup: - 10s inactivity timeout (resets on each access) - 60s max lifetime (absolute timeout from creation) - Add 50MB max size limit with both Content-Length and actual size checks - Add unit tests for caching behavior including: - Cache on HTTP 200 response (no range support) - No cache on HTTP 206 response (range supported) - Slice cached data for subsequent range requests - Reject PDFs exceeding size limits - Export getCacheSize() and clearCache() for testing --- examples/pdf-server/server.test.ts | 168 +++++++++++++++++++++++++++++ examples/pdf-server/server.ts | 108 ++++++++++++++++++- 2 files changed, 273 insertions(+), 3 deletions(-) create mode 100644 examples/pdf-server/server.test.ts diff --git a/examples/pdf-server/server.test.ts b/examples/pdf-server/server.test.ts new file mode 100644 index 00000000..cdde23fd --- /dev/null +++ b/examples/pdf-server/server.test.ts @@ -0,0 +1,168 @@ +import { describe, it, expect, beforeEach, afterEach, spyOn } from "bun:test"; +import { + getCacheSize, + clearCache, + CACHE_INACTIVITY_TIMEOUT_MS, + CACHE_MAX_LIFETIME_MS, + CACHE_MAX_PDF_SIZE_BYTES, + readPdfRange, +} from "./server"; + +describe("PDF Cache with Timeouts", () => { + beforeEach(() => { + clearCache(); + }); + + afterEach(() => { + clearCache(); + }); + + describe("cache configuration", () => { + it("should have 10 second inactivity timeout", () => { + expect(CACHE_INACTIVITY_TIMEOUT_MS).toBe(10_000); + }); + + it("should have 60 second max lifetime timeout", () => { + expect(CACHE_MAX_LIFETIME_MS).toBe(60_000); + }); + + it("should have 50MB max PDF size limit", () => { + expect(CACHE_MAX_PDF_SIZE_BYTES).toBe(50 * 1024 * 1024); + }); + }); + + describe("cache management", () => { + it("should start with empty cache", () => { + expect(getCacheSize()).toBe(0); + }); + + it("should clear all entries", () => { + clearCache(); + expect(getCacheSize()).toBe(0); + }); + }); + + describe("readPdfRange caching behavior", () => { + const testUrl = "https://arxiv.org/pdf/test-pdf"; + const testData = new Uint8Array([0x25, 0x50, 0x44, 0x46]); // %PDF header + + it("should cache full body when server returns HTTP 200", async () => { + // Mock fetch to return HTTP 200 (full body, no range support) + const mockFetch = spyOn(globalThis, "fetch").mockResolvedValueOnce( + new Response(testData, { + status: 200, + headers: { "Content-Type": "application/pdf" }, + }), + ); + + try { + // First request - should fetch and cache + const result1 = await readPdfRange(testUrl, 0, 1024); + expect(result1.data).toEqual(testData); + expect(result1.totalBytes).toBe(testData.length); + expect(getCacheSize()).toBe(1); + + // Second request - should serve from cache (no new fetch) + const result2 = await readPdfRange(testUrl, 0, 1024); + expect(result2.data).toEqual(testData); + expect(mockFetch).toHaveBeenCalledTimes(1); // Only one fetch call + } finally { + mockFetch.mockRestore(); + } + }); + + it("should not cache when server returns HTTP 206 (range supported)", async () => { + const chunkData = new Uint8Array([0x25, 0x50]); // First 2 bytes + + const mockFetch = spyOn(globalThis, "fetch").mockResolvedValue( + new Response(chunkData, { + status: 206, + headers: { + "Content-Type": "application/pdf", + "Content-Range": "bytes 0-1/100", + }, + }), + ); + + try { + await readPdfRange(testUrl, 0, 2); + expect(getCacheSize()).toBe(0); // Not cached when 206 + } finally { + mockFetch.mockRestore(); + } + }); + + it("should slice cached data for subsequent range requests", async () => { + const fullData = new Uint8Array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); + + const mockFetch = spyOn(globalThis, "fetch").mockResolvedValueOnce( + new Response(fullData, { status: 200 }), + ); + + try { + // First request caches full body + await readPdfRange(testUrl, 0, 1024); + expect(getCacheSize()).toBe(1); + + // Subsequent request gets slice from cache + const result = await readPdfRange(testUrl, 2, 3); + expect(result.data).toEqual(new Uint8Array([3, 4, 5])); + expect(result.totalBytes).toBe(10); + expect(mockFetch).toHaveBeenCalledTimes(1); + } finally { + mockFetch.mockRestore(); + } + }); + + it("should reject PDFs larger than max size limit", async () => { + const testUrl = "https://arxiv.org/pdf/huge-pdf"; + // Create data larger than the limit + const hugeData = new Uint8Array(CACHE_MAX_PDF_SIZE_BYTES + 1); + + const mockFetch = spyOn(globalThis, "fetch").mockResolvedValueOnce( + new Response(hugeData, { + status: 200, + headers: { "Content-Type": "application/pdf" }, + }), + ); + + try { + await expect(readPdfRange(testUrl, 0, 1024)).rejects.toThrow( + /PDF too large to cache/, + ); + expect(getCacheSize()).toBe(0); // Should not be cached + } finally { + mockFetch.mockRestore(); + } + }); + + it("should reject when Content-Length header exceeds limit", async () => { + const testUrl = "https://arxiv.org/pdf/huge-pdf-header"; + const smallData = new Uint8Array([1, 2, 3, 4]); + + const mockFetch = spyOn(globalThis, "fetch").mockResolvedValueOnce( + new Response(smallData, { + status: 200, + headers: { + "Content-Type": "application/pdf", + "Content-Length": String(CACHE_MAX_PDF_SIZE_BYTES + 1), + }, + }), + ); + + try { + await expect(readPdfRange(testUrl, 0, 1024)).rejects.toThrow( + /PDF too large to cache/, + ); + expect(getCacheSize()).toBe(0); + } finally { + mockFetch.mockRestore(); + } + }); + }); + + // Note: Timer-based tests (inactivity/max lifetime) would require + // using fake timers which can be complex with async code. + // The timeout behavior is straightforward and can be verified + // through manual testing or E2E tests. +}); diff --git a/examples/pdf-server/server.ts b/examples/pdf-server/server.ts index 7225bdf3..41c9d841 100644 --- a/examples/pdf-server/server.ts +++ b/examples/pdf-server/server.ts @@ -33,6 +33,15 @@ export const DEFAULT_PDF = "https://arxiv.org/pdf/1706.03762"; // Attention Is A export const MAX_CHUNK_BYTES = 512 * 1024; // 512KB max per request export const RESOURCE_URI = "ui://pdf-viewer/mcp-app.html"; +/** Inactivity timeout: clear cache entry if not accessed for this long */ +export const CACHE_INACTIVITY_TIMEOUT_MS = 10_000; // 10 seconds + +/** Max lifetime: clear cache entry after this time regardless of access */ +export const CACHE_MAX_LIFETIME_MS = 60_000; // 60 seconds + +/** Max size for cached PDFs (defensive limit to prevent memory exhaustion) */ +export const CACHE_MAX_PDF_SIZE_BYTES = 50 * 1024 * 1024; // 50MB + /** Allowed remote origins (security allowlist) */ export const allowedRemoteOrigins = new Set([ "https://agrirxiv.org", @@ -129,13 +138,87 @@ export function validateUrl(url: string): { valid: boolean; error?: string } { // Range Request Helpers // ============================================================================= +/** + * Cache entry for remote PDFs from servers that don't support Range requests. + * Tracks both inactivity and max lifetime for automatic cleanup. + */ +interface CacheEntry { + /** The cached PDF data */ + data: Uint8Array; + /** Timestamp when entry was created (for max lifetime) */ + createdAt: number; + /** Timer that fires after CACHE_INACTIVITY_TIMEOUT_MS of no access */ + inactivityTimer: ReturnType; + /** Timer that fires after CACHE_MAX_LIFETIME_MS from creation */ + maxLifetimeTimer: ReturnType; +} + /** * Cache for remote PDFs from servers that don't support Range requests. * When a server returns HTTP 200 (full body) instead of 206 (partial), * we store the full response here so subsequent chunk requests don't * re-download the entire file. + * + * Entries are automatically cleared after: + * - 10 seconds of inactivity (no access) + * - 60 seconds max lifetime (regardless of access) */ -const remoteFullBodyCache = new Map(); +const remoteFullBodyCache = new Map(); + +/** Delete a cache entry and clear its timers */ +function deleteCacheEntry(url: string): void { + const entry = remoteFullBodyCache.get(url); + if (entry) { + clearTimeout(entry.inactivityTimer); + clearTimeout(entry.maxLifetimeTimer); + remoteFullBodyCache.delete(url); + } +} + +/** Get cached data and refresh the inactivity timer */ +function getCacheEntry(url: string): Uint8Array | undefined { + const entry = remoteFullBodyCache.get(url); + if (!entry) return undefined; + + // Refresh inactivity timer on access + clearTimeout(entry.inactivityTimer); + entry.inactivityTimer = setTimeout(() => { + deleteCacheEntry(url); + }, CACHE_INACTIVITY_TIMEOUT_MS); + + return entry.data; +} + +/** Add data to cache with both inactivity and max lifetime timers */ +function setCacheEntry(url: string, data: Uint8Array): void { + // Clear any existing entry first + deleteCacheEntry(url); + + const entry: CacheEntry = { + data, + createdAt: Date.now(), + inactivityTimer: setTimeout(() => { + deleteCacheEntry(url); + }, CACHE_INACTIVITY_TIMEOUT_MS), + maxLifetimeTimer: setTimeout(() => { + deleteCacheEntry(url); + }, CACHE_MAX_LIFETIME_MS), + }; + + remoteFullBodyCache.set(url, entry); +} + +/** Get current cache size (for testing) */ +export function getCacheSize(): number { + return remoteFullBodyCache.size; +} + +/** Clear all cache entries (for testing) */ +export function clearCache(): void { + for (const url of [...remoteFullBodyCache.keys()]) { + deleteCacheEntry(url); + } +} /** Slice a cached or freshly-fetched full body to the requested range. */ function sliceToChunk( @@ -183,7 +266,7 @@ export async function readPdfRange( } // Serve from cache if we previously downloaded the full body - const cached = remoteFullBodyCache.get(normalized); + const cached = getCacheEntry(normalized); if (cached) { return sliceToChunk(cached, offset, clampedByteCount); } @@ -204,8 +287,27 @@ export async function readPdfRange( // HTTP 200 means the server ignored our Range header and sent the full body. // Cache it so subsequent chunk requests don't re-download, then slice. if (response.status === 200) { + // Check Content-Length header first as a preliminary size check + const contentLength = response.headers.get("content-length"); + if (contentLength) { + const declaredSize = parseInt(contentLength, 10); + if (declaredSize > CACHE_MAX_PDF_SIZE_BYTES) { + throw new Error( + `PDF too large to cache: ${declaredSize} bytes exceeds ${CACHE_MAX_PDF_SIZE_BYTES} byte limit`, + ); + } + } + const fullData = new Uint8Array(await response.arrayBuffer()); - remoteFullBodyCache.set(normalized, fullData); + + // Check actual size (may differ from Content-Length) + if (fullData.length > CACHE_MAX_PDF_SIZE_BYTES) { + throw new Error( + `PDF too large to cache: ${fullData.length} bytes exceeds ${CACHE_MAX_PDF_SIZE_BYTES} byte limit`, + ); + } + + setCacheEntry(normalized, fullData); return sliceToChunk(fullData, offset, clampedByteCount); } From 7302d4e12a01818dfb64aef8e2baad0239685732 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 30 Jan 2026 20:14:46 +0000 Subject: [PATCH 3/3] refactor(pdf-server): make PDF cache session-local via factory pattern - Replace module-level global cache with createPdfCache() factory - Each server instance now gets its own isolated cache - Export PdfCache interface for type-safe usage - Update tests to use per-test cache instances - Add test verifying cache isolation between sessions --- examples/pdf-server/server.test.ts | 56 +++--- examples/pdf-server/server.ts | 296 +++++++++++++++-------------- 2 files changed, 192 insertions(+), 160 deletions(-) diff --git a/examples/pdf-server/server.test.ts b/examples/pdf-server/server.test.ts index cdde23fd..92ef4535 100644 --- a/examples/pdf-server/server.test.ts +++ b/examples/pdf-server/server.test.ts @@ -1,20 +1,22 @@ import { describe, it, expect, beforeEach, afterEach, spyOn } from "bun:test"; import { - getCacheSize, - clearCache, + createPdfCache, CACHE_INACTIVITY_TIMEOUT_MS, CACHE_MAX_LIFETIME_MS, CACHE_MAX_PDF_SIZE_BYTES, - readPdfRange, + type PdfCache, } from "./server"; describe("PDF Cache with Timeouts", () => { + let pdfCache: PdfCache; + beforeEach(() => { - clearCache(); + // Each test gets its own session-local cache + pdfCache = createPdfCache(); }); afterEach(() => { - clearCache(); + pdfCache.clearCache(); }); describe("cache configuration", () => { @@ -33,12 +35,22 @@ describe("PDF Cache with Timeouts", () => { describe("cache management", () => { it("should start with empty cache", () => { - expect(getCacheSize()).toBe(0); + expect(pdfCache.getCacheSize()).toBe(0); }); it("should clear all entries", () => { - clearCache(); - expect(getCacheSize()).toBe(0); + pdfCache.clearCache(); + expect(pdfCache.getCacheSize()).toBe(0); + }); + + it("should isolate caches between sessions", () => { + // Create two independent cache instances + const cache1 = createPdfCache(); + const cache2 = createPdfCache(); + + // They should be independent (both start empty) + expect(cache1.getCacheSize()).toBe(0); + expect(cache2.getCacheSize()).toBe(0); }); }); @@ -57,13 +69,13 @@ describe("PDF Cache with Timeouts", () => { try { // First request - should fetch and cache - const result1 = await readPdfRange(testUrl, 0, 1024); + const result1 = await pdfCache.readPdfRange(testUrl, 0, 1024); expect(result1.data).toEqual(testData); expect(result1.totalBytes).toBe(testData.length); - expect(getCacheSize()).toBe(1); + expect(pdfCache.getCacheSize()).toBe(1); // Second request - should serve from cache (no new fetch) - const result2 = await readPdfRange(testUrl, 0, 1024); + const result2 = await pdfCache.readPdfRange(testUrl, 0, 1024); expect(result2.data).toEqual(testData); expect(mockFetch).toHaveBeenCalledTimes(1); // Only one fetch call } finally { @@ -85,8 +97,8 @@ describe("PDF Cache with Timeouts", () => { ); try { - await readPdfRange(testUrl, 0, 2); - expect(getCacheSize()).toBe(0); // Not cached when 206 + await pdfCache.readPdfRange(testUrl, 0, 2); + expect(pdfCache.getCacheSize()).toBe(0); // Not cached when 206 } finally { mockFetch.mockRestore(); } @@ -101,11 +113,11 @@ describe("PDF Cache with Timeouts", () => { try { // First request caches full body - await readPdfRange(testUrl, 0, 1024); - expect(getCacheSize()).toBe(1); + await pdfCache.readPdfRange(testUrl, 0, 1024); + expect(pdfCache.getCacheSize()).toBe(1); // Subsequent request gets slice from cache - const result = await readPdfRange(testUrl, 2, 3); + const result = await pdfCache.readPdfRange(testUrl, 2, 3); expect(result.data).toEqual(new Uint8Array([3, 4, 5])); expect(result.totalBytes).toBe(10); expect(mockFetch).toHaveBeenCalledTimes(1); @@ -115,7 +127,7 @@ describe("PDF Cache with Timeouts", () => { }); it("should reject PDFs larger than max size limit", async () => { - const testUrl = "https://arxiv.org/pdf/huge-pdf"; + const hugeUrl = "https://arxiv.org/pdf/huge-pdf"; // Create data larger than the limit const hugeData = new Uint8Array(CACHE_MAX_PDF_SIZE_BYTES + 1); @@ -127,17 +139,17 @@ describe("PDF Cache with Timeouts", () => { ); try { - await expect(readPdfRange(testUrl, 0, 1024)).rejects.toThrow( + await expect(pdfCache.readPdfRange(hugeUrl, 0, 1024)).rejects.toThrow( /PDF too large to cache/, ); - expect(getCacheSize()).toBe(0); // Should not be cached + expect(pdfCache.getCacheSize()).toBe(0); // Should not be cached } finally { mockFetch.mockRestore(); } }); it("should reject when Content-Length header exceeds limit", async () => { - const testUrl = "https://arxiv.org/pdf/huge-pdf-header"; + const headerUrl = "https://arxiv.org/pdf/huge-pdf-header"; const smallData = new Uint8Array([1, 2, 3, 4]); const mockFetch = spyOn(globalThis, "fetch").mockResolvedValueOnce( @@ -151,10 +163,10 @@ describe("PDF Cache with Timeouts", () => { ); try { - await expect(readPdfRange(testUrl, 0, 1024)).rejects.toThrow( + await expect(pdfCache.readPdfRange(headerUrl, 0, 1024)).rejects.toThrow( /PDF too large to cache/, ); - expect(getCacheSize()).toBe(0); + expect(pdfCache.getCacheSize()).toBe(0); } finally { mockFetch.mockRestore(); } diff --git a/examples/pdf-server/server.ts b/examples/pdf-server/server.ts index 41c9d841..46f54036 100644 --- a/examples/pdf-server/server.ts +++ b/examples/pdf-server/server.ts @@ -135,7 +135,7 @@ export function validateUrl(url: string): { valid: boolean; error?: string } { } // ============================================================================= -// Range Request Helpers +// Session-Local PDF Cache // ============================================================================= /** @@ -154,175 +154,192 @@ interface CacheEntry { } /** - * Cache for remote PDFs from servers that don't support Range requests. - * When a server returns HTTP 200 (full body) instead of 206 (partial), - * we store the full response here so subsequent chunk requests don't - * re-download the entire file. + * Session-local PDF cache utilities. + * Each call to createPdfCache() creates an independent cache instance. + */ +export interface PdfCache { + /** Read a range of bytes from a PDF, using cache for servers without Range support */ + readPdfRange( + url: string, + offset: number, + byteCount: number, + ): Promise<{ data: Uint8Array; totalBytes: number }>; + /** Get current number of cached entries */ + getCacheSize(): number; + /** Clear all cached entries and their timers */ + clearCache(): void; +} + +/** + * Creates a session-local PDF cache with automatic timeout-based cleanup. + * + * When a remote server returns HTTP 200 (full body) instead of 206 (partial), + * the full response is cached so subsequent chunk requests don't re-download. * * Entries are automatically cleared after: - * - 10 seconds of inactivity (no access) - * - 60 seconds max lifetime (regardless of access) + * - CACHE_INACTIVITY_TIMEOUT_MS of no access (resets on each access) + * - CACHE_MAX_LIFETIME_MS from creation (absolute timeout) */ -const remoteFullBodyCache = new Map(); - -/** Delete a cache entry and clear its timers */ -function deleteCacheEntry(url: string): void { - const entry = remoteFullBodyCache.get(url); - if (entry) { - clearTimeout(entry.inactivityTimer); - clearTimeout(entry.maxLifetimeTimer); - remoteFullBodyCache.delete(url); +export function createPdfCache(): PdfCache { + const cache = new Map(); + + /** Delete a cache entry and clear its timers */ + function deleteCacheEntry(url: string): void { + const entry = cache.get(url); + if (entry) { + clearTimeout(entry.inactivityTimer); + clearTimeout(entry.maxLifetimeTimer); + cache.delete(url); + } } -} - -/** Get cached data and refresh the inactivity timer */ -function getCacheEntry(url: string): Uint8Array | undefined { - const entry = remoteFullBodyCache.get(url); - if (!entry) return undefined; - - // Refresh inactivity timer on access - clearTimeout(entry.inactivityTimer); - entry.inactivityTimer = setTimeout(() => { - deleteCacheEntry(url); - }, CACHE_INACTIVITY_TIMEOUT_MS); - - return entry.data; -} -/** Add data to cache with both inactivity and max lifetime timers */ -function setCacheEntry(url: string, data: Uint8Array): void { - // Clear any existing entry first - deleteCacheEntry(url); + /** Get cached data and refresh the inactivity timer */ + function getCacheEntry(url: string): Uint8Array | undefined { + const entry = cache.get(url); + if (!entry) return undefined; - const entry: CacheEntry = { - data, - createdAt: Date.now(), - inactivityTimer: setTimeout(() => { - deleteCacheEntry(url); - }, CACHE_INACTIVITY_TIMEOUT_MS), - maxLifetimeTimer: setTimeout(() => { + // Refresh inactivity timer on access + clearTimeout(entry.inactivityTimer); + entry.inactivityTimer = setTimeout(() => { deleteCacheEntry(url); - }, CACHE_MAX_LIFETIME_MS), - }; - - remoteFullBodyCache.set(url, entry); -} - -/** Get current cache size (for testing) */ -export function getCacheSize(): number { - return remoteFullBodyCache.size; -} + }, CACHE_INACTIVITY_TIMEOUT_MS); -/** Clear all cache entries (for testing) */ -export function clearCache(): void { - for (const url of [...remoteFullBodyCache.keys()]) { - deleteCacheEntry(url); + return entry.data; } -} - -/** Slice a cached or freshly-fetched full body to the requested range. */ -function sliceToChunk( - fullData: Uint8Array, - offset: number, - clampedByteCount: number, -): { data: Uint8Array; totalBytes: number } { - const totalBytes = fullData.length; - const start = Math.min(offset, totalBytes); - const end = Math.min(start + clampedByteCount, totalBytes); - return { data: fullData.slice(start, end), totalBytes }; -} -export async function readPdfRange( - url: string, - offset: number, - byteCount: number, -): Promise<{ data: Uint8Array; totalBytes: number }> { - const normalized = isArxivUrl(url) ? normalizeArxivUrl(url) : url; - const clampedByteCount = Math.min(byteCount, MAX_CHUNK_BYTES); + /** Add data to cache with both inactivity and max lifetime timers */ + function setCacheEntry(url: string, data: Uint8Array): void { + // Clear any existing entry first + deleteCacheEntry(url); - if (isFileUrl(normalized)) { - const filePath = fileUrlToPath(normalized); - const stats = await fs.promises.stat(filePath); - const totalBytes = stats.size; + const entry: CacheEntry = { + data, + createdAt: Date.now(), + inactivityTimer: setTimeout(() => { + deleteCacheEntry(url); + }, CACHE_INACTIVITY_TIMEOUT_MS), + maxLifetimeTimer: setTimeout(() => { + deleteCacheEntry(url); + }, CACHE_MAX_LIFETIME_MS), + }; + + cache.set(url, entry); + } - // Clamp to file bounds + /** Slice a cached or freshly-fetched full body to the requested range. */ + function sliceToChunk( + fullData: Uint8Array, + offset: number, + clampedByteCount: number, + ): { data: Uint8Array; totalBytes: number } { + const totalBytes = fullData.length; const start = Math.min(offset, totalBytes); const end = Math.min(start + clampedByteCount, totalBytes); + return { data: fullData.slice(start, end), totalBytes }; + } - if (start >= totalBytes) { - return { data: new Uint8Array(0), totalBytes }; + async function readPdfRange( + url: string, + offset: number, + byteCount: number, + ): Promise<{ data: Uint8Array; totalBytes: number }> { + const normalized = isArxivUrl(url) ? normalizeArxivUrl(url) : url; + const clampedByteCount = Math.min(byteCount, MAX_CHUNK_BYTES); + + if (isFileUrl(normalized)) { + const filePath = fileUrlToPath(normalized); + const stats = await fs.promises.stat(filePath); + const totalBytes = stats.size; + + // Clamp to file bounds + const start = Math.min(offset, totalBytes); + const end = Math.min(start + clampedByteCount, totalBytes); + + if (start >= totalBytes) { + return { data: new Uint8Array(0), totalBytes }; + } + + // Read range from local file + const buffer = Buffer.alloc(end - start); + const fd = await fs.promises.open(filePath, "r"); + try { + await fd.read(buffer, 0, end - start, start); + } finally { + await fd.close(); + } + + return { data: new Uint8Array(buffer), totalBytes }; } - // Read range from local file - const buffer = Buffer.alloc(end - start); - const fd = await fs.promises.open(filePath, "r"); - try { - await fd.read(buffer, 0, end - start, start); - } finally { - await fd.close(); + // Serve from cache if we previously downloaded the full body + const cached = getCacheEntry(normalized); + if (cached) { + return sliceToChunk(cached, offset, clampedByteCount); } - return { data: new Uint8Array(buffer), totalBytes }; - } + // Remote URL - Range request + const response = await fetch(normalized, { + headers: { + Range: `bytes=${offset}-${offset + clampedByteCount - 1}`, + }, + }); - // Serve from cache if we previously downloaded the full body - const cached = getCacheEntry(normalized); - if (cached) { - return sliceToChunk(cached, offset, clampedByteCount); - } + if (!response.ok && response.status !== 206) { + throw new Error( + `Range request failed: ${response.status} ${response.statusText}`, + ); + } - // Remote URL - Range request - const response = await fetch(normalized, { - headers: { - Range: `bytes=${offset}-${offset + clampedByteCount - 1}`, - }, - }); + // HTTP 200 means the server ignored our Range header and sent the full body. + // Cache it so subsequent chunk requests don't re-download, then slice. + if (response.status === 200) { + // Check Content-Length header first as a preliminary size check + const contentLength = response.headers.get("content-length"); + if (contentLength) { + const declaredSize = parseInt(contentLength, 10); + if (declaredSize > CACHE_MAX_PDF_SIZE_BYTES) { + throw new Error( + `PDF too large to cache: ${declaredSize} bytes exceeds ${CACHE_MAX_PDF_SIZE_BYTES} byte limit`, + ); + } + } - if (!response.ok && response.status !== 206) { - throw new Error( - `Range request failed: ${response.status} ${response.statusText}`, - ); - } + const fullData = new Uint8Array(await response.arrayBuffer()); - // HTTP 200 means the server ignored our Range header and sent the full body. - // Cache it so subsequent chunk requests don't re-download, then slice. - if (response.status === 200) { - // Check Content-Length header first as a preliminary size check - const contentLength = response.headers.get("content-length"); - if (contentLength) { - const declaredSize = parseInt(contentLength, 10); - if (declaredSize > CACHE_MAX_PDF_SIZE_BYTES) { + // Check actual size (may differ from Content-Length) + if (fullData.length > CACHE_MAX_PDF_SIZE_BYTES) { throw new Error( - `PDF too large to cache: ${declaredSize} bytes exceeds ${CACHE_MAX_PDF_SIZE_BYTES} byte limit`, + `PDF too large to cache: ${fullData.length} bytes exceeds ${CACHE_MAX_PDF_SIZE_BYTES} byte limit`, ); } - } - const fullData = new Uint8Array(await response.arrayBuffer()); - - // Check actual size (may differ from Content-Length) - if (fullData.length > CACHE_MAX_PDF_SIZE_BYTES) { - throw new Error( - `PDF too large to cache: ${fullData.length} bytes exceeds ${CACHE_MAX_PDF_SIZE_BYTES} byte limit`, - ); + setCacheEntry(normalized, fullData); + return sliceToChunk(fullData, offset, clampedByteCount); } - setCacheEntry(normalized, fullData); - return sliceToChunk(fullData, offset, clampedByteCount); - } - - // HTTP 206 Partial Content — parse total size from Content-Range header - const contentRange = response.headers.get("content-range"); - let totalBytes = 0; - if (contentRange) { - const match = contentRange.match(/bytes \d+-\d+\/(\d+)/); - if (match) { - totalBytes = parseInt(match[1], 10); + // HTTP 206 Partial Content — parse total size from Content-Range header + const contentRange = response.headers.get("content-range"); + let totalBytes = 0; + if (contentRange) { + const match = contentRange.match(/bytes \d+-\d+\/(\d+)/); + if (match) { + totalBytes = parseInt(match[1], 10); + } } + + const data = new Uint8Array(await response.arrayBuffer()); + return { data, totalBytes }; } - const data = new Uint8Array(await response.arrayBuffer()); - return { data, totalBytes }; + return { + readPdfRange, + getCacheSize: () => cache.size, + clearCache: () => { + for (const url of [...cache.keys()]) { + deleteCacheEntry(url); + } + }, + }; } // ============================================================================= @@ -332,6 +349,9 @@ export async function readPdfRange( export function createServer(): McpServer { const server = new McpServer({ name: "PDF Server", version: "2.0.0" }); + // Create session-local cache (isolated per server instance) + const { readPdfRange } = createPdfCache(); + // Tool: list_pdfs - List available PDFs (local files + allowed origins) server.tool( "list_pdfs",