fix(kb): add proper server-side pagination for knowledge base documents (#937)

waleedlatif1 · web-flow · commit 2a333c7cf721 · 2025-08-11T14:16:15.000-07:00
diff --git a/apps/sim/app/workspace/[workspaceId]/knowledge/[id]/base.tsx b/apps/sim/app/workspace/[workspaceId]/knowledge/[id]/base.tsx
@@ -1121,7 +1121,7 @@ export function KnowledgeBase({
                               key={page}
                               onClick={() => goToPage(page)}
                               disabled={isLoadingDocuments}
-                              className={`font-medium text-sm transition-colors hover:text-foreground disabled:cursor-not-allowed disabled:opacity-50 ${
+                              className={`font-medium text-sm transition-colors hover:text-foreground disabled:opacity-50 ${
                                 page === currentPage ? 'text-foreground' : 'text-muted-foreground'
                               }`}
                             >
diff --git a/apps/sim/hooks/use-knowledge.ts b/apps/sim/hooks/use-knowledge.ts
@@ -41,7 +41,6 @@ export function useKnowledgeBase(id: string) {
 }
 
 // Constants
-const MAX_DOCUMENTS_LIMIT = 10000
 const DEFAULT_PAGE_SIZE = 50
 
 export function useKnowledgeBaseDocuments(
@@ -54,69 +53,61 @@ export function useKnowledgeBaseDocuments(
   const [error, setError] = useState<string | null>(null)
 
   const documentsCache = getCachedDocuments(knowledgeBaseId)
-  const allDocuments = documentsCache?.documents || []
   const isLoading = loadingDocuments.has(knowledgeBaseId)
-  const hasBeenLoaded = documentsCache !== null // Check if we have any cache entry, even if empty
 
-  // Load all documents on initial mount
+  // Load documents with server-side pagination and search
+  const requestLimit = options?.limit || DEFAULT_PAGE_SIZE
+  const requestOffset = options?.offset || 0
+  const requestSearch = options?.search
+
   useEffect(() => {
-    if (!knowledgeBaseId || hasBeenLoaded || isLoading) return
+    if (!knowledgeBaseId || isLoading) return
 
     let isMounted = true
 
-    const loadAllDocuments = async () => {
+    const loadDocuments = async () => {
       try {
         setError(null)
-        await getDocuments(knowledgeBaseId, { limit: MAX_DOCUMENTS_LIMIT })
+        await getDocuments(knowledgeBaseId, {
+          search: requestSearch,
+          limit: requestLimit,
+          offset: requestOffset,
+        })
       } catch (err) {
         if (isMounted) {
           setError(err instanceof Error ? err.message : 'Failed to load documents')
         }
       }
     }
 
-    loadAllDocuments()
+    loadDocuments()
 
     return () => {
       isMounted = false
     }
-  }, [knowledgeBaseId, hasBeenLoaded, isLoading, getDocuments])
-
-  // Client-side filtering and pagination
-  const { documents, pagination } = useMemo(() => {
-    let filteredDocs = allDocuments
-
-    // Apply search filter
-    if (options?.search) {
-      const searchLower = options.search.toLowerCase()
-      filteredDocs = filteredDocs.filter((doc) => doc.filename.toLowerCase().includes(searchLower))
-    }
-
-    // Apply pagination
-    const offset = options?.offset || 0
-    const limit = options?.limit || DEFAULT_PAGE_SIZE
-    const total = filteredDocs.length
-    const paginatedDocs = filteredDocs.slice(offset, offset + limit)
+  }, [knowledgeBaseId, isLoading, getDocuments, requestSearch, requestLimit, requestOffset])
 
-    return {
-      documents: paginatedDocs,
-      pagination: {
-        total,
-        limit,
-        offset,
-        hasMore: offset + limit < total,
-      },
-    }
-  }, [allDocuments, options?.search, options?.limit, options?.offset])
+  // Use server-side filtered and paginated results directly
+  const documents = documentsCache?.documents || []
+  const pagination = documentsCache?.pagination || {
+    total: 0,
+    limit: requestLimit,
+    offset: requestOffset,
+    hasMore: false,
+  }
 
   const refreshDocumentsData = useCallback(async () => {
     try {
       setError(null)
-      await refreshDocuments(knowledgeBaseId, { limit: MAX_DOCUMENTS_LIMIT })
+      await refreshDocuments(knowledgeBaseId, {
+        search: requestSearch,
+        limit: requestLimit,
+        offset: requestOffset,
+      })
     } catch (err) {
       setError(err instanceof Error ? err.message : 'Failed to refresh documents')
     }
-  }, [knowledgeBaseId, refreshDocuments])
+  }, [knowledgeBaseId, refreshDocuments, requestSearch, requestLimit, requestOffset])
 
   const updateDocumentLocal = useCallback(
     (documentId: string, updates: Partial<DocumentData>) => {
diff --git a/apps/sim/stores/knowledge/store.ts b/apps/sim/stores/knowledge/store.ts
@@ -261,9 +261,18 @@ export const useKnowledgeStore = create<KnowledgeStore>((set, get) => ({
   ) => {
     const state = get()
 
-    // Return cached documents if they exist (no search-based caching since we do client-side filtering)
+    // Check if we have cached data that matches the exact request parameters
     const cached = state.documents[knowledgeBaseId]
-    if (cached && cached.documents.length > 0) {
+    const requestLimit = options?.limit || 50
+    const requestOffset = options?.offset || 0
+    const requestSearch = options?.search
+
+    if (
+      cached &&
+      cached.searchQuery === requestSearch &&
+      cached.pagination.limit === requestLimit &&
+      cached.pagination.offset === requestOffset
+    ) {
       return cached.documents
     }
 
@@ -277,11 +286,11 @@ export const useKnowledgeStore = create<KnowledgeStore>((set, get) => ({
         loadingDocuments: new Set([...state.loadingDocuments, knowledgeBaseId]),
       }))
 
-      // Build query parameters
+      // Build query parameters using the same defaults as caching
       const params = new URLSearchParams()
-      if (options?.search) params.set('search', options.search)
-      if (options?.limit) params.set('limit', options.limit.toString())
-      if (options?.offset) params.set('offset', options.offset.toString())
+      if (requestSearch) params.set('search', requestSearch)
+      params.set('limit', requestLimit.toString())
+      params.set('offset', requestOffset.toString())
 
       const url = `/api/knowledge/${knowledgeBaseId}/documents${params.toString() ? `?${params.toString()}` : ''}`
       const response = await fetch(url)
@@ -299,15 +308,15 @@ export const useKnowledgeStore = create<KnowledgeStore>((set, get) => ({
       const documents = result.data.documents || result.data // Handle both paginated and non-paginated responses
       const pagination = result.data.pagination || {
         total: documents.length,
-        limit: options?.limit || 50,
-        offset: options?.offset || 0,
+        limit: requestLimit,
+        offset: requestOffset,
         hasMore: false,
       }
 
       const documentsCache: DocumentsCache = {
         documents,
         pagination,
-        searchQuery: options?.search,
+        searchQuery: requestSearch,
         lastFetchTime: Date.now(),
       }
 
@@ -515,11 +524,15 @@ export const useKnowledgeStore = create<KnowledgeStore>((set, get) => ({
         loadingDocuments: new Set([...state.loadingDocuments, knowledgeBaseId]),
       }))
 
-      // Build query parameters - for refresh, always start from offset 0
+      // Build query parameters using consistent defaults
+      const requestLimit = options?.limit || 50
+      const requestOffset = options?.offset || 0
+      const requestSearch = options?.search
+
       const params = new URLSearchParams()
-      if (options?.search) params.set('search', options.search)
-      if (options?.limit) params.set('limit', options.limit.toString())
-      params.set('offset', '0') // Always start fresh on refresh
+      if (requestSearch) params.set('search', requestSearch)
+      params.set('limit', requestLimit.toString())
+      params.set('offset', requestOffset.toString())
 
       const url = `/api/knowledge/${knowledgeBaseId}/documents${params.toString() ? `?${params.toString()}` : ''}`
       const response = await fetch(url)
@@ -534,87 +547,33 @@ export const useKnowledgeStore = create<KnowledgeStore>((set, get) => ({
         throw new Error(result.error || 'Failed to fetch documents')
       }
 
-      const serverDocuments = result.data.documents || result.data
+      const documents = result.data.documents || result.data
       const pagination = result.data.pagination || {
-        total: serverDocuments.length,
-        limit: options?.limit || 50,
-        offset: 0,
+        total: documents.length,
+        limit: requestLimit,
+        offset: requestOffset,
         hasMore: false,
       }
 
-      set((state) => {
-        const currentDocuments = state.documents[knowledgeBaseId]?.documents || []
-
-        // Create a map of server documents by filename for quick lookup
-        const serverDocumentsByFilename = new Map()
-        serverDocuments.forEach((doc: DocumentData) => {
-          serverDocumentsByFilename.set(doc.filename, doc)
-        })
-
-        // Filter out temporary documents that now have real server equivalents
-        const filteredCurrentDocs = currentDocuments.filter((doc) => {
-          // If this is a temporary document (starts with temp-) and a server document exists with the same filename
-          if (doc.id.startsWith('temp-') && serverDocumentsByFilename.has(doc.filename)) {
-            return false // Remove the temporary document
-          }
-
-          // If this is a real document that still exists on the server, keep it for merging
-          if (!doc.id.startsWith('temp-')) {
-            const serverDoc = serverDocuments.find((sDoc: DocumentData) => sDoc.id === doc.id)
-            if (serverDoc) {
-              return false // Will be replaced by server version in merge below
-            }
-          }
-
-          // Keep temporary documents that don't have server equivalents yet
-          return true
-        })
-
-        // Merge server documents with any remaining local documents
-        const mergedDocuments = serverDocuments.map((serverDoc: DocumentData) => {
-          const existingDoc = currentDocuments.find((doc) => doc.id === serverDoc.id)
-
-          if (!existingDoc) {
-            // New document from server, use it as-is
-            return serverDoc
-          }
-
-          // Merge logic for existing documents (prefer server data for most fields)
-          return {
-            ...existingDoc,
-            ...serverDoc,
-            // Preserve any local optimistic updates that haven't been reflected on server yet
-            ...(existingDoc.processingStatus !== serverDoc.processingStatus &&
-            ['pending', 'processing'].includes(existingDoc.processingStatus) &&
-            !serverDoc.processingStartedAt
-              ? { processingStatus: existingDoc.processingStatus }
-              : {}),
-          }
-        })
-
-        // Add any remaining temporary documents that don't have server equivalents
-        const finalDocuments = [...mergedDocuments, ...filteredCurrentDocs]
-
-        const documentsCache: DocumentsCache = {
-          documents: finalDocuments,
-          pagination,
-          searchQuery: options?.search,
-          lastFetchTime: Date.now(),
-        }
-
-        return {
-          documents: {
-            ...state.documents,
-            [knowledgeBaseId]: documentsCache,
-          },
-          loadingDocuments: new Set(
-            [...state.loadingDocuments].filter((loadingId) => loadingId !== knowledgeBaseId)
-          ),
-        }
-      })
+      const documentsCache: DocumentsCache = {
+        documents,
+        pagination,
+        searchQuery: requestSearch,
+        lastFetchTime: Date.now(),
+      }
+
+      set((state) => ({
+        documents: {
+          ...state.documents,
+          [knowledgeBaseId]: documentsCache,
+        },
+        loadingDocuments: new Set(
+          [...state.loadingDocuments].filter((loadingId) => loadingId !== knowledgeBaseId)
+        ),
+      }))
 
       logger.info(`Documents refreshed for knowledge base: ${knowledgeBaseId}`)
-      return serverDocuments
+      return documents
     } catch (error) {
       logger.error(`Error refreshing documents for knowledge base ${knowledgeBaseId}:`, error)
 

Original file line number	Diff line number	Diff line change
`@@ -1121,7 +1121,7 @@ export function KnowledgeBase({`
`1121`	`1121`	`key={page}`
`1122`	`1122`	`onClick={() => goToPage(page)}`
`1123`	`1123`	`disabled={isLoadingDocuments}`
`1124`		``- className={`font-medium text-sm transition-colors hover:text-foreground disabled:cursor-not-allowed disabled:opacity-50 ${``
	`1124`	``+ className={`font-medium text-sm transition-colors hover:text-foreground disabled:opacity-50 ${``
`1125`	`1125`	`page === currentPage ? 'text-foreground' : 'text-muted-foreground'`
`1126`	`1126`	``}`}``
`1127`	`1127`	`>`