diff --git a/app/llms-full.txt/route.ts b/app/llms-full.txt/route.ts new file mode 100644 index 00000000..36de6e39 --- /dev/null +++ b/app/llms-full.txt/route.ts @@ -0,0 +1,27 @@ +import {NextResponse} from 'next/server'; +import {getBaseUrl} from '@/lib/docs-utils'; +import {generateLlmsFullTxt} from '@/lib/llms-text'; + +export const revalidate = false; +export const dynamic = 'force-static'; + +const TEXT_HEADERS = { + 'Content-Type': 'text/plain; charset=utf-8', + 'Cache-Control': 'public, max-age=0, must-revalidate', +}; + +export async function GET() { + try { + const baseUrl = getBaseUrl(); + const body = (await generateLlmsFullTxt(baseUrl)).trim(); + + return new NextResponse(body, {headers: TEXT_HEADERS}); + } catch (err) { + console.error('Error generating /llms-full.txt:', err); + + return new NextResponse('Internal server error', { + status: 500, + headers: TEXT_HEADERS, + }); + } +} diff --git a/app/llms.txt/route.ts b/app/llms.txt/route.ts new file mode 100644 index 00000000..26fab277 --- /dev/null +++ b/app/llms.txt/route.ts @@ -0,0 +1,27 @@ +import {NextResponse} from 'next/server'; +import {getBaseUrl} from '@/lib/docs-utils'; +import {generateLlmsTxt} from '@/lib/llms-text'; + +export const revalidate = false; +export const dynamic = 'force-static'; + +const TEXT_HEADERS = { + 'Content-Type': 'text/plain; charset=utf-8', + 'Cache-Control': 'public, max-age=0, must-revalidate', +}; + +export async function GET() { + try { + const baseUrl = getBaseUrl(); + const body = (await generateLlmsTxt(baseUrl)).trim(); + + return new NextResponse(body, {headers: TEXT_HEADERS}); + } catch (err) { + console.error('Error generating /llms.txt:', err); + + return new NextResponse('Internal server error', { + status: 500, + headers: TEXT_HEADERS, + }); + } +} diff --git a/assets/search-index.json b/assets/search-index.json index af7a6368..b1b59c90 100644 --- a/assets/search-index.json +++ b/assets/search-index.json @@ -94,7 +94,7 @@ "id": "13-llms", "title": 
"Welcome, 🤖!", "url": "/docs/llms", - "content": "Are you an LLM? Do you like long walks through vector space and late-night tokenization? Or maybe you're a friend of an LLM, just trying to make life a little easier for the contextually challenged? Either way, you're in the right place! Stream on over to llms.txt for the text-only version of these docs.", + "content": "Are you an LLM? Do you like long walks through vector space and late-night tokenization? Or maybe you're a friend of an LLM, just trying to make life a little easier for the contextually challenged? Either way, you're in the right place! Here are some ways to get started: llms.txt - index of all documentation pages with links to individual pages llms-full.txt - full corpus of all documentation (for models that can't follow links) /docs/\\<slug\\>.md - individual pages as markdown (e.g. /docs/llms.md)", "headings": [] }, { diff --git a/contents/docs/llms.mdx b/contents/docs/llms.mdx index d9f3f667..588437d6 100644 --- a/contents/docs/llms.mdx +++ b/contents/docs/llms.mdx @@ -8,4 +8,8 @@ Do you like long walks through vector space and late-night tokenization? Or maybe you're a friend of an LLM, just trying to make life a little easier for the contextually challenged? -Either way, you're in the right place! Stream on over to [llms.txt](/llms.txt) for the text-only version of these docs. +Either way, you're in the right place! Here are some ways to get started: + +- [llms.txt](/llms.txt) - index of all documentation pages with links to individual pages +- [llms-full.txt](/llms-full.txt) - full corpus of all documentation (for models that can't follow links) +- `/docs/<slug>.md` - individual pages as markdown (e.g. 
`/docs/llms.md`) diff --git a/lib/generate-llms.ts b/lib/generate-llms.ts deleted file mode 100644 index 1903b37c..00000000 --- a/lib/generate-llms.ts +++ /dev/null @@ -1,101 +0,0 @@ -import path from 'path'; -import {promises as fs} from 'fs'; -import matter from 'gray-matter'; -import {getMarkdownForSlug} from './mdx-to-markdown'; -import {page_routes as pageRoutes} from './routes-config'; -import {getBaseUrl, getDocsContentPath} from './docs-utils'; - -async function generateLlmsTxt(baseUrl: string): Promise<string> { - let output = OUTPUT_BASE; - - for (const route of pageRoutes) { - if (!route.href) continue; - - const slug = route.href.replace(/^\//, '').replace(/^docs\//, ''); - const url = `${baseUrl}/docs/${slug}`; - - try { - const contentPath = await getDocsContentPath(slug); - const rawMdx = await fs.readFile(contentPath, 'utf-8'); - const {data} = matter(rawMdx); - const description = data.description ?? ''; - const descSuffix = description ? `: ${description}` : ''; - - output += `- [${route.title}](${url})${descSuffix}\n`; - } catch (err) { - console.warn(`Warning: Could not process route ${route.href}:`, err); - } - } - - return output; -} - -async function generateLlmsFullTxt(baseUrl: string): Promise<string> { - let output = OUTPUT_BASE; - - for (const route of pageRoutes) { - if (!route.href) continue; - - const slug = route.href.replace(/^\//, '').replace(/^docs\//, ''); - - try { - const markdown = await getMarkdownForSlug(slug); - - if (!markdown) { - console.warn(`Warning: No markdown generated for ${slug}`); - continue; - } - - const url = `${baseUrl}/docs/${slug}`; - - output += '---\n\n'; - - if (markdown.startsWith('# ')) { - const firstNewline = markdown.indexOf('\n'); - const title = markdown.slice(0, firstNewline); - const rest = markdown.slice(firstNewline).trimStart(); - - output += `${title}\n\n`; - output += `Source: ${url}\n\n`; - output += `${rest}\n\n`; - } else { - output += `Source: ${url}\n\n`; - output += `${markdown}\n\n`; - } - } catch 
(err) { - console.warn(`Warning: Error processing ${slug}:`, err); - } - } - - return output; -} - -const OUTPUT_BASE = `# Zero\n\n -> Zero is a new kind of sync engine powered by queries.\n\n -## Documentation\n\n`; - -async function main() { - console.log('Generating llms.txt files...'); - - const baseUrl = getBaseUrl(); - console.log(`Using base URL: ${baseUrl}`); - - try { - const llmsTxt = await generateLlmsTxt(baseUrl); - const llmsTxtPath = path.join(process.cwd(), 'public', 'llms.txt'); - await fs.writeFile(llmsTxtPath, llmsTxt.trim()); - console.log(`✓ Generated ${llmsTxtPath}`); - - const llmsFullTxt = await generateLlmsFullTxt(baseUrl); - const llmsFullTxtPath = path.join(process.cwd(), 'public', 'llms-full.txt'); - await fs.writeFile(llmsFullTxtPath, llmsFullTxt.trim()); - console.log(`✓ Generated ${llmsFullTxtPath}`); - - console.log('Successfully generated llms.txt files'); - } catch (err) { - console.error('Error generating llms.txt files:', err); - process.exit(1); - } -} - -main(); diff --git a/lib/llms-text.ts b/lib/llms-text.ts new file mode 100644 index 00000000..09bdb8dc --- /dev/null +++ b/lib/llms-text.ts @@ -0,0 +1,98 @@ +import {promises as fs} from 'fs'; +import matter from 'gray-matter'; +import {ROUTES} from './routes-config'; +import {getDocsContentPath} from './docs-utils'; +import {getMarkdownForSlug} from './mdx-to-markdown'; + +const OUTPUT_BASE = `# Zero + +> Zero is a new kind of sync engine powered by queries. 
+ +`; + +function normalizeSlug(href: string) { + return href.replace(/^\//, '').replace(/^docs\//, ''); +} + +export async function generateLlmsTxt(baseUrl: string): Promise<string> { + let output = OUTPUT_BASE; + + for (const section of ROUTES) { + output += `## ${section.title}\n\n`; + + if (section.items) { + for (const item of section.items) { + if (!item.href) continue; + + const slug = normalizeSlug(item.href); + const url = `${baseUrl}/docs/${slug}.md`; + + try { + const contentPath = await getDocsContentPath(slug); + const rawMdx = await fs.readFile(contentPath, 'utf-8'); + const {data} = matter(rawMdx); + const description = data.description ?? ''; + const descSuffix = description ? `: ${description}` : ''; + + output += `- [${item.title}](${url})${descSuffix}\n`; + } catch (err) { + console.warn(`warning: could not process route ${item.href}:`, err); + } + } + } + + output += '\n'; + } + + return output; +} + +export async function generateLlmsFullTxt(baseUrl: string): Promise<string> { + let output = OUTPUT_BASE; + + for (const section of ROUTES) { + if (section.items) { + for (const item of section.items) { + if (!item.href) continue; + + const slug = normalizeSlug(item.href); + + try { + const markdown = await getMarkdownForSlug(slug); + + if (!markdown) { + console.warn(`warning: no markdown generated for ${slug}`); + continue; + } + + const url = `${baseUrl}/docs/${slug}.md`; + + output += '---\n\n'; + + if (markdown.startsWith('# ')) { + const firstNewline = markdown.indexOf('\n'); + const title = + firstNewline === -1 ? markdown : markdown.slice(0, firstNewline); + const rest = + firstNewline === -1 + ? 
'' + : markdown.slice(firstNewline).trimStart(); + + output += `${title}\n\n`; + output += `Source: ${url}\n\n`; + if (rest) { + output += `${rest}\n\n`; + } + } else { + output += `Source: ${url}\n\n`; + output += `${markdown}\n\n`; + } + } catch (err) { + console.warn(`warning: error processing ${slug}:`, err); + } + } + } + } + + return output; +} diff --git a/lib/mdx-to-markdown.ts b/lib/mdx-to-markdown.ts index 0a434c52..63910e44 100644 --- a/lib/mdx-to-markdown.ts +++ b/lib/mdx-to-markdown.ts @@ -85,11 +85,27 @@ function normalizeLineBreaks() { for (let index = 0; index < children.length - 1; index++) { const current = children[index]; + const next = children[index + 1]; + if (current?.type === 'text') { const textNode = current as Text; if (textNode.value && !/[\s([{]$/.test(textNode.value.trimEnd())) { textNode.value = textNode.value.trimEnd() + ' '; } + } else if ( + (current?.type === 'link' || current?.type === 'image') && + next?.type === 'text' + ) { + // ensure links/images have proper spacing before following text + // only add space if missing and next text starts with a letter (not punctuation) + const nextTextNode = next as Text; + if ( + nextTextNode.value && + !/^\s/.test(nextTextNode.value) && + /^[a-zA-Z]/.test(nextTextNode.value) + ) { + nextTextNode.value = ' ' + nextTextNode.value; + } } } }); @@ -330,7 +346,13 @@ function resolveDocumentHref(href: string, currentSlug: string) { if (docSlug !== undefined) { const canonicalSlug = DOC_ROUTE_LOOKUP.get(docSlug) ?? docSlug; const clean = canonicalSlug.replace(/^\/+/, ''); - absolutePath = clean ? `/docs/${clean}` : '/docs'; + absolutePath = clean ? 
`/docs/${clean}.md` : '/docs'; + } else if ( + absolutePath && + absolutePath.startsWith('/docs/') && + !absolutePath.endsWith('.md') + ) { + absolutePath = `${absolutePath}.md`; } if (!absolutePath) { diff --git a/package.json b/package.json index 5e767020..8af1eddc 100644 --- a/package.json +++ b/package.json @@ -4,9 +4,8 @@ "private": true, "scripts": { "dev": "next dev", - "build": "npm run build:search && npm run build:llms && next build", + "build": "npm run build:search && next build", "build:search": "tsx lib/generateSearchIndex.ts", - "build:llms": "npx --yes tsdown lib/generate-llms.ts --outDir dist --skipLibCheck && node dist/generate-llms.mjs", "start": "npm run build:search && next start", "lint": "next lint", "format": "prettier --write **/*.{ts,tsx,js,json,md,mdx}",