From 21bb6e230da050cea3161eb84043efc377f7080c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Ara=C3=BAjo?= Date: Tue, 29 Apr 2025 10:25:07 -0300 Subject: [PATCH 01/15] feat: create llms.txt generator --- src/constants.mjs | 2 + src/generators/index.mjs | 2 + src/generators/llms-txt/index.mjs | 121 ++++++++++++++++++++++++++++++ 3 files changed, 125 insertions(+) create mode 100644 src/generators/llms-txt/index.mjs diff --git a/src/constants.mjs b/src/constants.mjs index 040a8c10..1c863610 100644 --- a/src/constants.mjs +++ b/src/constants.mjs @@ -6,3 +6,5 @@ export const DOC_NODE_VERSION = process.version; // This is the Node.js CHANGELOG to be consumed to generate a list of all major Node.js versions export const DOC_NODE_CHANGELOG_URL = 'https://raw.githubusercontent.com/nodejs/node/HEAD/CHANGELOG.md'; + +export const DOC_API_LATEST_BASE_URL = 'https://nodejs.org/docs/latest'; diff --git a/src/generators/index.mjs b/src/generators/index.mjs index f4bb744a..7a9db52b 100644 --- a/src/generators/index.mjs +++ b/src/generators/index.mjs @@ -10,6 +10,7 @@ import addonVerify from './addon-verify/index.mjs'; import apiLinks from './api-links/index.mjs'; import oramaDb from './orama-db/index.mjs'; import astJs from './ast-js/index.mjs'; +import llmsTxt from './llms-txt/index.mjs'; export const publicGenerators = { 'json-simple': jsonSimple, @@ -21,6 +22,7 @@ export const publicGenerators = { 'addon-verify': addonVerify, 'api-links': apiLinks, 'orama-db': oramaDb, + 'llms-txt': llmsTxt, }; export const allGenerators = { diff --git a/src/generators/llms-txt/index.mjs b/src/generators/llms-txt/index.mjs new file mode 100644 index 00000000..48f72e40 --- /dev/null +++ b/src/generators/llms-txt/index.mjs @@ -0,0 +1,121 @@ +'use strict'; + +import { writeFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import { DOC_API_LATEST_BASE_URL } from '../../constants.mjs'; + +const IGNORE_LIST = ['doc/api/synopsis.md']; + +/** + * Extracts text content from a 
node recursively + * + * @param {import('mdast').Paragraph} node The AST node to extract text from + * @returns {string} The extracted text content + */ +function extractTextContent(node) { + if (!node) { + return ''; + } + + if (node.type === 'text' || node.type === 'inlineCode') { + return node.value; + } + + if (node.children && Array.isArray(node.children)) { + return node.children.map(extractTextContent).join(''); + } + + return ''; +} + +/** + * Extracts text from a paragraph node. + * + * @param {import('mdast').Paragraph} node The paragraph node to extract text from + * @returns {string} The extracted text content + * @throws {Error} If the node is not a paragraph + */ +function paragraphToString(node) { + if (node.type !== 'paragraph') { + throw new Error('Node is not a paragraph'); + } + + return node.children.map(extractTextContent).join(''); +} + +/** + * Generates a documentation entry string + * + * @param {ApiDocMetadataEntry} entry + * @returns {string} + */ +function generateDocEntry(entry) { + if (IGNORE_LIST.includes(entry.api_doc_source)) { + return null; + } + + if (entry.heading.depth !== 1) { + return null; + } + + // Remove the leading /doc of string + const path = entry.api_doc_source.replace(/^doc\//, ''); + + const entryLink = `[${entry.heading.data.name}](${DOC_API_LATEST_BASE_URL}/${path})`; + + const descriptionNode = entry.content.children.find( + child => child.type === 'paragraph' + ); + + if (!descriptionNode) { + console.warn(`No description found for entry: ${entry.api_doc_source}`); + return `- ${entryLink}`; + } + + const description = paragraphToString(descriptionNode).replace( + /[\r\n]+/g, + ' ' + ); + + return `- ${entryLink}: ${description}`; +} + +/** + * @typedef {Array} Input + * + * @type {GeneratorMetadata} + */ +export default { + name: 'llms-txt', + version: '0.1.0', + description: 'Generates a llms.txt file of the API docs', + dependsOn: 'ast', + + /** + * @param {Input} input The API documentation metadata + * 
@param {Partial} options Generator options + * @returns {Promise} The generated documentation text + */ + async generate(input, options) { + const output = [ + '# Node.js Documentation', + '> Node.js is an open-source, cross-platform JavaScript runtime environment that executes JavaScript code outside a web browser. Node.js uses an event-driven, non-blocking I/O model that makes it lightweight and efficient for building scalable network applications.', + '## Introduction', + `- [About this documentation](${DOC_API_LATEST_BASE_URL}/api/documentation.md)`, + `- [Usage and example](${DOC_API_LATEST_BASE_URL}/api/synopsis.md)`, + '## API Documentation', + ]; + + const docEntries = input.map(generateDocEntry).filter(Boolean); + + output.push(...docEntries); + + const resultText = output.join('\n'); + + if (options.output) { + await writeFile(join(options.output, 'llms.txt'), resultText); + } + + return resultText; + }, +}; From 277f607d838de25220c549514c883227042eb148 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Ara=C3=BAjo?= Date: Tue, 29 Apr 2025 16:53:07 -0300 Subject: [PATCH 02/15] refactor: improvements --- src/constants.mjs | 2 - src/generators/llms-txt/constants.mjs | 3 + src/generators/llms-txt/index.mjs | 110 +++--------------- src/generators/llms-txt/template.txt | 9 ++ .../llms-txt/utils/generateDocEntry.mjs | 39 +++++++ .../llms-txt/utils/paragraphToString.mjs | 36 ++++++ 6 files changed, 105 insertions(+), 94 deletions(-) create mode 100644 src/generators/llms-txt/constants.mjs create mode 100644 src/generators/llms-txt/template.txt create mode 100644 src/generators/llms-txt/utils/generateDocEntry.mjs create mode 100644 src/generators/llms-txt/utils/paragraphToString.mjs diff --git a/src/constants.mjs b/src/constants.mjs index 1c863610..040a8c10 100644 --- a/src/constants.mjs +++ b/src/constants.mjs @@ -6,5 +6,3 @@ export const DOC_NODE_VERSION = process.version; // This is the Node.js CHANGELOG to be consumed to generate a list of all major Node.js 
versions export const DOC_NODE_CHANGELOG_URL = 'https://raw.githubusercontent.com/nodejs/node/HEAD/CHANGELOG.md'; - -export const DOC_API_LATEST_BASE_URL = 'https://nodejs.org/docs/latest'; diff --git a/src/generators/llms-txt/constants.mjs b/src/generators/llms-txt/constants.mjs new file mode 100644 index 00000000..aee42dd6 --- /dev/null +++ b/src/generators/llms-txt/constants.mjs @@ -0,0 +1,3 @@ +export const ENTRY_IGNORE_LIST = ['doc/api/synopsis.md']; + +export const LATEST_DOC_API_BASE_URL = 'https://nodejs.org/docs/latest'; diff --git a/src/generators/llms-txt/index.mjs b/src/generators/llms-txt/index.mjs index 48f72e40..c0a5780f 100644 --- a/src/generators/llms-txt/index.mjs +++ b/src/generators/llms-txt/index.mjs @@ -1,84 +1,9 @@ 'use strict'; -import { writeFile } from 'node:fs/promises'; +import { readFile, writeFile } from 'node:fs/promises'; import { join } from 'node:path'; -import { DOC_API_LATEST_BASE_URL } from '../../constants.mjs'; - -const IGNORE_LIST = ['doc/api/synopsis.md']; - -/** - * Extracts text content from a node recursively - * - * @param {import('mdast').Paragraph} node The AST node to extract text from - * @returns {string} The extracted text content - */ -function extractTextContent(node) { - if (!node) { - return ''; - } - - if (node.type === 'text' || node.type === 'inlineCode') { - return node.value; - } - - if (node.children && Array.isArray(node.children)) { - return node.children.map(extractTextContent).join(''); - } - - return ''; -} - -/** - * Extracts text from a paragraph node. 
- * - * @param {import('mdast').Paragraph} node The paragraph node to extract text from - * @returns {string} The extracted text content - * @throws {Error} If the node is not a paragraph - */ -function paragraphToString(node) { - if (node.type !== 'paragraph') { - throw new Error('Node is not a paragraph'); - } - - return node.children.map(extractTextContent).join(''); -} - -/** - * Generates a documentation entry string - * - * @param {ApiDocMetadataEntry} entry - * @returns {string} - */ -function generateDocEntry(entry) { - if (IGNORE_LIST.includes(entry.api_doc_source)) { - return null; - } - - if (entry.heading.depth !== 1) { - return null; - } - - // Remove the leading /doc of string - const path = entry.api_doc_source.replace(/^doc\//, ''); - - const entryLink = `[${entry.heading.data.name}](${DOC_API_LATEST_BASE_URL}/${path})`; - - const descriptionNode = entry.content.children.find( - child => child.type === 'paragraph' - ); - - if (!descriptionNode) { - console.warn(`No description found for entry: ${entry.api_doc_source}`); - return `- ${entryLink}`; - } - - const description = paragraphToString(descriptionNode).replace( - /[\r\n]+/g, - ' ' - ); - - return `- ${entryLink}: ${description}`; -} +import { generateDocEntry } from './utils/generateDocEntry.mjs'; +import { LATEST_DOC_API_BASE_URL } from './constants.mjs'; /** * @typedef {Array} Input @@ -87,7 +12,7 @@ function generateDocEntry(entry) { */ export default { name: 'llms-txt', - version: '0.1.0', + version: '1.0.0', description: 'Generates a llms.txt file of the API docs', dependsOn: 'ast', @@ -97,25 +22,26 @@ export default { * @returns {Promise} The generated documentation text */ async generate(input, options) { - const output = [ - '# Node.js Documentation', - '> Node.js is an open-source, cross-platform JavaScript runtime environment that executes JavaScript code outside a web browser. 
Node.js uses an event-driven, non-blocking I/O model that makes it lightweight and efficient for building scalable network applications.', - '## Introduction', - `- [About this documentation](${DOC_API_LATEST_BASE_URL}/api/documentation.md)`, - `- [Usage and example](${DOC_API_LATEST_BASE_URL}/api/synopsis.md)`, - '## API Documentation', - ]; + const template = await readFile( + join(import.meta.dirname, 'template.txt'), + 'utf-8' + ); - const docEntries = input.map(generateDocEntry).filter(Boolean); + const apiDocEntries = input.map(generateDocEntry).filter(Boolean); - output.push(...docEntries); + const introductionEntries = [ + `- [About this documentation](${LATEST_DOC_API_BASE_URL}/api/documentation.md)`, + `- [Usage and example](${LATEST_DOC_API_BASE_URL}/api/synopsis.md)`, + ]; - const resultText = output.join('\n'); + const filledTemplate = template + .replace('__INTRODUCTION__', introductionEntries.join('\n')) + .replace('__API_DOCS__', apiDocEntries.join('\n')); if (options.output) { - await writeFile(join(options.output, 'llms.txt'), resultText); + await writeFile(join(options.output, 'llms.txt'), filledTemplate); } - return resultText; + return filledTemplate; }, }; diff --git a/src/generators/llms-txt/template.txt b/src/generators/llms-txt/template.txt new file mode 100644 index 00000000..4106b300 --- /dev/null +++ b/src/generators/llms-txt/template.txt @@ -0,0 +1,9 @@ +# Node.js Documentation + +> Node.js is an open-source, cross-platform JavaScript runtime environment that executes JavaScript code outside a web browser. Node.js uses an event-driven, non-blocking I/O model that makes it lightweight and efficient for building scalable network applications. 
+ +## Introduction +__INTRODUCTION__ + +## API Documentations +__API_DOCS__ diff --git a/src/generators/llms-txt/utils/generateDocEntry.mjs b/src/generators/llms-txt/utils/generateDocEntry.mjs new file mode 100644 index 00000000..61f5fb96 --- /dev/null +++ b/src/generators/llms-txt/utils/generateDocEntry.mjs @@ -0,0 +1,39 @@ +import { ENTRY_IGNORE_LIST, LATEST_DOC_API_BASE_URL } from '../constants.mjs'; +import { paragraphToString } from './paragraphToString.mjs'; + +/** + * Generates a documentation entry string + * + * @param {ApiDocMetadataEntry} entry + * @returns {string} + */ +export function generateDocEntry(entry) { + if (ENTRY_IGNORE_LIST.includes(entry.api_doc_source)) { + return null; + } + + if (entry.heading.depth !== 1) { + return null; + } + + // Remove the leading /doc of string + const path = entry.api_doc_source.replace(/^doc\//, ''); + + const entryLink = `[${entry.heading.data.name}](${LATEST_DOC_API_BASE_URL}/${path})`; + + const descriptionNode = entry.content.children.find( + child => child.type === 'paragraph' + ); + + if (!descriptionNode) { + console.warn(`No description found for entry: ${entry.api_doc_source}`); + return `- ${entryLink}`; + } + + const description = paragraphToString(descriptionNode).replace( + /[\r\n]+/g, + ' ' + ); + + return `- ${entryLink}: ${description}`; +} diff --git a/src/generators/llms-txt/utils/paragraphToString.mjs b/src/generators/llms-txt/utils/paragraphToString.mjs new file mode 100644 index 00000000..93f4134a --- /dev/null +++ b/src/generators/llms-txt/utils/paragraphToString.mjs @@ -0,0 +1,36 @@ +/** + * Extracts text content from a node recursively + * + * @param {import('mdast').Paragraph} node The AST node to extract text from + * @returns {string} The extracted text content + */ +function extractTextContent(node) { + if (!node) { + return ''; + } + + if (node.type === 'text' || node.type === 'inlineCode') { + return node.value; + } + + if (node.children && Array.isArray(node.children)) { + return 
node.children.map(extractTextContent).join(''); + } + + return ''; +} + +/** + * Extracts text from a paragraph node. + * + * @param {import('mdast').Paragraph} node The paragraph node to extract text from + * @returns {string} The extracted text content + * @throws {Error} If the node is not a paragraph + */ +export function paragraphToString(node) { + if (node.type !== 'paragraph') { + throw new Error('Node is not a paragraph'); + } + + return node.children.map(extractTextContent).join(''); +} From e5850886832fc683a1f6ab33761d8b5aaa36597b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Ara=C3=BAjo?= Date: Tue, 29 Apr 2025 17:45:52 -0300 Subject: [PATCH 03/15] fix: typo --- src/generators/llms-txt/index.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/generators/llms-txt/index.mjs b/src/generators/llms-txt/index.mjs index c0a5780f..ffb50112 100644 --- a/src/generators/llms-txt/index.mjs +++ b/src/generators/llms-txt/index.mjs @@ -31,7 +31,7 @@ export default { const introductionEntries = [ `- [About this documentation](${LATEST_DOC_API_BASE_URL}/api/documentation.md)`, - `- [Usage and example](${LATEST_DOC_API_BASE_URL}/api/synopsis.md)`, + `- [Usage and examples](${LATEST_DOC_API_BASE_URL}/api/synopsis.md)`, ]; const filledTemplate = template From 32fb861118ca6b686ee1d8b650a483507bb201ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Ara=C3=BAjo?= Date: Wed, 30 Apr 2025 10:53:33 -0300 Subject: [PATCH 04/15] refactor: remove paragraphToString util --- .../llms-txt/utils/generateDocEntry.mjs | 4 +-- .../llms-txt/utils/paragraphToString.mjs | 36 ------------------- src/utils/unist.mjs | 34 ++++++++++-------- 3 files changed, 22 insertions(+), 52 deletions(-) delete mode 100644 src/generators/llms-txt/utils/paragraphToString.mjs diff --git a/src/generators/llms-txt/utils/generateDocEntry.mjs b/src/generators/llms-txt/utils/generateDocEntry.mjs index 61f5fb96..1ec8fe59 100644 --- a/src/generators/llms-txt/utils/generateDocEntry.mjs 
+++ b/src/generators/llms-txt/utils/generateDocEntry.mjs @@ -1,5 +1,5 @@ +import { transformNodeToString } from '../../../utils/unist.mjs'; import { ENTRY_IGNORE_LIST, LATEST_DOC_API_BASE_URL } from '../constants.mjs'; -import { paragraphToString } from './paragraphToString.mjs'; /** * Generates a documentation entry string @@ -30,7 +30,7 @@ export function generateDocEntry(entry) { return `- ${entryLink}`; } - const description = paragraphToString(descriptionNode).replace( + const description = transformNodeToString(descriptionNode).replace( /[\r\n]+/g, ' ' ); diff --git a/src/generators/llms-txt/utils/paragraphToString.mjs b/src/generators/llms-txt/utils/paragraphToString.mjs deleted file mode 100644 index 93f4134a..00000000 --- a/src/generators/llms-txt/utils/paragraphToString.mjs +++ /dev/null @@ -1,36 +0,0 @@ -/** - * Extracts text content from a node recursively - * - * @param {import('mdast').Paragraph} node The AST node to extract text from - * @returns {string} The extracted text content - */ -function extractTextContent(node) { - if (!node) { - return ''; - } - - if (node.type === 'text' || node.type === 'inlineCode') { - return node.value; - } - - if (node.children && Array.isArray(node.children)) { - return node.children.map(extractTextContent).join(''); - } - - return ''; -} - -/** - * Extracts text from a paragraph node. 
- * - * @param {import('mdast').Paragraph} node The paragraph node to extract text from - * @returns {string} The extracted text content - * @throws {Error} If the node is not a paragraph - */ -export function paragraphToString(node) { - if (node.type !== 'paragraph') { - throw new Error('Node is not a paragraph'); - } - - return node.children.map(extractTextContent).join(''); -} diff --git a/src/utils/unist.mjs b/src/utils/unist.mjs index 32270bb3..c365beb5 100644 --- a/src/utils/unist.mjs +++ b/src/utils/unist.mjs @@ -2,6 +2,25 @@ import { pointEnd, pointStart } from 'unist-util-position'; +/** + * Extracts text content from a node recursively + * + * @param {import('unist').Node} node The Node to be transformed into a string + * @returns {string} The transformed Node as a string + */ +export const transformNodeToString = node => { + switch (node.type) { + case 'inlineCode': + return `\`${node.value}\``; + case 'strong': + return `**${transformNodesToString(node.children)}**`; + case 'emphasis': + return `_${transformNodesToString(node.children)}_`; + default: + return node.children ? transformNodesToString(node.children) : node.value; + } +}; + /** * This utility allows us to join children Nodes into one * and transfor them back to what their source would look like @@ -10,20 +29,7 @@ import { pointEnd, pointStart } from 'unist-util-position'; * @returns {string} The parsed and joined nodes as a string */ export const transformNodesToString = nodes => { - const mappedChildren = nodes.map(node => { - switch (node.type) { - case 'inlineCode': - return `\`${node.value}\``; - case 'strong': - return `**${transformNodesToString(node.children)}**`; - case 'emphasis': - return `_${transformNodesToString(node.children)}_`; - default: - return node.children - ? 
transformNodesToString(node.children) - : node.value; - } - }); + const mappedChildren = nodes.map(node => transformNodeToString(node)); return mappedChildren.join(''); }; From b074a7ffbc8d45a1ddc1ef4f35376badc94f9ead Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Ara=C3=BAjo?= Date: Wed, 30 Apr 2025 11:57:35 -0300 Subject: [PATCH 05/15] refactor: some improvements --- src/constants.mjs | 3 ++ src/generators/llms-txt/constants.mjs | 9 ++-- src/generators/llms-txt/index.mjs | 53 ++++++++++++------- .../llms-txt/utils/buildApiDocLink.mjs | 33 ++++++++++++ .../llms-txt/utils/generateDocEntry.mjs | 39 -------------- .../llms-txt/utils/getIntroLinks.mjs | 18 +++++++ 6 files changed, 95 insertions(+), 60 deletions(-) create mode 100644 src/generators/llms-txt/utils/buildApiDocLink.mjs delete mode 100644 src/generators/llms-txt/utils/generateDocEntry.mjs create mode 100644 src/generators/llms-txt/utils/getIntroLinks.mjs diff --git a/src/constants.mjs b/src/constants.mjs index 040a8c10..294710b5 100644 --- a/src/constants.mjs +++ b/src/constants.mjs @@ -6,3 +6,6 @@ export const DOC_NODE_VERSION = process.version; // This is the Node.js CHANGELOG to be consumed to generate a list of all major Node.js versions export const DOC_NODE_CHANGELOG_URL = 'https://raw.githubusercontent.com/nodejs/node/HEAD/CHANGELOG.md'; + +// The base URL for the latest Node.js documentation +export const LATEST_DOC_API_BASE_URL = 'https://nodejs.org/docs/latest/'; diff --git a/src/generators/llms-txt/constants.mjs b/src/generators/llms-txt/constants.mjs index aee42dd6..6af3dbbe 100644 --- a/src/generators/llms-txt/constants.mjs +++ b/src/generators/llms-txt/constants.mjs @@ -1,3 +1,6 @@ -export const ENTRY_IGNORE_LIST = ['doc/api/synopsis.md']; - -export const LATEST_DOC_API_BASE_URL = 'https://nodejs.org/docs/latest'; +// These files are not part of the API documentation and are manually included +// in the llms.txt file +export const ENTRY_IGNORE_LIST = [ + 'doc/api/synopsis.md', + 
'doc/api/documentation.md', +]; diff --git a/src/generators/llms-txt/index.mjs b/src/generators/llms-txt/index.mjs index ffb50112..9132fa62 100644 --- a/src/generators/llms-txt/index.mjs +++ b/src/generators/llms-txt/index.mjs @@ -1,45 +1,62 @@ -'use strict'; - import { readFile, writeFile } from 'node:fs/promises'; import { join } from 'node:path'; -import { generateDocEntry } from './utils/generateDocEntry.mjs'; -import { LATEST_DOC_API_BASE_URL } from './constants.mjs'; + +import { buildApiDocLink } from './utils/buildApiDocLink.mjs'; +import { ENTRY_IGNORE_LIST } from './constants.mjs'; +import { getIntroLinks } from './utils/getIntroLinks.mjs'; /** + * This generator generates a llms.txt file to provide information to LLMs at + * inference time + * * @typedef {Array} Input * * @type {GeneratorMetadata} */ export default { name: 'llms-txt', + version: '1.0.0', - description: 'Generates a llms.txt file of the API docs', + + description: + 'Generates a llms.txt file to provide information to LLMs at inference time', + dependsOn: 'ast', /** - * @param {Input} input The API documentation metadata - * @param {Partial} options Generator options - * @returns {Promise} The generated documentation text + * Generates a llms.txt file + * + * @param {Input} entries + * @param {Partial} options + * @returns {Promise} */ - async generate(input, options) { + async generate(entries, { output }) { const template = await readFile( join(import.meta.dirname, 'template.txt'), 'utf-8' ); - const apiDocEntries = input.map(generateDocEntry).filter(Boolean); + const introLinks = getIntroLinks().join('\n'); - const introductionEntries = [ - `- [About this documentation](${LATEST_DOC_API_BASE_URL}/api/documentation.md)`, - `- [Usage and examples](${LATEST_DOC_API_BASE_URL}/api/synopsis.md)`, - ]; + const apiDocsLinks = entries + .filter(entry => { + // Filter non top-level headings and ignored entries + return ( + entry.heading.depth === 1 || ENTRY_IGNORE_LIST.includes(entry.path) + ); + 
}) + .map(entry => { + const link = buildApiDocLink(entry); + return `- ${link}`; + }) + .join('\n'); const filledTemplate = template - .replace('__INTRODUCTION__', introductionEntries.join('\n')) - .replace('__API_DOCS__', apiDocEntries.join('\n')); + .replace('__INTRODUCTION__', introLinks) + .replace('__API_DOCS__', apiDocsLinks); - if (options.output) { - await writeFile(join(options.output, 'llms.txt'), filledTemplate); + if (output) { + await writeFile(join(output, 'llms.txt'), filledTemplate); } return filledTemplate; diff --git a/src/generators/llms-txt/utils/buildApiDocLink.mjs b/src/generators/llms-txt/utils/buildApiDocLink.mjs new file mode 100644 index 00000000..1030f12d --- /dev/null +++ b/src/generators/llms-txt/utils/buildApiDocLink.mjs @@ -0,0 +1,33 @@ +import { LATEST_DOC_API_BASE_URL } from '../../../constants.mjs'; +import { transformNodeToString } from '../../../utils/unist.mjs'; + +/** + * Builds a markdown link for an API doc entry + * + * @param {ApiDocMetadataEntry} entry + * @returns {string} + */ +export const buildApiDocLink = entry => { + const title = entry.heading.data.name; + + // Remove the leading doc/ from the path + const path = entry.api_doc_source.replace(/^doc\//, ''); + const url = new URL(path, LATEST_DOC_API_BASE_URL); + + const link = `[${title}](${url})`; + + // Find the first paragraph in the content + const descriptionNode = entry.content.children.find( + child => child.type === 'paragraph' + ); + + if (!descriptionNode) { + return link; + } + + const description = transformNodeToString(descriptionNode) + // Remove newlines and extra spaces + .replace(/[\r\n]+/g, ' '); + + return `${link}: ${description}`; +}; diff --git a/src/generators/llms-txt/utils/generateDocEntry.mjs b/src/generators/llms-txt/utils/generateDocEntry.mjs deleted file mode 100644 index 1ec8fe59..00000000 --- a/src/generators/llms-txt/utils/generateDocEntry.mjs +++ /dev/null @@ -1,39 +0,0 @@ -import { transformNodeToString } from 
'../../../utils/unist.mjs'; -import { ENTRY_IGNORE_LIST, LATEST_DOC_API_BASE_URL } from '../constants.mjs'; - -/** - * Generates a documentation entry string - * - * @param {ApiDocMetadataEntry} entry - * @returns {string} - */ -export function generateDocEntry(entry) { - if (ENTRY_IGNORE_LIST.includes(entry.api_doc_source)) { - return null; - } - - if (entry.heading.depth !== 1) { - return null; - } - - // Remove the leading /doc of string - const path = entry.api_doc_source.replace(/^doc\//, ''); - - const entryLink = `[${entry.heading.data.name}](${LATEST_DOC_API_BASE_URL}/${path})`; - - const descriptionNode = entry.content.children.find( - child => child.type === 'paragraph' - ); - - if (!descriptionNode) { - console.warn(`No description found for entry: ${entry.api_doc_source}`); - return `- ${entryLink}`; - } - - const description = transformNodeToString(descriptionNode).replace( - /[\r\n]+/g, - ' ' - ); - - return `- ${entryLink}: ${description}`; -} diff --git a/src/generators/llms-txt/utils/getIntroLinks.mjs b/src/generators/llms-txt/utils/getIntroLinks.mjs new file mode 100644 index 00000000..ecc501cc --- /dev/null +++ b/src/generators/llms-txt/utils/getIntroLinks.mjs @@ -0,0 +1,18 @@ +import { LATEST_DOC_API_BASE_URL } from '../constants.mjs'; + +/** + * Generates a list of introduction links for the llms.txt file + * + * @returns {string[]} + */ +export const getIntroLinks = () => { + const aboutDocUrl = new URL('/api/documentation.md', LATEST_DOC_API_BASE_URL); + const usageExamplesUrl = new URL('/api/synopsis.md', LATEST_DOC_API_BASE_URL); + + const introLinks = [ + `- [About this documentation](${aboutDocUrl})`, + `- [Usage and examples](${usageExamplesUrl})`, + ]; + + return introLinks; +}; From 646b826f4b2e3d0e19afbd112fb0089d37ac7c38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Ara=C3=BAjo?= Date: Wed, 30 Apr 2025 12:00:26 -0300 Subject: [PATCH 06/15] fix: doc api url path --- src/types.d.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 
deletions(-) diff --git a/src/types.d.ts b/src/types.d.ts index 7d513d43..b4c9bbf6 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -1,12 +1,12 @@ -import type { Heading, Root } from '@types/mdast'; +import type { Heading, Root } from 'mdast'; import type { Program } from 'acorn'; import type { SemVer } from 'semver'; import type { Data, Node, Parent, Position } from 'unist'; // Unist Node with typed Data, which allows better type inference -interface NodeWithData extends T { +type NodeWithData = T & { data: J; -} +}; declare global { export interface StabilityIndexMetadataEntry { From 419dcd6ac7e5b0035b72d65ca254381b09e6661a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Ara=C3=BAjo?= Date: Wed, 30 Apr 2025 12:00:26 -0300 Subject: [PATCH 07/15] fix: doc api url path --- src/generators/llms-txt/utils/getIntroLinks.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/generators/llms-txt/utils/getIntroLinks.mjs b/src/generators/llms-txt/utils/getIntroLinks.mjs index ecc501cc..338f7a79 100644 --- a/src/generators/llms-txt/utils/getIntroLinks.mjs +++ b/src/generators/llms-txt/utils/getIntroLinks.mjs @@ -1,4 +1,4 @@ -import { LATEST_DOC_API_BASE_URL } from '../constants.mjs'; +import { LATEST_DOC_API_BASE_URL } from '../../../constants.mjs'; /** * Generates a list of introduction links for the llms.txt file From ebc48281b23667b5e14dfe22c2dea7eaefce2b31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Ara=C3=BAjo?= Date: Thu, 1 May 2025 09:16:42 -0300 Subject: [PATCH 08/15] refactor: some changes --- src/generators/llms-txt/constants.mjs | 6 ------ src/generators/llms-txt/index.mjs | 21 ++++--------------- src/generators/llms-txt/template.txt | 3 +-- .../llms-txt/utils/getIntroLinks.mjs | 18 ---------------- src/types.d.ts | 4 ++-- 5 files changed, 7 insertions(+), 45 deletions(-) delete mode 100644 src/generators/llms-txt/constants.mjs delete mode 100644 src/generators/llms-txt/utils/getIntroLinks.mjs diff --git 
a/src/generators/llms-txt/constants.mjs b/src/generators/llms-txt/constants.mjs deleted file mode 100644 index 6af3dbbe..00000000 --- a/src/generators/llms-txt/constants.mjs +++ /dev/null @@ -1,6 +0,0 @@ -// These files are not part of the API documentation and are manually included -// in the llms.txt file -export const ENTRY_IGNORE_LIST = [ - 'doc/api/synopsis.md', - 'doc/api/documentation.md', -]; diff --git a/src/generators/llms-txt/index.mjs b/src/generators/llms-txt/index.mjs index 9132fa62..02899eb9 100644 --- a/src/generators/llms-txt/index.mjs +++ b/src/generators/llms-txt/index.mjs @@ -2,8 +2,6 @@ import { readFile, writeFile } from 'node:fs/promises'; import { join } from 'node:path'; import { buildApiDocLink } from './utils/buildApiDocLink.mjs'; -import { ENTRY_IGNORE_LIST } from './constants.mjs'; -import { getIntroLinks } from './utils/getIntroLinks.mjs'; /** * This generator generates a llms.txt file to provide information to LLMs at @@ -36,24 +34,13 @@ export default { 'utf-8' ); - const introLinks = getIntroLinks().join('\n'); - const apiDocsLinks = entries - .filter(entry => { - // Filter non top-level headings and ignored entries - return ( - entry.heading.depth === 1 || ENTRY_IGNORE_LIST.includes(entry.path) - ); - }) - .map(entry => { - const link = buildApiDocLink(entry); - return `- ${link}`; - }) + // Filter non top-level headings + .filter(entry => entry.heading.depth === 1) + .map(entry => `- ${buildApiDocLink(entry)}`) .join('\n'); - const filledTemplate = template - .replace('__INTRODUCTION__', introLinks) - .replace('__API_DOCS__', apiDocsLinks); + const filledTemplate = template.replace('__API_DOCS__', apiDocsLinks); if (output) { await writeFile(join(output, 'llms.txt'), filledTemplate); diff --git a/src/generators/llms-txt/template.txt b/src/generators/llms-txt/template.txt index 4106b300..20c71647 100644 --- a/src/generators/llms-txt/template.txt +++ b/src/generators/llms-txt/template.txt @@ -2,8 +2,7 @@ > Node.js is an open-source, 
cross-platform JavaScript runtime environment that executes JavaScript code outside a web browser. Node.js uses an event-driven, non-blocking I/O model that makes it lightweight and efficient for building scalable network applications. -## Introduction -__INTRODUCTION__ +Below are the sections of the API documentation. Look out especially towards the links that point towards guidance/introductioon to the structure of this documentation. ## API Documentations __API_DOCS__ diff --git a/src/generators/llms-txt/utils/getIntroLinks.mjs b/src/generators/llms-txt/utils/getIntroLinks.mjs deleted file mode 100644 index 338f7a79..00000000 --- a/src/generators/llms-txt/utils/getIntroLinks.mjs +++ /dev/null @@ -1,18 +0,0 @@ -import { LATEST_DOC_API_BASE_URL } from '../../../constants.mjs'; - -/** - * Generates a list of introduction links for the llms.txt file - * - * @returns {string[]} - */ -export const getIntroLinks = () => { - const aboutDocUrl = new URL('/api/documentation.md', LATEST_DOC_API_BASE_URL); - const usageExamplesUrl = new URL('/api/synopsis.md', LATEST_DOC_API_BASE_URL); - - const introLinks = [ - `- [About this documentation](${aboutDocUrl})`, - `- [Usage and examples](${usageExamplesUrl})`, - ]; - - return introLinks; -}; diff --git a/src/types.d.ts b/src/types.d.ts index b4c9bbf6..bc23007b 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -4,9 +4,9 @@ import type { SemVer } from 'semver'; import type { Data, Node, Parent, Position } from 'unist'; // Unist Node with typed Data, which allows better type inference -type NodeWithData = T & { +interface NodeWithData extends T { data: J; -}; +} declare global { export interface StabilityIndexMetadataEntry { From 28aaa10df0f38d35c6073f5622356a677a81fb40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Ara=C3=BAjo?= Date: Thu, 1 May 2025 13:25:08 -0300 Subject: [PATCH 09/15] feat: add llm_description prop --- .../llms-txt/utils/buildApiDocLink.mjs | 41 +++++++++++++------ src/metadata.mjs | 2 + 
src/types.d.ts | 3 ++ src/utils/parser/index.mjs | 3 +- 4 files changed, 36 insertions(+), 13 deletions(-) diff --git a/src/generators/llms-txt/utils/buildApiDocLink.mjs b/src/generators/llms-txt/utils/buildApiDocLink.mjs index 1030f12d..c2d1f290 100644 --- a/src/generators/llms-txt/utils/buildApiDocLink.mjs +++ b/src/generators/llms-txt/utils/buildApiDocLink.mjs @@ -1,6 +1,34 @@ import { LATEST_DOC_API_BASE_URL } from '../../../constants.mjs'; import { transformNodeToString } from '../../../utils/unist.mjs'; +/** + * Retrieves the description of a given API doc entry. It first checks whether + * the entry has a llm_description property. If not, it extracts the first + * paragraph from the entry's content. + * + * @param {ApiDocMetadataEntry} entry + * @returns {string} + */ +const getEntryDescription = entry => { + if (entry.llm_description) { + return entry.llm_description; + } + + const descriptionNode = entry.content.children.find( + child => child.type === 'paragraph' + ); + + if (!descriptionNode) { + return ''; + } + + return ( + transformNodeToString(descriptionNode) + // Remove newlines and extra spaces + .replace(/[\r\n]+/g, '') + ); +}; + /** * Builds a markdown link for an API doc entry * @@ -16,18 +44,7 @@ export const buildApiDocLink = entry => { const link = `[${title}](${url})`; - // Find the first paragraph in the content - const descriptionNode = entry.content.children.find( - child => child.type === 'paragraph' - ); - - if (!descriptionNode) { - return link; - } - - const description = transformNodeToString(descriptionNode) - // Remove newlines and extra spaces - .replace(/[\r\n]+/g, ' '); + const description = getEntryDescription(entry); return `${link}: ${description}`; }; diff --git a/src/metadata.mjs b/src/metadata.mjs index df954074..6b65df94 100644 --- a/src/metadata.mjs +++ b/src/metadata.mjs @@ -131,6 +131,7 @@ const createMetadata = slugger => { updates = [], changes = [], tags = [], + llm_description, } = internalMetadata.properties; // 
Also add the slug to the heading data as it is used to build the heading @@ -157,6 +158,7 @@ const createMetadata = slugger => { content: section, tags, introduced_in, + llm_description, yaml_position: internalMetadata.yaml_position, }; }, diff --git a/src/types.d.ts b/src/types.d.ts index bc23007b..6519e2fe 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -56,6 +56,7 @@ declare global { introduced_in?: string; napiVersion?: number; tags?: Array; + llm_description?: string; } export interface ApiDocMetadataEntry { @@ -90,6 +91,8 @@ declare global { // Extra YAML section entries that are stringd and serve // to provide additional metadata about the API doc entry tags: Array; + // The llms.txt specific description + llm_description: string | undefined; // The postion of the YAML of the API doc entry yaml_position: Position; } diff --git a/src/utils/parser/index.mjs b/src/utils/parser/index.mjs index a58ae14f..dedfa4b3 100644 --- a/src/utils/parser/index.mjs +++ b/src/utils/parser/index.mjs @@ -95,7 +95,8 @@ export const parseYAMLIntoMetadata = yamlString => { .replace('introduced_in=', 'introduced_in: ') .replace('source_link=', 'source_link: ') .replace('type=', 'type: ') - .replace('name=', 'name: '); + .replace('name=', 'name: ') + .replace('llm_description=', 'llm_description: '); // Ensures that the parsed YAML is an object, because even if it is not // i.e. 
a plain string or an array, it will simply not result into anything From f00fe97d494870cdab7ee35131b63c860dd50c49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Ara=C3=BAjo?= Date: Thu, 1 May 2025 13:31:29 -0300 Subject: [PATCH 10/15] test: add llm_description prop --- src/test/metadata.test.mjs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/test/metadata.test.mjs b/src/test/metadata.test.mjs index eff0f9f8..f1c86a3a 100644 --- a/src/test/metadata.test.mjs +++ b/src/test/metadata.test.mjs @@ -72,6 +72,7 @@ describe('createMetadata', () => { heading, n_api_version: undefined, introduced_in: undefined, + llm_description: undefined, removed_in: undefined, slug: 'test-heading', source_link: 'test.com', From 1bf37488fd6aeec5531e5e0383720a9e8d55328f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Ara=C3=BAjo?= Date: Fri, 2 May 2025 16:06:52 -0300 Subject: [PATCH 11/15] refactor: remove template replace --- src/generators/llms-txt/index.mjs | 2 +- src/generators/llms-txt/template.txt | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/generators/llms-txt/index.mjs b/src/generators/llms-txt/index.mjs index 02899eb9..997fb348 100644 --- a/src/generators/llms-txt/index.mjs +++ b/src/generators/llms-txt/index.mjs @@ -40,7 +40,7 @@ export default { .map(entry => `- ${buildApiDocLink(entry)}`) .join('\n'); - const filledTemplate = template.replace('__API_DOCS__', apiDocsLinks); + const filledTemplate = `${template}${apiDocsLinks}`; if (output) { await writeFile(join(output, 'llms.txt'), filledTemplate); diff --git a/src/generators/llms-txt/template.txt b/src/generators/llms-txt/template.txt index 20c71647..95d096a2 100644 --- a/src/generators/llms-txt/template.txt +++ b/src/generators/llms-txt/template.txt @@ -5,4 +5,3 @@ Below are the sections of the API documentation. Look out especially towards the links that point towards guidance/introductioon to the structure of this documentation. 
## API Documentations -__API_DOCS__ From 9904ecb8ed92c0fb7ae9f17dfafda498cbcb1794 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Ara=C3=BAjo?= Date: Fri, 2 May 2025 16:51:37 -0300 Subject: [PATCH 12/15] feat(linter): create llm description rule --- src/linter/constants.mjs | 2 ++ src/linter/rules/index.mjs | 2 ++ src/linter/rules/missing-llm-description.mjs | 34 ++++++++++++++++++++ 3 files changed, 38 insertions(+) create mode 100644 src/linter/rules/missing-llm-description.mjs diff --git a/src/linter/constants.mjs b/src/linter/constants.mjs index 95d29b50..85f2589a 100644 --- a/src/linter/constants.mjs +++ b/src/linter/constants.mjs @@ -5,4 +5,6 @@ export const LINT_MESSAGES = { missingChangeVersion: 'Missing version field in the API doc entry', invalidChangeVersion: 'Invalid version number: {{version}}', duplicateStabilityNode: 'Duplicate stability node', + missingLlmDescription: + 'Missing llm_description field or paragraph node in the API doc entry', }; diff --git a/src/linter/rules/index.mjs b/src/linter/rules/index.mjs index 52361afb..bb06a4a5 100644 --- a/src/linter/rules/index.mjs +++ b/src/linter/rules/index.mjs @@ -3,6 +3,7 @@ import { duplicateStabilityNodes } from './duplicate-stability-nodes.mjs'; import { invalidChangeVersion } from './invalid-change-version.mjs'; import { missingIntroducedIn } from './missing-introduced-in.mjs'; +import { missingLlmDescription } from './missing-llm-description.mjs'; /** * @type {Record} @@ -11,4 +12,5 @@ export default { 'duplicate-stability-nodes': duplicateStabilityNodes, 'invalid-change-version': invalidChangeVersion, 'missing-introduced-in': missingIntroducedIn, + 'missing-llm-description': missingLlmDescription, }; diff --git a/src/linter/rules/missing-llm-description.mjs b/src/linter/rules/missing-llm-description.mjs new file mode 100644 index 00000000..da7f810a --- /dev/null +++ b/src/linter/rules/missing-llm-description.mjs @@ -0,0 +1,34 @@ +import { LINT_MESSAGES } from '../constants.mjs'; + +/** + * 
Checks if a top-level entry is missing a llm_description field or a paragraph + * node. + * + * @param {ApiDocMetadataEntry[]} entries + * @returns {Array} + */ +export const missingLlmDescription = entries => { + const issues = []; + + for (const entry of entries) { + if (entry.heading.depth !== 1 || entry.llm_description) { + continue; + } + + const descriptionNode = entry.content.children.find( + child => child.type === 'paragraph' + ); + + if (!descriptionNode) { + issues.push({ + level: 'warn', + message: LINT_MESSAGES.missingLlmDescription, + location: { + path: entry.api_doc_source, + }, + }); + } + } + + return issues; +}; From ee63084901481044803a95f77490056b66d34ef0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Ara=C3=BAjo?= Date: Sat, 3 May 2025 10:11:24 -0300 Subject: [PATCH 13/15] refactor(linter): remove for-of loop --- src/linter/rules/missing-llm-description.mjs | 51 ++++++++++++-------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/src/linter/rules/missing-llm-description.mjs b/src/linter/rules/missing-llm-description.mjs index da7f810a..18d5b1d5 100644 --- a/src/linter/rules/missing-llm-description.mjs +++ b/src/linter/rules/missing-llm-description.mjs @@ -8,27 +8,40 @@ import { LINT_MESSAGES } from '../constants.mjs'; * @returns {Array} */ export const missingLlmDescription = entries => { - const issues = []; + return entries + .filter(entry => { + // Only process top-level headings + if (entry.heading.depth !== 1) { + return false; + } - for (const entry of entries) { - if (entry.heading.depth !== 1 || entry.llm_description) { - continue; - } + // Skip entries that have an llm_description property + if (entry.llm_description !== undefined) { + return false; + } - const descriptionNode = entry.content.children.find( - child => child.type === 'paragraph' - ); + const hasParagraph = entry.content.children.some( + node => node.type === 'paragraph' + ); - if (!descriptionNode) { - issues.push({ - level: 'warn', - message: 
LINT_MESSAGES.missingLlmDescription, - location: { - path: entry.api_doc_source, - }, - }); - } - } + // Skip entries that contain a paragraph that can be used as a fallback. + if (hasParagraph) { + return false; + } - return issues; + return true; + }) + .map(entry => mapToMissingEntryWarning(entry)); }; + +/** + * Maps an entry to a warning for missing llm description. + * + * @param {ApiDocMetadataEntry} entry + * @returns {import('../types.d.ts').LintIssue} + */ +const mapToMissingEntryWarning = entry => ({ + level: 'warn', + message: LINT_MESSAGES.missingLlmDescription, + location: { path: entry.api_doc_source }, +}); From c8d86c77005c55bb2e5808f85c527bf512d1cf78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Ara=C3=BAjo?= Date: Sat, 3 May 2025 12:25:11 -0300 Subject: [PATCH 14/15] refactor(llms-txt): remove docs url suffix --- src/constants.mjs | 4 ++-- src/generators/llms-txt/utils/buildApiDocLink.mjs | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/constants.mjs b/src/constants.mjs index 294710b5..96d08969 100644 --- a/src/constants.mjs +++ b/src/constants.mjs @@ -7,5 +7,5 @@ export const DOC_NODE_VERSION = process.version; export const DOC_NODE_CHANGELOG_URL = 'https://raw.githubusercontent.com/nodejs/node/HEAD/CHANGELOG.md'; -// The base URL for the latest Node.js documentation -export const LATEST_DOC_API_BASE_URL = 'https://nodejs.org/docs/latest/'; +// The base URL for the Node.js website +export const BASE_URL = 'https://nodejs.org/'; diff --git a/src/generators/llms-txt/utils/buildApiDocLink.mjs b/src/generators/llms-txt/utils/buildApiDocLink.mjs index c2d1f290..e02baf17 100644 --- a/src/generators/llms-txt/utils/buildApiDocLink.mjs +++ b/src/generators/llms-txt/utils/buildApiDocLink.mjs @@ -38,8 +38,7 @@ const getEntryDescription = entry => { export const buildApiDocLink = entry => { const title = entry.heading.data.name; - // Remove the leading doc/ from the path - const path = entry.api_doc_source.replace(/^doc\//, 
''); + const path = entry.api_doc_source.replace(/^doc\//, '/docs/latest/'); const url = new URL(path, LATEST_DOC_API_BASE_URL); const link = `[${title}](${url})`; From b4dbc2d90001baa28e21426c6442d7d86d9c1388 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Ara=C3=BAjo?= Date: Sat, 3 May 2025 12:26:44 -0300 Subject: [PATCH 15/15] fix: base url const --- src/generators/llms-txt/utils/buildApiDocLink.mjs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/generators/llms-txt/utils/buildApiDocLink.mjs b/src/generators/llms-txt/utils/buildApiDocLink.mjs index e02baf17..0752fcbe 100644 --- a/src/generators/llms-txt/utils/buildApiDocLink.mjs +++ b/src/generators/llms-txt/utils/buildApiDocLink.mjs @@ -1,4 +1,4 @@ -import { LATEST_DOC_API_BASE_URL } from '../../../constants.mjs'; +import { BASE_URL } from '../../../constants.mjs'; import { transformNodeToString } from '../../../utils/unist.mjs'; /** @@ -39,7 +39,7 @@ export const buildApiDocLink = entry => { const title = entry.heading.data.name; const path = entry.api_doc_source.replace(/^doc\//, '/docs/latest/'); - const url = new URL(path, LATEST_DOC_API_BASE_URL); + const url = new URL(path, BASE_URL); const link = `[${title}](${url})`;