/**
 * Generator that produces an `llms.txt` file, a plain-text index meant to be
 * consumed by LLMs at inference time.
 *
 * The output is the contents of the sibling `template.txt` file followed by
 * one markdown list item per top-level (depth 1) API doc entry.
 *
 * @typedef {Array<ApiDocMetadataEntry>} Input
 *
 * @type {GeneratorMetadata}
 */
export default {
  name: 'llms-txt',

  version: '1.0.0',

  description:
    'Generates a llms.txt file to provide information to LLMs at inference time',

  dependsOn: 'ast',

  /**
   * Builds the llms.txt contents and, when an output directory is given,
   * writes them to `<output>/llms.txt`.
   *
   * @param {Input} entries API doc entries; only those whose heading depth is 1 are listed
   * @param {{ output?: string }} options Only `output` is read here; when falsy nothing is written to disk
   * @returns {Promise<string>} The generated llms.txt contents
   */
  async generate(entries, { output }) {
    // The template lives next to this module, so resolve it relative to it
    const templatePath = join(import.meta.dirname, 'template.txt');
    const header = await readFile(templatePath, 'utf-8');

    const listItems = [];

    for (const entry of entries) {
      // Nested headings belong to a page that is already listed
      if (entry.heading.depth !== 1) {
        continue;
      }

      listItems.push(`- ${buildApiDocLink(entry)}`);
    }

    const contents = `${header}${listItems.join('\n')}`;

    if (output) {
      const destination = join(output, 'llms.txt');

      await writeFile(destination, contents);
    }

    return contents;
  },
};
/**
 * Flattens a piece of text onto a single line: every run of line breaks
 * becomes one space, whitespace around the breaks is dropped, and the result
 * has no leading or trailing whitespace.
 *
 * A description is emitted inside a one-line markdown list item, so a raw
 * line break would split the item, and simply deleting the break would glue
 * the neighbouring words together.
 *
 * @param {unknown} text Value to flatten; it is coerced to a string first
 * @returns {string}
 */
const toSingleLine = text =>
  String(text)
    .split(/[\r\n]+/)
    .map(line => line.trim())
    .filter(line => line !== '')
    .join(' ');

/**
 * Retrieves the description of a given API doc entry. A truthy
 * `llm_description` takes precedence; otherwise the first paragraph node found
 * in the entry's content is converted to text. Either way the result is
 * flattened onto a single line.
 *
 * @param {ApiDocMetadataEntry} entry
 * @returns {string} The description, or an empty string when the entry has
 * neither an `llm_description` nor a paragraph
 */
const getEntryDescription = entry => {
  if (entry.llm_description) {
    return toSingleLine(entry.llm_description);
  }

  const descriptionNode = entry.content.children.find(
    child => child.type === 'paragraph'
  );

  if (!descriptionNode) {
    return '';
  }

  return toSingleLine(transformNodeToString(descriptionNode));
};

/**
 * Builds a markdown link for an API doc entry, followed by its description,
 * in the form `[title](url): description`.
 *
 * @param {ApiDocMetadataEntry} entry
 * @returns {string}
 */
export const buildApiDocLink = entry => {
  const title = entry.heading.data.name;

  // A leading `doc/` in the source path is mapped onto `/docs/latest/`, and
  // the result is resolved against BASE_URL
  const path = entry.api_doc_source.replace(/^doc\//, '/docs/latest/');
  const url = new URL(path, BASE_URL);

  const link = `[${title}](${url})`;

  const description = getEntryDescription(entry);

  return `${link}: ${description}`;
};
/**
 * Tells whether an entry carries a usable `llm_description`.
 *
 * `null` (for instance an empty YAML value) and blank strings count as
 * missing: a description like that would render as empty text, so the entry
 * must not be exempted from the rule just because the property is defined.
 *
 * @param {ApiDocMetadataEntry} entry
 * @returns {boolean}
 */
const hasLlmDescription = entry =>
  String(entry.llm_description ?? '').trim() !== '';

/**
 * Maps an entry to a warning for a missing llm description.
 *
 * @param {ApiDocMetadataEntry} entry
 * @returns {import('../types.d.ts').LintIssue}
 */
const mapToMissingEntryWarning = entry => ({
  level: 'warn',
  message: LINT_MESSAGES.missingLlmDescription,
  location: { path: entry.api_doc_source },
});

/**
 * Checks if a top-level entry is missing both a usable llm_description field
 * and a paragraph node that could serve as a fallback description.
 *
 * @param {ApiDocMetadataEntry[]} entries
 * @returns {Array<import('../types.d.ts').LintIssue>} One warning per offending entry
 */
export const missingLlmDescription = entries => {
  return entries
    .filter(entry => {
      // Only process top-level headings
      if (entry.heading.depth !== 1) {
        return false;
      }

      // Skip entries that provide a non-blank llm_description
      if (hasLlmDescription(entry)) {
        return false;
      }

      // Skip entries that contain a paragraph that can be used as a fallback
      const hasParagraph = entry.content.children.some(
        node => node.type === 'paragraph'
      );

      return !hasParagraph;
    })
    .map(entry => mapToMissingEntryWarning(entry));
};
/**
 * Extracts text content from a node recursively, re-adding the markdown
 * markers for inline code, strong and emphasis nodes.
 *
 * @param {import('unist').Node} node The Node to be transformed into a string
 * @returns {string} The transformed Node as a string; an empty string when
 * the node has neither children nor a value
 */
export const transformNodeToString = node => {
  switch (node.type) {
    case 'inlineCode':
      return `\`${node.value}\``;
    case 'strong':
      return `**${transformNodesToString(node.children)}**`;
    case 'emphasis':
      return `_${transformNodesToString(node.children)}_`;
    default:
      // Nodes without children and without a value contribute no text.
      // Falling back to '' keeps the return type a string, and matches what
      // joining an undefined value already yields for nested nodes.
      return node.children
        ? transformNodesToString(node.children)
        : (node.value ?? '');
  }
};

/**
 * This utility allows us to join children Nodes into one
 * and transform them back to what their source would look like
 *
 * @param {Array<import('unist').Node>} nodes Nodes to be converted and concatenated
 * @returns {string} The parsed and joined nodes as a string
 */
export const transformNodesToString = nodes => {
  const mappedChildren = nodes.map(node => transformNodeToString(node));

  return mappedChildren.join('');
};