From f7dd452a04fb6c94e61a6ccc61f407a92819ed85 Mon Sep 17 00:00:00 2001 From: Claudio Wunder Date: Sun, 7 Dec 2025 18:38:47 +0100 Subject: [PATCH 01/25] feat: chunked processing (streaming) --- bin/commands/generate.mjs | 2 +- src/generators.mjs | 37 ++- .../api-links/__tests__/fixtures.test.mjs | 11 +- src/generators/ast-js/index.mjs | 11 +- src/generators/jsx-ast/index.mjs | 45 +--- .../jsx-ast/utils/getSortedHeadNodes.mjs | 36 +++ src/generators/legacy-html-all/index.mjs | 150 +++++++----- src/generators/legacy-html/index.mjs | 45 ++-- .../utils/replaceTemplateValues.mjs | 53 +++++ src/generators/legacy-json-all/index.mjs | 78 ++++-- src/generators/legacy-json/index.mjs | 9 +- src/generators/metadata/index.mjs | 12 +- src/generators/web/index.mjs | 67 ++++-- src/streaming.mjs | 222 ++++++++++++++++++ src/threading/parallel.mjs | 95 +++++--- 15 files changed, 661 insertions(+), 212 deletions(-) create mode 100644 src/generators/jsx-ast/utils/getSortedHeadNodes.mjs create mode 100644 src/generators/legacy-html/utils/replaceTemplateValues.mjs create mode 100644 src/streaming.mjs diff --git a/bin/commands/generate.mjs b/bin/commands/generate.mjs index 5b9dbfd6..f3456bc9 100644 --- a/bin/commands/generate.mjs +++ b/bin/commands/generate.mjs @@ -79,7 +79,7 @@ export default { prompt: { type: 'text', message: 'Items per worker thread', - initialValue: '10', + initialValue: '20', }, }, version: { diff --git a/src/generators.mjs b/src/generators.mjs index 3d985b8f..ebd39757 100644 --- a/src/generators.mjs +++ b/src/generators.mjs @@ -1,6 +1,7 @@ 'use strict'; import { allGenerators } from './generators/index.mjs'; +import { isAsyncGenerator, createStreamingCache } from './streaming.mjs'; import WorkerPool from './threading/index.mjs'; import createParallelWorker from './threading/parallel.mjs'; @@ -26,12 +27,38 @@ const createGenerator = input => { /** * We store all the registered generators to be processed * within a Record, so we can access their results at any time whenever needed - * (we store the Promises of the generator outputs) + * (we store the Promises of the generator outputs, or AsyncGenerators for streaming) * * @type {{ [K in keyof AllGenerators]: ReturnType }} */ const cachedGenerators = { ast: Promise.resolve(input) }; + /** + * Cache for collected async generator results. + * When a streaming generator is first consumed, we collect all chunks + * and store the promise here so subsequent consumers share the same result. + */ + const streamingCache = createStreamingCache(); + + /** + * Gets the dependency input, handling both regular promises and async generators. + * For async generators, ensures only one collection happens and result is cached. 
+ * @param {string} dependsOn - Name of the dependency generator + * @returns {Promise} + */ + const getDependencyInput = async dependsOn => { + // First, await the cached promise to get the actual result + const result = await cachedGenerators[dependsOn]; + + // Check if the result is an async generator (streaming) + if (isAsyncGenerator(result)) { + return streamingCache.getOrCollect(dependsOn, result); + } + + // Regular result - return it directly + return result; + }; + /** * Runs the Generator engine with the provided top-level input and the given generator options * @@ -57,18 +84,20 @@ const createGenerator = input => { // Ensure dependency is scheduled (but don't await its result yet) if (dependsOn && !(dependsOn in cachedGenerators)) { - await runGenerators({ ...options, generators: [dependsOn] }); + // Recursively schedule - don't await, just ensure it's in cachedGenerators + runGenerators({ ...options, generators: [dependsOn] }); } // Create a ParallelWorker for this generator + // The worker supports both batch (map) and streaming (stream) modes const worker = createParallelWorker(generatorName, chunkPool, options); /** * Schedule the generator - it awaits its dependency internally - * his allows multiple generators with the same dependency to run in parallel + * This allows multiple generators with the same dependency to run in parallel */ const scheduledGenerator = async () => { - const input = await cachedGenerators[dependsOn]; + const input = await getDependencyInput(dependsOn); return generate(input, { ...options, worker }); }; diff --git a/src/generators/api-links/__tests__/fixtures.test.mjs b/src/generators/api-links/__tests__/fixtures.test.mjs index fc6b204f..171fb560 100644 --- a/src/generators/api-links/__tests__/fixtures.test.mjs +++ b/src/generators/api-links/__tests__/fixtures.test.mjs @@ -26,12 +26,17 @@ describe('api links', () => { chunkSize: 10, }); - const astJsResult = await astJs.generate(undefined, { + // Collect results from the async generator + const astJsResults = []; + + for await (const chunk of astJs.generate(undefined, { input: [sourceFile], worker, - }); + })) { + astJsResults.push(...chunk); + } - const actualOutput = await apiLinks.generate(astJsResult, { + const actualOutput = await apiLinks.generate(astJsResults, { gitRef: 'https://github.com/nodejs/node/tree/HEAD', }); diff --git a/src/generators/ast-js/index.mjs b/src/generators/ast-js/index.mjs index 7967a5b3..ee144795 100644 --- a/src/generators/ast-js/index.mjs +++ b/src/generators/ast-js/index.mjs @@ -50,15 +50,20 @@ export default { }, /** - * @param {Input} _ + * @param {Input} i * @param {Partial} options + * @returns {AsyncGenerator>} */ - async generate(_, { input = [], worker }) { + async *generate(i, { input = [], worker }) { const sourceFiles = globSync(input).filter( filePath => extname(filePath) === '.js' ); + const deps = { input: sourceFiles }; + // Parse the Javascript sources into ASTs in parallel using worker threads - return worker.map(sourceFiles, _, { input: sourceFiles }); + for await (const chunkResult of worker.stream(sourceFiles, i, deps)) { + yield chunkResult; + } }, }; diff --git a/src/generators/jsx-ast/index.mjs b/src/generators/jsx-ast/index.mjs index 629646ea..5ccf8fc9 100644 --- a/src/generators/jsx-ast/index.mjs +++ b/src/generators/jsx-ast/index.mjs @@ -1,42 +1,9 @@ -import { OVERRIDDEN_POSITIONS } from './constants.mjs'; import { buildSideBarProps } from './utils/buildBarProps.mjs'; import buildContent from './utils/buildContent.mjs'; +import { 
getSortedHeadNodes } from './utils/getSortedHeadNodes.mjs'; import { groupNodesByModule } from '../../utils/generators.mjs'; import { getRemarkRecma } from '../../utils/remark.mjs'; -/** - * Sorts entries by OVERRIDDEN_POSITIONS and then heading name. - * @param {Array} entries - */ -const getSortedHeadNodes = entries => { - /** - * Sorts entries by OVERRIDDEN_POSITIONS and then heading name. - * @param {ApiDocMetadataEntry} a - * @param {ApiDocMetadataEntry} b - * @returns {number} - */ - const headingSortFn = (a, b) => { - const ai = OVERRIDDEN_POSITIONS.indexOf(a.api); - const bi = OVERRIDDEN_POSITIONS.indexOf(b.api); - - if (ai !== -1 && bi !== -1) { - return ai - bi; - } - - if (ai !== -1) { - return -1; - } - - if (bi !== -1) { - return 1; - } - - return a.heading.data.name.localeCompare(b.heading.data.name); - }; - - return entries.filter(node => node.heading.depth === 1).sort(headingSortFn); -}; - /** * Generator for converting MDAST to JSX AST. * @@ -97,11 +64,15 @@ export default { * * @param {Input} entries * @param {Partial} options - * @returns {Promise>} Array of generated content + * @returns {AsyncGenerator>} */ - async generate(entries, { index, releases, version, worker }) { + async *generate(entries, { index, releases, version, worker }) { const headNodes = entries.filter(node => node.heading.depth === 1); - return worker.map(headNodes, entries, { index, releases, version }); + const deps = { index, releases, version }; + + for await (const chunkResult of worker.stream(headNodes, entries, deps)) { + yield chunkResult; + } }, }; diff --git a/src/generators/jsx-ast/utils/getSortedHeadNodes.mjs b/src/generators/jsx-ast/utils/getSortedHeadNodes.mjs new file mode 100644 index 00000000..d8c015b0 --- /dev/null +++ b/src/generators/jsx-ast/utils/getSortedHeadNodes.mjs @@ -0,0 +1,36 @@ +'use strict'; + +import { OVERRIDDEN_POSITIONS } from '../constants.mjs'; + +/** + * Sorts entries by OVERRIDDEN_POSITIONS and then heading name. + * @param {ApiDocMetadataEntry} a + * @param {ApiDocMetadataEntry} b + * @returns {number} + */ +const headingSortFn = (a, b) => { + const ai = OVERRIDDEN_POSITIONS.indexOf(a.api); + const bi = OVERRIDDEN_POSITIONS.indexOf(b.api); + + if (ai !== -1 && bi !== -1) { + return ai - bi; + } + + if (ai !== -1) { + return -1; + } + + if (bi !== -1) { + return 1; + } + + return a.heading.data.name.localeCompare(b.heading.data.name); +}; + +/** + * Filters and sorts entries by OVERRIDDEN_POSITIONS and then heading name. + * @param {Array} entries + * @returns {Array} + */ +export const getSortedHeadNodes = entries => + entries.filter(node => node.heading.depth === 1).sort(headingSortFn); diff --git a/src/generators/legacy-html-all/index.mjs b/src/generators/legacy-html-all/index.mjs index 15d448a1..6c879e0e 100644 --- a/src/generators/legacy-html-all/index.mjs +++ b/src/generators/legacy-html-all/index.mjs @@ -6,7 +6,7 @@ import { join, resolve } from 'node:path'; import HTMLMinifier from '@minify-html/node'; import { getRemarkRehype } from '../../utils/remark.mjs'; -import dropdowns from '../legacy-html/utils/buildDropdowns.mjs'; +import { replaceTemplateValues } from '../legacy-html/utils/replaceTemplateValues.mjs'; import tableOfContents from '../legacy-html/utils/tableOfContents.mjs'; /** @@ -40,67 +40,109 @@ export default { dependsOn: 'legacy-html', + /** + * Process a chunk of template values from the dependency. + * Extracts toc and content from each entry for aggregation. 
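+   * May run in a worker thread, so each result keeps only serializable fields.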
+ * @param {Input} fullInput + * @param {number[]} itemIndices + */ + processChunk(fullInput, itemIndices) { + const results = []; + + for (const idx of itemIndices) { + const entry = fullInput[idx]; + + // Skip the index entry + if (entry.api === 'index') { + continue; + } + + results.push({ + api: entry.api, + section: entry.section, + toc: entry.toc, + content: entry.content, + }); + } + + return results; + }, + /** * Generates the `all.html` file from the `legacy-html` generator * @param {Input} input * @param {Partial} options + * @returns {AsyncGenerator>} */ - async generate(input, { version, releases, output }) { - const inputWithoutIndex = input.filter(entry => entry.api !== 'index'); - - // Gets a Remark Processor that parses Markdown to minified HTML - const remarkWithRehype = getRemarkRehype(); - - // Current directory path relative to the `index.mjs` file - // from the `legacy-html` generator, as all the assets are there - const baseDir = resolve(import.meta.dirname, '..', 'legacy-html'); - - // Reads the API template.html file to be used as a base for the HTML files - const apiTemplate = await readFile(join(baseDir, 'template.html'), 'utf-8'); - - // Aggregates all individual Table of Contents into one giant string - const aggregatedToC = inputWithoutIndex.map(entry => entry.toc).join('\n'); - - // Aggregates all individual content into one giant string - const aggregatedContent = inputWithoutIndex - .map(entry => entry.content) - .join('\n'); - - // Creates a "mimic" of an `ApiDocMetadataEntry` which fulfils the requirements - // for generating the `tableOfContents` with the `tableOfContents.parseNavigationNode` parser - const sideNavigationFromValues = inputWithoutIndex.map(entry => ({ - api: entry.api, - heading: { data: { depth: 1, name: entry.section } }, - })); - - // Generates the global Table of Contents (Sidebar Navigation) - const parsedSideNav = remarkWithRehype.processSync( - tableOfContents(sideNavigationFromValues, { - maxDepth: 1, - parser: tableOfContents.parseNavigationNode, - }) - ); - - const generatedAllTemplate = apiTemplate - .replace('__ID__', 'all') - .replace(/__FILENAME__/g, 'all') - .replace('__SECTION__', 'All') - .replace(/__VERSION__/g, `v${version.version}`) - .replace(/__TOC__/g, tableOfContents.wrapToC(aggregatedToC)) - .replace(/__GTOC__/g, parsedSideNav) - .replace('__CONTENT__', aggregatedContent) - .replace(/__TOC_PICKER__/g, dropdowns.buildToC(aggregatedToC)) - .replace(/__GTOC_PICKER__/g, '') - .replace('__ALTDOCS__', dropdowns.buildVersions('all', '', releases)) - .replace('__EDIT_ON_GITHUB__', ''); - - // We minify the html result to reduce the file size and keep it "clean" - const minified = HTMLMinifier.minify(Buffer.from(generatedAllTemplate), {}); + async *generate(input, { version, releases, output, worker }) { + // Collect all chunks as they stream in + const allChunks = []; + for await (const chunkResult of worker.stream(input, input, {})) { + allChunks.push(...chunkResult); + + yield chunkResult; + } + + // After all chunks are collected, build and write the final file if (output) { + // Gets a Remark Processor that parses Markdown to minified HTML + const remarkWithRehype = getRemarkRehype(); + + // Current directory path relative to the `index.mjs` file + // from the `legacy-html` generator, as all the assets are there + const baseDir = resolve(import.meta.dirname, '..', 'legacy-html'); + + // Reads the API template.html file to be used as a base for the HTML files + const apiTemplate = await readFile( + join(baseDir, 
'template.html'), + 'utf-8' + ); + + // Aggregates all individual Table of Contents into one giant string + const aggregatedToC = allChunks.map(entry => entry.toc).join('\n'); + + // Aggregates all individual content into one giant string + const aggregatedContent = allChunks + .map(entry => entry.content) + .join('\n'); + + // Creates a "mimic" of an `ApiDocMetadataEntry` which fulfils the requirements + // for generating the `tableOfContents` with the `tableOfContents.parseNavigationNode` parser + const sideNavigationFromValues = allChunks.map(entry => ({ + api: entry.api, + heading: { data: { depth: 1, name: entry.section } }, + })); + + // Generates the global Table of Contents (Sidebar Navigation) + const parsedSideNav = remarkWithRehype.processSync( + tableOfContents(sideNavigationFromValues, { + maxDepth: 1, + parser: tableOfContents.parseNavigationNode, + }) + ); + + const templateValues = { + api: 'all', + added: '', + section: 'All', + version: `v${version.version}`, + toc: aggregatedToC, + nav: String(parsedSideNav), + content: aggregatedContent, + }; + + const generatedAllTemplate = replaceTemplateValues( + apiTemplate, + templateValues, + releases, + { skipGitHub: true, skipGtocPicker: true } + ); + + // We minify the html result to reduce the file size and keep it "clean" + const minified = HTMLMinifier.minify(Buffer.from(generatedAllTemplate)); + await writeFile(join(output, 'all.html'), minified); } - - return minified; }, }; diff --git a/src/generators/legacy-html/index.mjs b/src/generators/legacy-html/index.mjs index 525c7978..a3098ff4 100644 --- a/src/generators/legacy-html/index.mjs +++ b/src/generators/legacy-html/index.mjs @@ -6,7 +6,7 @@ import { join } from 'node:path'; import HTMLMinifier from '@minify-html/node'; import buildContent from './utils/buildContent.mjs'; -import dropdowns from './utils/buildDropdowns.mjs'; +import { replaceTemplateValues } from './utils/replaceTemplateValues.mjs'; import { safeCopy } from './utils/safeCopy.mjs'; import tableOfContents from './utils/tableOfContents.mjs'; import { groupNodesByModule } from '../../utils/generators.mjs'; @@ -68,28 +68,6 @@ export default { .filter(node => node.heading.depth === 1) .sort((a, b) => a.heading.data.name.localeCompare(b.heading.data.name)); - /** - * Replaces the template values in the API template with the given values. - * @param {TemplateValues} values - The values to replace the template values with - * @returns {string} The replaced template values - */ - const replaceTemplateValues = values => { - const { api, added, section, version, toc, nav, content } = values; - - return apiTemplate - .replace('__ID__', api) - .replace(/__FILENAME__/g, api) - .replace('__SECTION__', section) - .replace(/__VERSION__/g, version) - .replace(/__TOC__/g, tableOfContents.wrapToC(toc)) - .replace(/__GTOC__/g, nav) - .replace('__CONTENT__', content) - .replace(/__TOC_PICKER__/g, dropdowns.buildToC(toc)) - .replace(/__GTOC_PICKER__/g, dropdowns.buildNavigation(nav)) - .replace('__ALTDOCS__', dropdowns.buildVersions(api, added, releases)) - .replace('__EDIT_ON_GITHUB__', dropdowns.buildGitHub(api)); - }; - const results = []; for (const idx of itemIndices) { @@ -116,7 +94,7 @@ export default { const apiAsHeading = head.api.charAt(0).toUpperCase() + head.api.slice(1); - const generatedTemplate = { + const template = { api: head.api, added: head.introduced_in ?? 
'', section: head.heading.data.name || apiAsHeading, @@ -128,13 +106,14 @@ export default { if (output) { // We minify the html result to reduce the file size and keep it "clean" - const result = replaceTemplateValues(generatedTemplate); + const result = replaceTemplateValues(apiTemplate, template, releases); + const minified = HTMLMinifier.minify(Buffer.from(result), {}); await writeFile(join(output, `${head.api}.html`), minified); } - results.push(generatedTemplate); + results.push(template); } return results; @@ -144,8 +123,9 @@ export default { * Generates the legacy version of the API docs in HTML * @param {Input} input * @param {Partial} options + * @returns {AsyncGenerator>} */ - async generate(input, { index, releases, version, output, worker }) { + async *generate(input, { index, releases, version, output, worker }) { const remarkRehypeProcessor = getRemarkRehypeWithShiki(); const baseDir = import.meta.dirname; @@ -167,14 +147,19 @@ export default { }) ); - const generatedValues = await worker.map(headNodes, input, { + const deps = { index, releases, version, output, apiTemplate, parsedSideNav: String(parsedSideNav), - }); + }; + + // Stream chunks as they complete - HTML files are written immediately + for await (const chunkResult of worker.stream(headNodes, input, deps)) { + yield chunkResult; + } if (output) { // Define the source folder for API docs assets @@ -189,7 +174,5 @@ export default { // Copy all files from assets folder to output, skipping unchanged files await safeCopy(srcAssets, assetsFolder); } - - return generatedValues; }, }; diff --git a/src/generators/legacy-html/utils/replaceTemplateValues.mjs b/src/generators/legacy-html/utils/replaceTemplateValues.mjs new file mode 100644 index 00000000..3eabbe73 --- /dev/null +++ b/src/generators/legacy-html/utils/replaceTemplateValues.mjs @@ -0,0 +1,53 @@ +'use strict'; + +import dropdowns from './buildDropdowns.mjs'; +import tableOfContents from './tableOfContents.mjs'; + +/** + * @typedef {{ + * api: string; + * added: string; + * section: string; + * version: string; + * toc: string; + * nav: string; + * content: string; + * }} TemplateValues + */ + +/** + * Replaces the template values in the API template with the given values. + * @param {string} apiTemplate - The HTML template string + * @param {TemplateValues} values - The values to replace the template values with + * @param {Array} releases - The releases array for version dropdown + * @param {{ skipGitHub?: boolean; skipGtocPicker?: boolean }} [options] - Optional settings + * @returns {string} The replaced template values + */ +export const replaceTemplateValues = ( + apiTemplate, + values, + releases, + options = {} +) => { + const { api, added, section, version, toc, nav, content } = values; + const { skipGitHub = false, skipGtocPicker = false } = options; + + return apiTemplate + .replace('__ID__', api) + .replace(/__FILENAME__/g, api) + .replace('__SECTION__', section) + .replace(/__VERSION__/g, version) + .replace(/__TOC__/g, tableOfContents.wrapToC(toc)) + .replace(/__GTOC__/g, nav) + .replace('__CONTENT__', content) + .replace(/__TOC_PICKER__/g, dropdowns.buildToC(toc)) + .replace( + /__GTOC_PICKER__/g, + skipGtocPicker ? '' : dropdowns.buildNavigation(nav) + ) + .replace('__ALTDOCS__', dropdowns.buildVersions(api, added, releases)) + .replace( + '__EDIT_ON_GITHUB__', + skipGitHub ? 
'' : dropdowns.buildGitHub(api) + ); +}; diff --git a/src/generators/legacy-json-all/index.mjs b/src/generators/legacy-json-all/index.mjs index 5fb8c061..d6838eda 100644 --- a/src/generators/legacy-json-all/index.mjs +++ b/src/generators/legacy-json-all/index.mjs @@ -3,6 +3,14 @@ import { writeFile } from 'node:fs/promises'; import { join } from 'node:path'; +const PROPERTIES_TO_COPY = [ + 'miscs', + 'modules', + 'classes', + 'globals', + 'methods', +]; + /** * This generator consolidates data from the `legacy-json` generator into a single * JSON file (`all.json`). @@ -21,13 +29,50 @@ export default { dependsOn: 'legacy-json', + /** + * Process a chunk of sections from the dependency. + * Extracts and enriches relevant properties for aggregation. + * @param {Input} fullInput + * @param {number[]} itemIndices + */ + processChunk(fullInput, itemIndices) { + /** @type {import('./types.d.ts').Output} */ + const chunkResult = { + miscs: [], + modules: [], + classes: [], + globals: [], + methods: [], + }; + + for (const idx of itemIndices) { + const section = fullInput[idx]; + + // Copy the relevant properties from each section into our chunk result + for (const property of PROPERTIES_TO_COPY) { + const items = section[property]; + + if (Array.isArray(items)) { + const enrichedItems = section.source + ? items.map(item => ({ ...item, source: section.source })) + : items; + + chunkResult[property].push(...enrichedItems); + } + } + } + + return chunkResult; + }, + /** * Generates the legacy JSON `all.json` file. * * @param {Input} input * @param {Partial} options + * @returns {AsyncGenerator} */ - async generate(input, { output }) { + async *generate(input, { output, worker }) { /** * The consolidated output object that will contain * combined data from all sections in the input. @@ -42,33 +87,18 @@ export default { methods: [], }; - const propertiesToCopy = [ - 'miscs', - 'modules', - 'classes', - 'globals', - 'methods', - ]; - - input.forEach(section => { - // Copy the relevant properties from each section into our output - propertiesToCopy.forEach(property => { - const items = section[property]; - - if (Array.isArray(items)) { - const enrichedItems = section.source - ? 
items.map(item => ({ ...item, source: section.source })) - : items; + // Stream chunks as they complete and aggregate results + for await (const chunkResult of worker.stream(input, input, {})) { + // Merge chunk result into generatedValue + for (const property of PROPERTIES_TO_COPY) { + generatedValue[property].push(...chunkResult[property]); + } - generatedValue[property].push(...enrichedItems); - } - }); - }); + yield chunkResult; + } if (output) { await writeFile(join(output, 'all.json'), JSON.stringify(generatedValue)); } - - return generatedValue; }, }; diff --git a/src/generators/legacy-json/index.mjs b/src/generators/legacy-json/index.mjs index 9d468760..72aef46c 100644 --- a/src/generators/legacy-json/index.mjs +++ b/src/generators/legacy-json/index.mjs @@ -65,10 +65,15 @@ export default { * * @param {Input} input * @param {Partial} options + * @returns {AsyncGenerator>} */ - async generate(input, { output, worker }) { + async *generate(input, { output, worker }) { const headNodes = input.filter(node => node.heading.depth === 1); - return worker.map(headNodes, input, { output }); + const deps = { output }; + + for await (const chunkResult of worker.stream(headNodes, input, deps)) { + yield chunkResult; + } }, }; diff --git a/src/generators/metadata/index.mjs b/src/generators/metadata/index.mjs index 750dd82c..3949f3a9 100644 --- a/src/generators/metadata/index.mjs +++ b/src/generators/metadata/index.mjs @@ -39,11 +39,15 @@ export default { /** * @param {Input} inputs * @param {GeneratorOptions} options - * @returns {Promise>} + * @returns {AsyncGenerator>} */ - async generate(inputs, { typeMap, worker }) { - const results = await worker.map(inputs, inputs, { typeMap }); + async *generate(inputs, { typeMap, worker }) { + const deps = { typeMap }; - return results.flat(); + // Stream chunks as they complete - allows dependent generators + // to start collecting/preparing while we're still processing + for await (const chunkResult of worker.stream(inputs, inputs, deps)) { + yield chunkResult.flat(); + } }, }; diff --git a/src/generators/web/index.mjs b/src/generators/web/index.mjs index 71f2f015..2f072381 100644 --- a/src/generators/web/index.mjs +++ b/src/generators/web/index.mjs @@ -21,6 +21,24 @@ export default { description: 'Generates HTML/CSS/JS bundles from JSX AST entries', dependsOn: 'jsx-ast', + /** + * Process a chunk of JSX AST entries. + * This simply passes through the entries for aggregation in the main generate function. + * The actual processing happens in processJSXEntries which needs all entries together. + * + * @param {import('../jsx-ast/utils/buildContent.mjs').JSXContent[]} fullInput + * @param {number[]} itemIndices + */ + processChunk(fullInput, itemIndices) { + const results = []; + + for (const idx of itemIndices) { + results.push(fullInput[idx]); + } + + return results; + }, + /** * Main generation function that processes JSX AST entries into web bundles. * @@ -28,32 +46,44 @@ export default { * @param {Partial} options - Generator options. * @param {string} [options.output] - Output directory for generated files. * @param {string} options.version - Documentation version string. - * @returns {Promise>} Generated HTML and CSS. 
+ * @returns {AsyncGenerator>} */ - async generate(entries, { output, version }) { - // Load the HTML template with placeholders - const template = await readFile( + async *generate(entries, { output, version, worker }) { + // Start loading template while chunks stream in (parallel I/O) + const templatePromise = readFile( new URL('template.html', import.meta.url), 'utf-8' ); - // Create AST builders for server and client programs - const astBuilders = createASTBuilder(); + // Collect all chunks as they stream in from jsx-ast + const allEntries = []; - // Create require function for resolving external packages in server code - const requireFn = createRequire(import.meta.url); + for await (const chunkResult of worker.stream(entries, entries, {})) { + allEntries.push(...chunkResult); - // Process all entries: convert JSX to HTML/CSS/JS - const { results, css, chunks } = await processJSXEntries( - entries, - template, - astBuilders, - requireFn, - { version } - ); + yield chunkResult; + } - // Write files to disk if output directory is specified + // Now that all chunks are collected, process them together + // (processJSXEntries needs all entries to generate code-split bundles) if (output) { + const template = await templatePromise; + + // Create AST builders for server and client programs + const astBuilders = createASTBuilder(); + + // Create require function for resolving external packages in server code + const requireFn = createRequire(import.meta.url); + + // Process all entries: convert JSX to HTML/CSS/JS + const { results, css, chunks } = await processJSXEntries( + allEntries, + template, + astBuilders, + requireFn, + { version } + ); + // Write HTML files for (const { html, api } of results) { await writeFile(join(output, `${api}.html`), html, 'utf-8'); @@ -67,8 +97,5 @@ export default { // Write CSS bundle await writeFile(join(output, 'styles.css'), css, 'utf-8'); } - - // Return HTML and CSS for each entry - return results.map(({ html }) => ({ html, css })); }, }; diff --git a/src/streaming.mjs b/src/streaming.mjs new file mode 100644 index 00000000..8548ae46 --- /dev/null +++ b/src/streaming.mjs @@ -0,0 +1,222 @@ +'use strict'; + +/** + * Streaming utilities for processing chunks asynchronously. + * Provides a unified interface for both parallel (worker threads) and + * single-threaded processing modes. + */ + +/** + * Helper to check if a value is an async generator/iterable + * @param {any} obj + * @returns {boolean} + */ +export const isAsyncGenerator = obj => + obj && typeof obj[Symbol.asyncIterator] === 'function'; + +/** + * Collects all values from an async generator into a flat array. + * Each yielded chunk is spread into the results array. + * @template T + * @param {AsyncGenerator} generator - Async generator yielding arrays + * @returns {Promise} - Flattened array of all yielded items + */ +export const collectAsyncGenerator = async generator => { + const results = []; + + for await (const chunk of generator) { + results.push(...chunk); + } + + return results; +}; + +/** + * Splits a count of items into chunks of specified size. + * Returns arrays of indices for each chunk. 
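+ * e.g. createIndexChunks(5, 2) -> [[0, 1], [2, 3], [4]].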
+ * @param {number} count - Total number of items + * @param {number} size - Maximum items per chunk + * @returns {number[][]} Array of index arrays + */ +export const createIndexChunks = (count, size) => { + const chunks = []; + + for (let i = 0; i < count; i += size) { + const end = Math.min(i + size, count); + const chunk = []; + + for (let j = i; j < end; j++) { + chunk.push(j); + } + + chunks.push(chunk); + } + + return chunks; +}; + +/** + * Creates an array of indices from 0 to count-1 + * @param {number} count - Number of indices to create + * @returns {number[]} Array of indices + */ +export const createIndices = count => { + const indices = []; + + for (let i = 0; i < count; i++) { + indices.push(i); + } + + return indices; +}; + +/** + * Yields results from an array of promises as they complete. + * Uses Promise.race pattern to yield in completion order, not input order. + * @template T + * @param {Promise[]} promises - Array of promises to race + * @yields {T} - Results as they complete + */ +export async function* yieldAsCompleted(promises) { + const pending = new Map(promises.map((p, i) => [i, p])); + + while (pending.size > 0) { + const entries = [...pending.entries()]; + + const racingPromises = entries.map(([idx, promise]) => + promise.then(result => ({ idx, result })) + ); + + const { idx, result } = await Promise.race(racingPromises); + + pending.delete(idx); + + yield result; + } +} + +/** + * Creates a streaming processor that can run in either parallel or single-threaded mode. + * Provides a unified interface for generators to process chunks. + * + * @param {object} config - Configuration object + * @param {Function} config.processChunk - Function to process a chunk: (fullInput, indices, options) => Promise + * @param {number} config.chunkSize - Number of items per chunk + * @param {boolean} config.parallel - Whether to use parallel processing + * @param {Function} [config.runInWorker] - Function to run chunk in worker: (indices, fullInput, options) => Promise + */ +export const createStreamingProcessor = ({ + processChunk, + chunkSize, + parallel, + runInWorker, +}) => ({ + /** + * Process all items and return results as a single array. + * @template T, R + * @param {T[]} items - Items to process + * @param {T[]} fullInput - Full input for context + * @param {object} extra - Extra options for processChunk + * @returns {Promise} + */ + async map(items, fullInput, extra) { + const itemCount = items.length; + + if (itemCount === 0) { + return []; + } + + // Single chunk - process directly + if (!parallel || itemCount <= chunkSize) { + const indices = createIndices(itemCount); + + return processChunk(fullInput, indices, extra); + } + + // Multiple chunks - process in parallel + const indexChunks = createIndexChunks(itemCount, chunkSize); + + const chunkResults = await Promise.all( + indexChunks.map(indices => runInWorker(indices, fullInput, extra)) + ); + + return chunkResults.flat(); + }, + + /** + * Process items and yield results as each chunk completes. 
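+   * Chunks are yielded in completion order, not input order (via yieldAsCompleted).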
+ * @template T, R + * @param {T[]} items - Items to process + * @param {T[]} fullInput - Full input for context + * @param {object} extra - Extra options for processChunk + * @yields {R[]} - Each chunk's results as they complete + */ + async *stream(items, fullInput, extra) { + const itemCount = items.length; + + if (itemCount === 0) { + return; + } + + // Single chunk - yield directly + if (!parallel || itemCount <= chunkSize) { + const indices = createIndices(itemCount); + const result = await processChunk(fullInput, indices, extra); + + yield result; + + return; + } + + // Multiple chunks - yield as they complete + const indexChunks = createIndexChunks(itemCount, chunkSize); + + const chunkPromises = indexChunks.map(indices => + runInWorker(indices, fullInput, extra) + ); + + yield* yieldAsCompleted(chunkPromises); + }, +}); + +/** + * Creates a cache system for collecting async generator results. + * Ensures that when multiple consumers request the same async generator, + * only one collection happens and all consumers share the result. + */ +export const createStreamingCache = () => { + /** @type {Map>} */ + const cache = new Map(); + + return { + /** + * Get the collected result for a generator, starting collection if needed. + * @param {string} key - Cache key (usually generator name) + * @param {AsyncGenerator} generator - The async generator to collect + * @returns {Promise} - Promise resolving to collected results + */ + getOrCollect(key, generator) { + if (!cache.has(key)) { + cache.set(key, collectAsyncGenerator(generator)); + } + + return cache.get(key); + }, + + /** + * Check if a key exists in the cache + * @param {string} key + * @returns {boolean} + */ + has(key) { + return cache.has(key); + }, + + /** + * Clear the cache + */ + clear() { + cache.clear(); + }, + }; +}; diff --git a/src/threading/parallel.mjs b/src/threading/parallel.mjs index 25333cb8..818a825f 100644 --- a/src/threading/parallel.mjs +++ b/src/threading/parallel.mjs @@ -1,6 +1,11 @@ 'use strict'; import { allGenerators } from '../generators/index.mjs'; +import { + createIndexChunks, + createIndices, + yieldAsCompleted, +} from '../streaming.mjs'; /** * Creates a ParallelWorker that uses real Node.js Worker threads @@ -16,30 +21,6 @@ export default function createParallelWorker(generatorName, pool, options) { const generator = allGenerators[generatorName]; - /** - * Splits items into chunks of specified size. - * @param {number} count - Number of items - * @param {number} size - Items per chunk - * @returns {number[][]} Array of index arrays - */ - const createIndexChunks = (count, size) => { - const chunks = []; - - for (let i = 0; i < count; i += size) { - const end = Math.min(i + size, count); - - const chunk = []; - - for (let j = i; j < end; j++) { - chunk.push(j); - } - - chunks.push(chunk); - } - - return chunks; - }; - /** * Strips non-serializable properties from options for worker transfer * @param {object} extra - Extra options to merge @@ -56,6 +37,7 @@ export default function createParallelWorker(generatorName, pool, options) { /** * Process items in parallel using real worker threads. * Items are split into chunks, each chunk processed by a separate worker. + * Waits for all chunks to complete before returning. 
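   * Prefer stream() below when the consumer can process each chunk incrementally.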
* * @template T, R * @param {T[]} items - Items to process (must be serializable) @@ -78,11 +60,7 @@ export default function createParallelWorker(generatorName, pool, options) { // For single thread or small workloads - run in main thread if (threads <= 1 || itemCount <= 2) { - const indices = []; - - for (let i = 0; i < itemCount; i++) { - indices.push(i); - } + const indices = createIndices(itemCount); return generator.processChunk(fullInput, indices, { ...options, @@ -106,5 +84,64 @@ export default function createParallelWorker(generatorName, pool, options) { // Flatten results return chunkResults.flat(); }, + + /** + * Process items in parallel and yield each chunk's results as they complete. + * This enables pipeline parallelism: downstream generators can start processing + * chunk results while upstream chunks are still being processed. + * + * Use this when the consuming generator also supports chunking, allowing it + * to begin work immediately on each completed chunk rather than waiting + * for all chunks to finish. + * + * @template T, R + * @param {T[]} items - Items to process (must be serializable) + * @param {T[]} fullInput - Full input data for context rebuilding in workers + * @param {object} extra - Generator-specific context + * @yields {R[]} - Each chunk's results as they complete + */ + async *stream(items, fullInput, extra) { + const itemCount = items.length; + + if (itemCount === 0) { + return; + } + + if (!generator.processChunk) { + throw new Error( + `Generator "${generatorName}" does not support chunk processing` + ); + } + + // For single thread or small workloads - yield single result + if (threads <= 1 || itemCount <= 2) { + const indices = createIndices(itemCount); + + const result = await generator.processChunk(fullInput, indices, { + ...options, + ...extra, + }); + + yield result; + + return; + } + + // Divide items into chunks based on chunkSize + const indexChunks = createIndexChunks(itemCount, chunkSize); + + // Create all chunk promises upfront for parallel execution + const chunkPromises = indexChunks.map(indices => + pool.run({ + generatorName, + fullInput, + itemIndices: indices, + options: serializeOptions(extra), + }) + ); + + // Yield chunks as they complete + yield* yieldAsCompleted(chunkPromises); + }, }; } From b5a647be702cb9011ca3509833b9850494f3776f Mon Sep 17 00:00:00 2001 From: Claudio Wunder Date: Mon, 8 Dec 2025 19:10:29 +0100 Subject: [PATCH 02/25] feat: properly implement streaming, worker chunking and proper worker thread scheduler --- bin/cli.mjs | 17 ++ bin/commands/generate.mjs | 14 +- src/__tests__/generators.test.mjs | 137 ++++++++++ src/__tests__/streaming.test.mjs | 232 ++++++++++++++++ src/constants.mjs | 4 +- src/generators.mjs | 152 +++++++---- src/generators/api-links/index.mjs | 7 +- src/generators/ast-js/index.mjs | 15 +- src/generators/jsx-ast/index.mjs | 9 +- src/generators/legacy-html-all/index.mjs | 148 ++++------ src/generators/legacy-html/index.mjs | 50 ++-- src/generators/legacy-json-all/index.mjs | 59 ++-- src/generators/legacy-json/index.mjs | 32 ++- src/generators/legacy-json/types.d.ts | 5 + .../legacy-json/utils/buildSection.mjs | 6 +- src/generators/man-page/index.mjs | 3 + src/generators/metadata/index.mjs | 7 +- src/generators/metadata/utils/parse.mjs | 8 +- src/generators/types.d.ts | 18 +- src/logger/__tests__/logger.test.mjs | 86 ++++++ src/logger/index.mjs | 3 + src/logger/logger.mjs | 60 ++++- src/logger/transports/console.mjs | 10 +- src/streaming.mjs | 213 ++++----------- 
src/threading/__tests__/WorkerPool.test.mjs | 175 ++++++++--
 src/threading/__tests__/parallel.test.mjs | 194 ++++++++++++-
 src/threading/chunk-worker.mjs | 38 ++-
 src/threading/index.mjs | 255 +++++++++++++-----
 src/threading/parallel.mjs | 195 ++++++++------
 29 files changed, 1519 insertions(+), 633 deletions(-)
 create mode 100644 src/__tests__/generators.test.mjs
 create mode 100644 src/__tests__/streaming.test.mjs

diff --git a/bin/cli.mjs b/bin/cli.mjs
index 05533cde..70bc2727 100755
--- a/bin/cli.mjs
+++ b/bin/cli.mjs
@@ -7,11 +7,28 @@ import { Command, Option } from 'commander';
import commands from './commands/index.mjs';
import interactive from './commands/interactive.mjs';
import { errorWrap } from './utils.mjs';
+import logger from '../src/logger/index.mjs';

const program = new Command()
.name('@nodejs/doc-kit')
.description('CLI tool to generate the Node.js API documentation');

+// Add global log level option
+program.addOption(
+ new Option('--log-level <level>', 'Log level')
+ .choices(['debug', 'info', 'warn', 'error', 'fatal'])
+ .default('info')
+);
+
+// Set log level before any command runs
+program.hook('preAction', thisCommand => {
+ const logLevel = thisCommand.opts().logLevel;
+
+ if (logLevel) {
+ logger.setLogLevel(logLevel);
+ }
+});
+
// Registering commands
commands.forEach(({ name, description, options, action }) => {
const cmd = program.command(name).description(description);
diff --git a/bin/commands/generate.mjs b/bin/commands/generate.mjs
index f3456bc9..535bfc87 100644
--- a/bin/commands/generate.mjs
+++ b/bin/commands/generate.mjs
@@ -3,10 +3,7 @@ import { resolve } from 'node:path';

import { coerce } from 'semver';

-import {
- DOC_NODE_CHANGELOG_URL,
- DOC_NODE_VERSION,
-} from '../../src/constants.mjs';
+import { NODE_CHANGELOG_URL, NODE_VERSION } from '../../src/constants.mjs';
import { publicGenerators } from '../../src/generators/index.mjs';
import createGenerator from '../../src/generators.mjs';
import { parseChangelog, parseIndex } from '../../src/parsers/markdown.mjs';
@@ -18,7 +15,10 @@ const availableGenerators = Object.keys(publicGenerators);

// Half of available logical CPUs guarantees in general all physical CPUs are being used
// which in most scenarios is the best way to maximize performance
-const optimalThreads = Math.floor(cpus().length / 2) + 1;
+// Past a certain number of threads, the overhead of context switching
+// and CPU contention starts to degrade performance rather than improve it.
+// Therefore, we set the optimal thread count to half the number of CPU cores, capped at a maximum of 6.
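+// e.g. 8 logical CPUs -> Math.min(4, 6) = 4 threads; 16 logical CPUs -> Math.min(8, 6) = 6.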
+const optimalThreads = Math.min(Math.floor(cpus().length / 2), 6); /** * @typedef {Object} Options @@ -88,7 +88,7 @@ export default { prompt: { type: 'text', message: 'Enter Node.js version', - initialValue: DOC_NODE_VERSION, + initialValue: NODE_VERSION, }, }, changelog: { @@ -97,7 +97,7 @@ export default { prompt: { type: 'text', message: 'Enter changelog URL', - initialValue: DOC_NODE_CHANGELOG_URL, + initialValue: NODE_CHANGELOG_URL, }, }, gitRef: { diff --git a/src/__tests__/generators.test.mjs b/src/__tests__/generators.test.mjs new file mode 100644 index 00000000..9b599ca1 --- /dev/null +++ b/src/__tests__/generators.test.mjs @@ -0,0 +1,137 @@ +import { ok, strictEqual } from 'node:assert'; +import { describe, it } from 'node:test'; + +import createGenerator from '../generators.mjs'; +import { isAsyncGenerator } from '../streaming.mjs'; + +describe('createGenerator', () => { + // Simple mock input for testing + const mockInput = [ + { + file: { stem: 'test', basename: 'test.md' }, + tree: { type: 'root', children: [] }, + }, + ]; + + // Mock options with minimal required fields + const mockOptions = { + input: '/tmp/test', + output: '/tmp/output', + generators: ['metadata'], + version: { major: 22, minor: 0, patch: 0 }, + releases: [], + index: [], + gitRef: 'https://github.com/nodejs/node/tree/HEAD', + threads: 1, + chunkSize: 20, + typeMap: {}, + }; + + it('should create a generator orchestrator with runGenerators method', () => { + const { runGenerators } = createGenerator(mockInput); + + ok(runGenerators); + strictEqual(typeof runGenerators, 'function'); + }); + + it('should return the ast input directly when generators list is empty', async () => { + const { runGenerators } = createGenerator(mockInput); + + const result = await runGenerators({ + ...mockOptions, + generators: ['ast'], + }); + + // The 'ast' key should resolve to the original input + ok(result); + }); + + it('should run metadata generator', async () => { + const { runGenerators } = createGenerator(mockInput); + + const result = await runGenerators({ + ...mockOptions, + generators: ['metadata'], + }); + + // metadata returns an async generator + ok(isAsyncGenerator(result)); + }); + + it('should handle generator with dependency', async () => { + const { runGenerators } = createGenerator(mockInput); + + // legacy-html depends on metadata + const result = await runGenerators({ + ...mockOptions, + generators: ['legacy-html'], + }); + + // Should complete without error + ok(result !== undefined); + }); + + it('should skip already scheduled generators', async () => { + const { runGenerators } = createGenerator(mockInput); + + // Running with ['metadata', 'metadata'] should skip the second + const result = await runGenerators({ + ...mockOptions, + generators: ['metadata', 'metadata'], + }); + + ok(isAsyncGenerator(result)); + }); + + it('should handle multiple generators in sequence', async () => { + const { runGenerators } = createGenerator(mockInput); + + // Run metadata twice - the system should skip the already scheduled one + // Avoid json-simple since it writes to disk + const result = await runGenerators({ + ...mockOptions, + generators: ['metadata'], + }); + + // Result should be from the last generator + ok(result !== undefined); + }); + + it('should collect async generator results for dependents', async () => { + const { runGenerators } = createGenerator(mockInput); + + // legacy-json depends on metadata (async generator) + const result = await runGenerators({ + ...mockOptions, + generators: ['legacy-json'], 
+ }); + + ok(result !== undefined); + }); + + it('should use multiple threads when specified', async () => { + const { runGenerators } = createGenerator(mockInput); + + const result = await runGenerators({ + ...mockOptions, + threads: 4, + generators: ['metadata'], + }); + + ok(isAsyncGenerator(result)); + }); + + it('should pass options to generators', async () => { + const { runGenerators } = createGenerator(mockInput); + + const customTypeMap = { TestType: 'https://example.com/TestType' }; + + const result = await runGenerators({ + ...mockOptions, + typeMap: customTypeMap, + generators: ['metadata'], + }); + + ok(isAsyncGenerator(result)); + }); +}); diff --git a/src/__tests__/streaming.test.mjs b/src/__tests__/streaming.test.mjs new file mode 100644 index 00000000..2ba63e1f --- /dev/null +++ b/src/__tests__/streaming.test.mjs @@ -0,0 +1,232 @@ +import { deepStrictEqual, ok, strictEqual } from 'node:assert'; +import { describe, it } from 'node:test'; + +import { + isAsyncGenerator, + collectAsyncGenerator, + createStreamingCache, +} from '../streaming.mjs'; + +describe('streaming utilities', () => { + describe('isAsyncGenerator', () => { + it('should return true for async generators', () => { + async function* asyncGen() { + yield 1; + } + + const gen = asyncGen(); + + strictEqual(isAsyncGenerator(gen), true); + }); + + it('should return false for regular generators', () => { + function* syncGen() { + yield 1; + } + + const gen = syncGen(); + + strictEqual(isAsyncGenerator(gen), false); + }); + + it('should return false for plain objects', () => { + strictEqual(isAsyncGenerator({}), false); + strictEqual(isAsyncGenerator([]), false); + strictEqual(isAsyncGenerator({ async: true }), false); + }); + + it('should return false for null and undefined', () => { + strictEqual(isAsyncGenerator(null), false); + strictEqual(isAsyncGenerator(undefined), false); + }); + + it('should return false for primitives', () => { + strictEqual(isAsyncGenerator(42), false); + strictEqual(isAsyncGenerator('string'), false); + strictEqual(isAsyncGenerator(true), false); + }); + + it('should return true for objects with Symbol.asyncIterator', () => { + const asyncIterable = { + [Symbol.asyncIterator]() { + return { + next: async () => ({ done: true, value: undefined }), + }; + }, + }; + + strictEqual(isAsyncGenerator(asyncIterable), true); + }); + }); + + describe('collectAsyncGenerator', () => { + it('should collect all chunks into a flat array', async () => { + async function* gen() { + yield [1, 2]; + yield [3, 4]; + yield [5]; + } + + const result = await collectAsyncGenerator(gen()); + + deepStrictEqual(result, [1, 2, 3, 4, 5]); + }); + + it('should return empty array for empty generator', async () => { + async function* gen() { + // empty generator + } + + const result = await collectAsyncGenerator(gen()); + + deepStrictEqual(result, []); + }); + + it('should handle single chunk', async () => { + async function* gen() { + yield [1, 2, 3]; + } + + const result = await collectAsyncGenerator(gen()); + + deepStrictEqual(result, [1, 2, 3]); + }); + + it('should handle empty chunks', async () => { + async function* gen() { + yield []; + yield [1]; + yield []; + yield [2, 3]; + } + + const result = await collectAsyncGenerator(gen()); + + deepStrictEqual(result, [1, 2, 3]); + }); + + it('should handle objects in chunks', async () => { + async function* gen() { + yield [{ a: 1 }, { b: 2 }]; + yield [{ c: 3 }]; + } + + const result = await collectAsyncGenerator(gen()); + + deepStrictEqual(result, [{ a: 1 }, { b: 2 
}, { c: 3 }]); + }); + }); + + describe('createStreamingCache', () => { + it('should create a cache with required methods', () => { + const cache = createStreamingCache(); + + ok(cache); + strictEqual(typeof cache.getOrCollect, 'function'); + strictEqual(typeof cache.has, 'function'); + strictEqual(typeof cache.clear, 'function'); + }); + + it('should return same promise for same key', async () => { + const cache = createStreamingCache(); + + async function* gen() { + yield [1, 2, 3]; + } + + const promise1 = cache.getOrCollect('test', gen()); + + // Create a new generator (which shouldn't be used due to caching) + async function* gen2() { + yield [4, 5, 6]; + } + + const promise2 = cache.getOrCollect('test', gen2()); + + // Both should resolve to the same result (from first generator) + const result1 = await promise1; + const result2 = await promise2; + + deepStrictEqual(result1, [1, 2, 3]); + strictEqual(result1, result2); + }); + + it('should return different results for different keys', async () => { + const cache = createStreamingCache(); + + async function* gen1() { + yield [1, 2]; + } + + async function* gen2() { + yield [3, 4]; + } + + const result1 = await cache.getOrCollect('key1', gen1()); + const result2 = await cache.getOrCollect('key2', gen2()); + + deepStrictEqual(result1, [1, 2]); + deepStrictEqual(result2, [3, 4]); + }); + + it('should report has() correctly', async () => { + const cache = createStreamingCache(); + + strictEqual(cache.has('test'), false); + + async function* gen() { + yield [1]; + } + + cache.getOrCollect('test', gen()); + + strictEqual(cache.has('test'), true); + strictEqual(cache.has('other'), false); + }); + + it('should clear all entries', async () => { + const cache = createStreamingCache(); + + async function* gen1() { + yield [1]; + } + + async function* gen2() { + yield [2]; + } + + cache.getOrCollect('key1', gen1()); + cache.getOrCollect('key2', gen2()); + + strictEqual(cache.has('key1'), true); + strictEqual(cache.has('key2'), true); + + cache.clear(); + + strictEqual(cache.has('key1'), false); + strictEqual(cache.has('key2'), false); + }); + + it('should allow re-adding after clear', async () => { + const cache = createStreamingCache(); + + async function* gen1() { + yield [1, 2]; + } + + const result1 = await cache.getOrCollect('test', gen1()); + + deepStrictEqual(result1, [1, 2]); + + cache.clear(); + + async function* gen2() { + yield [3, 4]; + } + + const result2 = await cache.getOrCollect('test', gen2()); + + deepStrictEqual(result2, [3, 4]); + }); + }); +}); diff --git a/src/constants.mjs b/src/constants.mjs index 30edc199..37b3dc16 100644 --- a/src/constants.mjs +++ b/src/constants.mjs @@ -1,10 +1,10 @@ 'use strict'; // The current running version of Node.js (Environment) -export const DOC_NODE_VERSION = process.version; +export const NODE_VERSION = process.version; // This is the Node.js CHANGELOG to be consumed to generate a list of all major Node.js versions -export const DOC_NODE_CHANGELOG_URL = +export const NODE_CHANGELOG_URL = 'https://raw.githubusercontent.com/nodejs/node/HEAD/CHANGELOG.md'; // The base URL for the Node.js website diff --git a/src/generators.mjs b/src/generators.mjs index ebd39757..49ca9ed5 100644 --- a/src/generators.mjs +++ b/src/generators.mjs @@ -1,112 +1,152 @@ 'use strict'; import { allGenerators } from './generators/index.mjs'; +import logger from './logger/index.mjs'; import { isAsyncGenerator, createStreamingCache } from './streaming.mjs'; import WorkerPool from './threading/index.mjs'; import 
createParallelWorker from './threading/parallel.mjs'; +const generatorsLogger = logger.child('generators'); + /** - * This method creates a system that allows you to register generators - * and then execute them in a specific order, keeping track of the - * generation process, and handling errors that may occur from the - * execution of generating content. - * - * When the final generator is reached, the system will return the - * final generated content. + * Creates a generator orchestration system that manages the execution of + * documentation generators in dependency order, with support for parallel + * processing and streaming results. * - * Generators can output content that can be consumed by other generators; - * Generators can also write to files. These would usually be considered - * the final generators in the chain. + * Generators can output content consumed by other generators or write to files. + * The system handles dependency resolution, parallel scheduling, and result caching. * - * @typedef {{ ast: GeneratorMetadata}} AstGenerator The AST "generator" is a facade for the AST tree and it isn't really a generator - * @typedef {AvailableGenerators & AstGenerator} AllGenerators A complete set of the available generators, including the AST one + * @typedef {{ ast: GeneratorMetadata}} AstGenerator + * @typedef {AvailableGenerators & AstGenerator} AllGenerators * - * @param {ParserOutput} input The API doc AST tree + * @param {ParserOutput} input - The API doc AST tree + * @returns {{ runGenerators: (options: GeneratorOptions) => Promise }} */ const createGenerator = input => { /** - * We store all the registered generators to be processed - * within a Record, so we can access their results at any time whenever needed - * (we store the Promises of the generator outputs, or AsyncGenerators for streaming) - * - * @type {{ [K in keyof AllGenerators]: ReturnType }} + * Cache for generator results (Promises or AsyncGenerators). + * @type {{ [K in keyof AllGenerators]?: ReturnType }} */ const cachedGenerators = { ast: Promise.resolve(input) }; /** - * Cache for collected async generator results. - * When a streaming generator is first consumed, we collect all chunks - * and store the promise here so subsequent consumers share the same result. + * Cache for async generator collection results. + * Ensures collection happens only once when multiple generators depend on + * the same streaming generator. */ const streamingCache = createStreamingCache(); /** - * Gets the dependency input, handling both regular promises and async generators. - * For async generators, ensures only one collection happens and result is cached. + * Shared WorkerPool instance for all generators. + * @type {WorkerPool | null} + */ + let sharedPool = null; + + /** + * Resolves the dependency input for a generator, handling both regular + * promises and async generators. For async generators, creates a shared + * collection so multiple dependents reuse the same result. 
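   * e.g. legacy-html and legacy-json both depend on metadata and share one collection.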
+ * * @param {string} dependsOn - Name of the dependency generator - * @returns {Promise} + * @returns {Promise} Collected results from the dependency */ const getDependencyInput = async dependsOn => { - // First, await the cached promise to get the actual result const result = await cachedGenerators[dependsOn]; - // Check if the result is an async generator (streaming) + // For async generators, collect all chunks (shared across dependents) if (isAsyncGenerator(result)) { + generatorsLogger.debug( + `Collecting async generator output from "${dependsOn}"` + ); + return streamingCache.getOrCollect(dependsOn, result); } - // Regular result - return it directly return result; }; /** - * Runs the Generator engine with the provided top-level input and the given generator options + * Schedules generators for execution without creating new pools. + * Uses the shared pool for all parallel work. * - * @param {GeneratorOptions} options The options for the generator runtime + * @param {GeneratorOptions} options - Generator runtime options + * @param {WorkerPool} pool - Shared worker pool */ - const runGenerators = async options => { - const { generators, threads } = options; + const scheduleGenerators = (options, pool) => { + const { generators } = options; - // WorkerPool for chunk-level parallelization within generators - const chunkPool = new WorkerPool('./chunk-worker.mjs', threads); - - // Schedule all generators, allowing independent ones to run in parallel. - // Each generator awaits its own dependency internally, so generators - // with the same dependency (e.g. legacy-html and legacy-json both depend - // on metadata) will run concurrently once metadata resolves. for (const generatorName of generators) { - // Skip if already scheduled + // Skip already scheduled generators if (generatorName in cachedGenerators) { + generatorsLogger.debug(`Skipping "${generatorName}"`); + continue; } const { dependsOn, generate } = allGenerators[generatorName]; - // Ensure dependency is scheduled (but don't await its result yet) + // Recursively schedule dependencies (without awaiting) if (dependsOn && !(dependsOn in cachedGenerators)) { - // Recursively schedule - don't await, just ensure it's in cachedGenerators - runGenerators({ ...options, generators: [dependsOn] }); + generatorsLogger.debug(`Scheduling "${dependsOn}":"${generatorName}"`); + + scheduleGenerators({ ...options, generators: [dependsOn] }, pool); } - // Create a ParallelWorker for this generator - // The worker supports both batch (map) and streaming (stream) modes - const worker = createParallelWorker(generatorName, chunkPool, options); + // Create a ParallelWorker for this generator's chunk processing + const worker = createParallelWorker(generatorName, pool, options); + + generatorsLogger.debug(`Scheduling generator "${generatorName}"`, { + dependsOn: dependsOn || 'none', + }); + + // Schedule the generator (awaits dependency internally) + cachedGenerators[generatorName] = (async () => { + const dependencyInput = await getDependencyInput(dependsOn); + + generatorsLogger.debug(`Starting generator "${generatorName}"`); - /** - * Schedule the generator - it awaits its dependency internally - * This allows multiple generators with the same dependency to run in parallel - */ - const scheduledGenerator = async () => { - const input = await getDependencyInput(dependsOn); + const result = await generate(dependencyInput, { ...options, worker }); - return generate(input, { ...options, worker }); - }; + generatorsLogger.debug(`Completed generator 
"${generatorName}"`); - cachedGenerators[generatorName] = scheduledGenerator(); + return result; + })(); } + }; + + /** + * Schedules and runs all requested generators with their dependencies. + * Independent generators run in parallel; dependent generators wait for + * their dependencies to complete. + * + * @param {GeneratorOptions} options - Generator runtime options + * @returns {Promise} Result of the last generator in the pipeline + */ + const runGenerators = async options => { + const { generators, threads } = options; + + generatorsLogger.debug(`Starting generator pipeline`, { + generators: generators.join(', '), + threads, + }); - // Returns the value of the last generator of the current pipeline - return cachedGenerators[generators[generators.length - 1]]; + // Create shared WorkerPool for all generators (only once) + if (!sharedPool) { + sharedPool = new WorkerPool('./chunk-worker.mjs', threads); + } + + // Schedule all generators using the shared pool + scheduleGenerators(options, sharedPool); + + // Wait for the last generator's result + const result = await cachedGenerators[generators[generators.length - 1]]; + + // Terminate workers after all work is complete + await sharedPool.terminate(); + + sharedPool = null; + + return result; }; return { runGenerators }; diff --git a/src/generators/api-links/index.mjs b/src/generators/api-links/index.mjs index 95689be5..e15784bf 100644 --- a/src/generators/api-links/index.mjs +++ b/src/generators/api-links/index.mjs @@ -72,10 +72,9 @@ export default { }); if (output) { - await writeFile( - join(output, 'apilinks.json'), - JSON.stringify(definitions) - ); + const out = join(output, 'apilinks.json'); + + await writeFile(out, JSON.stringify(definitions)); } return definitions; diff --git a/src/generators/ast-js/index.mjs b/src/generators/ast-js/index.mjs index ee144795..70cb2cb8 100644 --- a/src/generators/ast-js/index.mjs +++ b/src/generators/ast-js/index.mjs @@ -28,9 +28,12 @@ export default { /** * Process a chunk of JavaScript files in a worker thread. - * @param {unknown} _ - * @param {number[]} itemIndices - * @param {Partial} options + * Parses JS source files into AST representations. + * + * @param {Input} _ - Unused (files loaded from input paths) + * @param {number[]} itemIndices - Indices of input paths to process + * @param {Partial>} options - Serializable options + * @returns {Promise} Parsed JS AST objects for each file */ async processChunk(_, itemIndices, { input }) { const { loadFiles } = createJsLoader(); @@ -50,11 +53,11 @@ export default { }, /** - * @param {Input} i + * @param {Input} _ - Unused (files loaded from input paths) * @param {Partial} options * @returns {AsyncGenerator>} */ - async *generate(i, { input = [], worker }) { + async *generate(_, { input = [], worker }) { const sourceFiles = globSync(input).filter( filePath => extname(filePath) === '.js' ); @@ -62,7 +65,7 @@ export default { const deps = { input: sourceFiles }; // Parse the Javascript sources into ASTs in parallel using worker threads - for await (const chunkResult of worker.stream(sourceFiles, i, deps)) { + for await (const chunkResult of worker.stream(sourceFiles, _, deps)) { yield chunkResult; } }, diff --git a/src/generators/jsx-ast/index.mjs b/src/generators/jsx-ast/index.mjs index 5ccf8fc9..0f0d283d 100644 --- a/src/generators/jsx-ast/index.mjs +++ b/src/generators/jsx-ast/index.mjs @@ -21,9 +21,12 @@ export default { /** * Process a chunk of items in a worker thread. 
- * @param {Input} fullInput - * @param {number[]} itemIndices - * @param {Partial} options + * Transforms metadata entries into JSX AST nodes. + * + * @param {Input} fullInput - Full metadata input for context rebuilding + * @param {number[]} itemIndices - Indices of head nodes to process + * @param {Partial>} options - Serializable options + * @returns {Promise>} JSX AST programs for each module */ async processChunk(fullInput, itemIndices, { index, releases, version }) { const remarkRecma = getRemarkRecma(); diff --git a/src/generators/legacy-html-all/index.mjs b/src/generators/legacy-html-all/index.mjs index 6c879e0e..334e6555 100644 --- a/src/generators/legacy-html-all/index.mjs +++ b/src/generators/legacy-html-all/index.mjs @@ -40,109 +40,71 @@ export default { dependsOn: 'legacy-html', - /** - * Process a chunk of template values from the dependency. - * Extracts toc and content from each entry for aggregation. - * @param {Input} fullInput - * @param {number[]} itemIndices - */ - processChunk(fullInput, itemIndices) { - const results = []; - - for (const idx of itemIndices) { - const entry = fullInput[idx]; - - // Skip the index entry - if (entry.api === 'index') { - continue; - } - - results.push({ - api: entry.api, - section: entry.section, - toc: entry.toc, - content: entry.content, - }); - } - - return results; - }, - /** * Generates the `all.html` file from the `legacy-html` generator * @param {Input} input * @param {Partial} options - * @returns {AsyncGenerator>} + * @returns {Promise} */ - async *generate(input, { version, releases, output, worker }) { - // Collect all chunks as they stream in - const allChunks = []; + async generate(input, { version, releases, output }) { + // Gets a Remark Processor that parses Markdown to minified HTML + const remarkWithRehype = getRemarkRehype(); + + // Current directory path relative to the `index.mjs` file + // from the `legacy-html` generator, as all the assets are there + const baseDir = resolve(import.meta.dirname, '..', 'legacy-html'); + + // Reads the API template.html file to be used as a base for the HTML files + const apiTemplate = await readFile(join(baseDir, 'template.html'), 'utf-8'); + + // Filter out index entries and extract needed properties + const entries = input.filter(entry => entry.api !== 'index'); + + // Aggregates all individual Table of Contents into one giant string + const aggregatedToC = entries.map(entry => entry.toc).join('\n'); + + // Aggregates all individual content into one giant string + const aggregatedContent = entries.map(entry => entry.content).join('\n'); + + // Creates a "mimic" of an `ApiDocMetadataEntry` which fulfils the requirements + // for generating the `tableOfContents` with the `tableOfContents.parseNavigationNode` parser + const sideNavigationFromValues = entries.map(entry => ({ + api: entry.api, + heading: { data: { depth: 1, name: entry.section } }, + })); + + // Generates the global Table of Contents (Sidebar Navigation) + const parsedSideNav = remarkWithRehype.processSync( + tableOfContents(sideNavigationFromValues, { + maxDepth: 1, + parser: tableOfContents.parseNavigationNode, + }) + ); + + const templateValues = { + api: 'all', + added: '', + section: 'All', + version: `v${version.version}`, + toc: aggregatedToC, + nav: String(parsedSideNav), + content: aggregatedContent, + }; + + const result = replaceTemplateValues( + apiTemplate, + templateValues, + releases, + { skipGitHub: true, skipGtocPicker: true } + ); - for await (const chunkResult of worker.stream(input, input, {})) { - 
allChunks.push(...chunkResult); - - yield chunkResult; - } - - // After all chunks are collected, build and write the final file if (output) { - // Gets a Remark Processor that parses Markdown to minified HTML - const remarkWithRehype = getRemarkRehype(); - - // Current directory path relative to the `index.mjs` file - // from the `legacy-html` generator, as all the assets are there - const baseDir = resolve(import.meta.dirname, '..', 'legacy-html'); - - // Reads the API template.html file to be used as a base for the HTML files - const apiTemplate = await readFile( - join(baseDir, 'template.html'), - 'utf-8' - ); - - // Aggregates all individual Table of Contents into one giant string - const aggregatedToC = allChunks.map(entry => entry.toc).join('\n'); - - // Aggregates all individual content into one giant string - const aggregatedContent = allChunks - .map(entry => entry.content) - .join('\n'); - - // Creates a "mimic" of an `ApiDocMetadataEntry` which fulfils the requirements - // for generating the `tableOfContents` with the `tableOfContents.parseNavigationNode` parser - const sideNavigationFromValues = allChunks.map(entry => ({ - api: entry.api, - heading: { data: { depth: 1, name: entry.section } }, - })); - - // Generates the global Table of Contents (Sidebar Navigation) - const parsedSideNav = remarkWithRehype.processSync( - tableOfContents(sideNavigationFromValues, { - maxDepth: 1, - parser: tableOfContents.parseNavigationNode, - }) - ); - - const templateValues = { - api: 'all', - added: '', - section: 'All', - version: `v${version.version}`, - toc: aggregatedToC, - nav: String(parsedSideNav), - content: aggregatedContent, - }; - - const generatedAllTemplate = replaceTemplateValues( - apiTemplate, - templateValues, - releases, - { skipGitHub: true, skipGtocPicker: true } - ); - // We minify the html result to reduce the file size and keep it "clean" - const minified = HTMLMinifier.minify(Buffer.from(generatedAllTemplate)); + const minified = HTMLMinifier.minify(Buffer.from(result), {}); await writeFile(join(output, 'all.html'), minified); } + + return result; }, }; diff --git a/src/generators/legacy-html/index.mjs b/src/generators/legacy-html/index.mjs index a3098ff4..6877f179 100644 --- a/src/generators/legacy-html/index.mjs +++ b/src/generators/legacy-html/index.mjs @@ -19,6 +19,8 @@ import { getRemarkRehypeWithShiki } from '../../utils/remark.mjs'; */ const getHeading = name => ({ data: { depth: 1, name } }); +const remarkRehypeProcessor = getRemarkRehypeWithShiki(); + /** * @typedef {{ * api: string; @@ -52,16 +54,14 @@ export default { /** * Process a chunk of items in a worker thread. - * @param {Input} fullInput - * @param {number[]} itemIndices - * @param {Partial} options + * Builds HTML template objects - FS operations happen in generate(). 
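+   * Keeping the writes in generate() keeps this method side-effect free and
+   * avoids shipping the HTML template and release data to every worker.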
+ * + * @param {Input} fullInput - Full metadata input for context rebuilding + * @param {number[]} itemIndices - Indices of head nodes to process + * @param {Partial>} options - Serializable options + * @returns {Promise} Template objects for each processed module */ - async processChunk( - fullInput, - itemIndices, - { releases, version, output, apiTemplate, parsedSideNav } - ) { - const remarkRehypeProcessor = getRemarkRehypeWithShiki(); + async processChunk(fullInput, itemIndices, { version, parsedSideNav }) { const groupedModules = groupNodesByModule(fullInput); const headNodes = fullInput @@ -104,15 +104,6 @@ export default { content: parsedContent, }; - if (output) { - // We minify the html result to reduce the file size and keep it "clean" - const result = replaceTemplateValues(apiTemplate, template, releases); - - const minified = HTMLMinifier.minify(Buffer.from(result), {}); - - await writeFile(join(output, `${head.api}.html`), minified); - } - results.push(template); } @@ -126,8 +117,6 @@ export default { * @returns {AsyncGenerator>} */ async *generate(input, { index, releases, version, output, worker }) { - const remarkRehypeProcessor = getRemarkRehypeWithShiki(); - const baseDir = import.meta.dirname; const apiTemplate = await readFile(join(baseDir, 'template.html'), 'utf-8'); @@ -156,11 +145,6 @@ export default { parsedSideNav: String(parsedSideNav), }; - // Stream chunks as they complete - HTML files are written immediately - for await (const chunkResult of worker.stream(headNodes, input, deps)) { - yield chunkResult; - } - if (output) { // Define the source folder for API docs assets const srcAssets = join(baseDir, 'assets'); @@ -174,5 +158,21 @@ export default { // Copy all files from assets folder to output, skipping unchanged files await safeCopy(srcAssets, assetsFolder); } + + // Stream chunks as they complete - HTML files are written immediately + for await (const chunkResult of worker.stream(headNodes, input, deps)) { + // Write files for this chunk in the generate method (main thread) + if (output) { + for (const template of chunkResult) { + const result = replaceTemplateValues(apiTemplate, template, releases); + + const minified = HTMLMinifier.minify(Buffer.from(result), {}); + + await writeFile(join(output, `${template.api}.html`), minified); + } + } + + yield chunkResult; + } }, }; diff --git a/src/generators/legacy-json-all/index.mjs b/src/generators/legacy-json-all/index.mjs index d6838eda..01aaf255 100644 --- a/src/generators/legacy-json-all/index.mjs +++ b/src/generators/legacy-json-all/index.mjs @@ -29,50 +29,14 @@ export default { dependsOn: 'legacy-json', - /** - * Process a chunk of sections from the dependency. - * Extracts and enriches relevant properties for aggregation. - * @param {Input} fullInput - * @param {number[]} itemIndices - */ - processChunk(fullInput, itemIndices) { - /** @type {import('./types.d.ts').Output} */ - const chunkResult = { - miscs: [], - modules: [], - classes: [], - globals: [], - methods: [], - }; - - for (const idx of itemIndices) { - const section = fullInput[idx]; - - // Copy the relevant properties from each section into our chunk result - for (const property of PROPERTIES_TO_COPY) { - const items = section[property]; - - if (Array.isArray(items)) { - const enrichedItems = section.source - ? items.map(item => ({ ...item, source: section.source })) - : items; - - chunkResult[property].push(...enrichedItems); - } - } - } - - return chunkResult; - }, - /** * Generates the legacy JSON `all.json` file. 
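+   * Aggregation now runs on the main thread, since the per-section work was
+   * already parallelized by the `legacy-json` dependency.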
   *
   * @param {Input} input
   * @param {Partial<GeneratorOptions>} options
-   * @returns {AsyncGenerator<Output>}
+   * @returns {Promise<Output>}
    */
-  async *generate(input, { output, worker }) {
+  async generate(input, { output }) {
     /**
      * The consolidated output object that will contain
      * combined data from all sections in the input.
@@ -87,18 +51,25 @@ export default {
       methods: [],
     };
 
-    // Stream chunks as they complete and aggregate results
-    for await (const chunkResult of worker.stream(input, input, {})) {
-      // Merge chunk result into generatedValue
+    // Aggregate all sections into the output
+    for (const section of input) {
       for (const property of PROPERTIES_TO_COPY) {
-        generatedValue[property].push(...chunkResult[property]);
-      }
+        const items = section[property];
 
-      yield chunkResult;
+        if (Array.isArray(items)) {
+          const enrichedItems = section.source
+            ? items.map(item => ({ ...item, source: section.source }))
+            : items;
+
+          generatedValue[property].push(...enrichedItems);
+        }
+      }
     }
 
     if (output) {
       await writeFile(join(output, 'all.json'), JSON.stringify(generatedValue));
     }
+
+    return generatedValue;
   },
 };
diff --git a/src/generators/legacy-json/index.mjs b/src/generators/legacy-json/index.mjs
index 72aef46c..92f14c75 100644
--- a/src/generators/legacy-json/index.mjs
+++ b/src/generators/legacy-json/index.mjs
@@ -6,6 +6,8 @@ import { join } from 'node:path';
 import { createSectionBuilder } from './utils/buildSection.mjs';
 import { groupNodesByModule } from '../../utils/generators.mjs';
 
+const buildSection = createSectionBuilder();
+
 /**
  * This generator is responsible for generating the legacy JSON files for the
  * legacy API docs for retro-compatibility. It is to be replaced while we work
@@ -30,12 +32,13 @@ export default {
 
   /**
    * Process a chunk of items in a worker thread.
-   * @param {Input} fullInput
-   * @param {number[]} itemIndices
-   * @param {Partial<GeneratorOptions>} options
+   * Builds JSON sections - FS operations happen in generate().
+   *
+   * @param {Input} fullInput - Full metadata input for context rebuilding
+   * @param {number[]} itemIndices - Indices of head nodes to process
+   * @returns {Promise<Output>} JSON sections for each processed module
    */
-  async processChunk(fullInput, itemIndices, { output }) {
-    const buildSection = createSectionBuilder();
+  async processChunk(fullInput, itemIndices) {
     const groupedModules = groupNodesByModule(fullInput);
 
     const headNodes = fullInput.filter(node => node.heading.depth === 1);
@@ -45,16 +49,8 @@ export default {
     for (const idx of itemIndices) {
       const head = headNodes[idx];
       const nodes = groupedModules.get(head.api);
-      const section = buildSection(head, nodes);
-
-      if (output) {
-        await writeFile(
-          join(output, `${head.api}.json`),
-          JSON.stringify(section)
-        );
-      }
 
-      results.push(section);
+      results.push(buildSection(head, nodes));
     }
 
     return results;
@@ -73,6 +69,14 @@ export default {
     const deps = { output };
 
     for await (const chunkResult of worker.stream(headNodes, input, deps)) {
+      if (output) {
+        for (const section of chunkResult) {
+          const out = join(output, `${section.api}.json`);
+
+          await writeFile(out, JSON.stringify(section));
+        }
+      }
+
       yield chunkResult;
     }
   },
diff --git a/src/generators/legacy-json/types.d.ts b/src/generators/legacy-json/types.d.ts
index 3174fc7a..9b6f1d47 100644
--- a/src/generators/legacy-json/types.d.ts
+++ b/src/generators/legacy-json/types.d.ts
@@ -45,6 +45,11 @@ export interface Meta {
 
 /**
  * Base interface for sections in the API documentation, representing common properties. 
*/ export interface SectionBase { + /** + * The inferred API file or module name (e.g., 'fs', 'http', 'path'). + */ + api: string; + /** * The type of section (e.g., 'module', 'method', 'property'). */ diff --git a/src/generators/legacy-json/utils/buildSection.mjs b/src/generators/legacy-json/utils/buildSection.mjs index 247a7a5f..185bfba0 100644 --- a/src/generators/legacy-json/utils/buildSection.mjs +++ b/src/generators/legacy-json/utils/buildSection.mjs @@ -187,7 +187,11 @@ export const createSectionBuilder = () => { * @returns {import('../types.d.ts').ModuleSection} The constructed module section. */ return (head, entries) => { - const rootModule = { type: 'module', source: head.api_doc_source }; + const rootModule = { + type: 'module', + api: head.api, + source: head.api_doc_source, + }; buildHierarchy(entries).forEach(entry => handleEntry(entry, rootModule)); diff --git a/src/generators/man-page/index.mjs b/src/generators/man-page/index.mjs index 229e1126..e54484c4 100644 --- a/src/generators/man-page/index.mjs +++ b/src/generators/man-page/index.mjs @@ -38,6 +38,7 @@ export default { async generate(input, options) { // Filter to only 'cli'. const components = input.filter(({ api }) => api === 'cli'); + if (!components.length) { throw new Error('Could not find any `cli` documentation.'); } @@ -46,9 +47,11 @@ export default { const optionsStart = components.findIndex( ({ slug }) => slug === DOC_SLUG_OPTIONS ); + const environmentStart = components.findIndex( ({ slug }) => slug === DOC_SLUG_ENVIRONMENT ); + // The first header that is <3 in depth after environmentStart const environmentEnd = components.findIndex( ({ heading }, index) => heading.depth < 3 && index > environmentStart diff --git a/src/generators/metadata/index.mjs b/src/generators/metadata/index.mjs index 3949f3a9..34d235d1 100644 --- a/src/generators/metadata/index.mjs +++ b/src/generators/metadata/index.mjs @@ -22,9 +22,10 @@ export default { * Process a chunk of API doc files in a worker thread. * Called by chunk-worker.mjs for parallel processing. 
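+   * The full input is structured-cloned into each worker, so chunks select
+   * their files by index rather than by object reference.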
* - * @param {Input} fullInput - Full input array - * @param {number[]} itemIndices - Indices of items to process - * @param {Partial} options + * @param {Input} fullInput - Full input array (parsed API doc files) + * @param {number[]} itemIndices - Indices of files to process + * @param {Partial>} options - Serializable options + * @returns {Promise} Metadata entries for processed files */ async processChunk(fullInput, itemIndices, { typeMap }) { const results = []; diff --git a/src/generators/metadata/utils/parse.mjs b/src/generators/metadata/utils/parse.mjs index 68436303..bb3efeb4 100644 --- a/src/generators/metadata/utils/parse.mjs +++ b/src/generators/metadata/utils/parse.mjs @@ -12,6 +12,10 @@ import createQueries from '../../../utils/queries/index.mjs'; import { getRemark } from '../../../utils/remark.mjs'; import { IGNORE_STABILITY_STEMS } from '../constants.mjs'; +// Creates an instance of the Remark processor with GFM support +// which is used for stringifying the AST tree back to Markdown +const remarkProcessor = getRemark(); + /** * This generator generates a flattened list of metadata entries from a API doc * @@ -42,10 +46,6 @@ export const parseApiDoc = ({ file, tree }, typeMap) => { addStabilityMetadata, } = createQueries(typeMap); - // Creates an instance of the Remark processor with GFM support - // which is used for stringifying the AST tree back to Markdown - const remarkProcessor = getRemark(); - // Creates a new Slugger instance for the current API doc file const nodeSlugger = createNodeSlugger(); diff --git a/src/generators/types.d.ts b/src/generators/types.d.ts index 201d8e39..caab0826 100644 --- a/src/generators/types.d.ts +++ b/src/generators/types.d.ts @@ -6,22 +6,26 @@ declare global { // to be type complete and runtime friendly within `runGenerators` export type AvailableGenerators = typeof publicGenerators; - // ParallelWorker interface for item-level parallelization using real worker threads + /** + * ParallelWorker interface for distributing work across Node.js worker threads. + * Streams results as chunks complete, enabling pipeline parallelism. + */ export interface ParallelWorker { /** - * Process items in parallel using real worker threads. - * Items are split into chunks, each chunk processed by a separate worker. + * Processes items in parallel across worker threads and yields results + * as each chunk completes. Enables downstream processing to begin + * while upstream chunks are still being processed. 
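+     *
+     * @example
+     * // Sketch: `consume` is a placeholder for any per-chunk handler
+     * for await (const chunk of worker.stream(items, fullInput, {})) {
+     *   consume(chunk);
+     * }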
* - * @param items - Items to process (used to determine indices) + * @param items - Items to process (determines chunk distribution) * @param fullInput - Full input data for context rebuilding in workers * @param opts - Additional options to pass to workers - * @returns Results in same order as input items + * @yields Each chunk's results as they complete */ - map( + stream( items: T[], fullInput: unknown, opts?: Record - ): Promise; + ): AsyncGenerator; } // This is the runtime config passed to the API doc generators diff --git a/src/logger/__tests__/logger.test.mjs b/src/logger/__tests__/logger.test.mjs index 006d2495..d00d9982 100644 --- a/src/logger/__tests__/logger.test.mjs +++ b/src/logger/__tests__/logger.test.mjs @@ -221,4 +221,90 @@ describe('createLogger', () => { }, ]); }); + + describe('setLogLevel', () => { + it('should change log level at runtime using number', t => { + const transport = t.mock.fn(); + + const logger = createLogger(transport, LogLevel.info); + + // Should log at info level + logger.info('Info message'); + strictEqual(transport.mock.callCount(), 1); + + // Change to error level + logger.setLogLevel(LogLevel.error); + + // Should not log info anymore + logger.info('Another info message'); + strictEqual(transport.mock.callCount(), 1); + + // Should log error + logger.error('Error message'); + strictEqual(transport.mock.callCount(), 2); + }); + + it('should change log level at runtime using string', t => { + const transport = t.mock.fn(); + + const logger = createLogger(transport, LogLevel.error); + + // Should not log at info level initially + logger.info('Info message'); + strictEqual(transport.mock.callCount(), 0); + + // Change to debug level using string + logger.setLogLevel('debug'); + + // Should now log info + logger.info('Another info message'); + strictEqual(transport.mock.callCount(), 1); + }); + + it('should handle case-insensitive level names', t => { + const transport = t.mock.fn(); + + const logger = createLogger(transport, LogLevel.fatal); + + logger.setLogLevel('DEBUG'); + logger.debug('Debug message'); + strictEqual(transport.mock.callCount(), 1); + + logger.setLogLevel('Info'); + logger.debug('Debug message 2'); + strictEqual(transport.mock.callCount(), 1); // Should not log debug at info level + }); + + it('should propagate to child loggers', t => { + const transport = t.mock.fn(); + + const logger = createLogger(transport, LogLevel.info); + const child = logger.child('child-module'); + + // Child should initially respect parent's info level + child.debug('Debug message'); + strictEqual(transport.mock.callCount(), 0); + + child.info('Info message'); + strictEqual(transport.mock.callCount(), 1); + + // Change parent to debug level + logger.setLogLevel(LogLevel.debug); + + // Child should now log debug messages + child.debug('Debug message after level change'); + strictEqual(transport.mock.callCount(), 2); + + // Change parent to error level + logger.setLogLevel(LogLevel.error); + + // Child should not log info anymore + child.info('Info message after error level'); + strictEqual(transport.mock.callCount(), 2); + + // Child should log error + child.error('Error message'); + strictEqual(transport.mock.callCount(), 3); + }); + }); }); diff --git a/src/logger/index.mjs b/src/logger/index.mjs index 4b2ac5b4..da54ded6 100644 --- a/src/logger/index.mjs +++ b/src/logger/index.mjs @@ -1,5 +1,6 @@ 'use strict'; +import { LogLevel } from './constants.mjs'; import { createLogger } from './logger.mjs'; import { transports } from './transports/index.mjs'; 
@@ -25,3 +26,5 @@ export const Logger = (transportName = 'console') => { // Default logger instance using console transport export default Logger(); + +export { LogLevel }; diff --git a/src/logger/logger.mjs b/src/logger/logger.mjs index 0723a2c7..2a0a9a47 100644 --- a/src/logger/logger.mjs +++ b/src/logger/logger.mjs @@ -9,7 +9,7 @@ import { LogLevel } from './constants.mjs'; /** * Creates a logger instance with the specified transport, log level and an - * optional module name. + * optional module name. Child loggers share the parent's log level. * * @param {import('./types').Transport} transport - Function to handle log output. * @param {number} [loggerLevel] - Minimum log level to output. @@ -20,6 +20,21 @@ export const createLogger = ( loggerLevel = LogLevel.info, module ) => { + /** @type {number} */ + let currentLevel = loggerLevel; + + /** @type {Set>} */ + const children = new Set(); + + /** + * Checks if the given log level should be logged based on the current logger + * level. + * + * @param {number} level - Log level to check. + * @returns {boolean} + */ + const shouldLog = level => level >= currentLevel; + /** * Logs a message at the given level with optional metadata. * @@ -41,8 +56,10 @@ export const createLogger = ( // Extract message string from Error object or use message as-is let msg; + if (message instanceof Error) { msg = message.message; + metadata.stack = message.stack; } else { msg = message; @@ -108,22 +125,44 @@ export const createLogger = ( log(LogLevel.debug, message, metadata); /** - * Creates a child logger for a specific module. + * Creates a child logger for a specific module. Child loggers share the + * parent's log level. * - * @param {string} module - Module name for the child logger. + * @param {string} childModule - Module name for the child logger. * @returns {ReturnType} */ - const child = module => createLogger(transport, loggerLevel, module); + const child = childModule => { + const childLogger = createLogger(transport, currentLevel, childModule); + + children.add(childLogger); + + return childLogger; + }; /** - * Checks if the given log level should be logged based on the current logger - * level. + * Sets the log level for this logger instance and all child loggers. * - * @param {number} level - Log level to check. 
- * @returns {boolean} + * @param {number | string} level - Log level (number) or level name (string) */ - const shouldLog = level => { - return level >= loggerLevel; + const setLogLevel = level => { + let newLogLevel = level; + + if (typeof newLogLevel === 'string') { + newLogLevel = newLogLevel.toLowerCase(); + + if (newLogLevel in LogLevel === false) { + return; + } + + newLogLevel = LogLevel[newLogLevel]; + } + + currentLevel = newLogLevel; + + // Propagate to all child loggers + for (const childLogger of children) { + childLogger.setLogLevel(currentLevel); + } }; return { @@ -133,5 +172,6 @@ export const createLogger = ( fatal, debug, child, + setLogLevel, }; }; diff --git a/src/logger/transports/console.mjs b/src/logger/transports/console.mjs index 3dfb7e19..0edd50f1 100644 --- a/src/logger/transports/console.mjs +++ b/src/logger/transports/console.mjs @@ -1,5 +1,7 @@ 'use strict'; +import { styleText } from 'node:util'; + import { prettifyLevel } from '../utils/colors.mjs'; import { prettifyTimestamp } from '../utils/time.mjs'; @@ -10,7 +12,7 @@ import { prettifyTimestamp } from '../utils/time.mjs'; * @returns {void} */ const console = ({ level, message, timestamp, metadata = {}, module }) => { - const { file, stack } = metadata; + const { file, stack, ...rest } = metadata; const time = prettifyTimestamp(timestamp); @@ -36,6 +38,12 @@ const console = ({ level, message, timestamp, metadata = {}, module }) => { process.stdout.write(position); } + // Print remaining metadata inline in purple + if (Object.keys(rest).length > 0) { + const metaStr = styleText('magenta', JSON.stringify(rest)); + process.stdout.write(` ${metaStr}`); + } + process.stdout.write('\n'); if (stack) { diff --git a/src/streaming.mjs b/src/streaming.mjs index 8548ae46..3d8fc7bc 100644 --- a/src/streaming.mjs +++ b/src/streaming.mjs @@ -1,221 +1,100 @@ 'use strict'; -/** - * Streaming utilities for processing chunks asynchronously. - * Provides a unified interface for both parallel (worker threads) and - * single-threaded processing modes. - */ +import logger from './logger/index.mjs'; + +const streamingLogger = logger.child('streaming'); /** - * Helper to check if a value is an async generator/iterable - * @param {any} obj - * @returns {boolean} + * Checks if a value is an async generator/iterable. + * + * @param {unknown} obj - Value to check + * @returns {obj is AsyncGenerator} True if the value is an async iterable */ export const isAsyncGenerator = obj => - obj && typeof obj[Symbol.asyncIterator] === 'function'; + obj !== null && + typeof obj === 'object' && + typeof obj[Symbol.asyncIterator] === 'function'; /** * Collects all values from an async generator into a flat array. * Each yielded chunk is spread into the results array. + * * @template T * @param {AsyncGenerator} generator - Async generator yielding arrays - * @returns {Promise} - Flattened array of all yielded items + * @returns {Promise} Flattened array of all yielded items */ export const collectAsyncGenerator = async generator => { const results = []; - for await (const chunk of generator) { - results.push(...chunk); - } - - return results; -}; - -/** - * Splits a count of items into chunks of specified size. - * Returns arrays of indices for each chunk. 
- * @param {number} count - Total number of items - * @param {number} size - Maximum items per chunk - * @returns {number[][]} Array of index arrays - */ -export const createIndexChunks = (count, size) => { - const chunks = []; + let chunkCount = 0; - for (let i = 0; i < count; i += size) { - const end = Math.min(i + size, count); - const chunk = []; + for await (const chunk of generator) { + chunkCount++; - for (let j = i; j < end; j++) { - chunk.push(j); - } + results.push(...chunk); - chunks.push(chunk); + streamingLogger.debug(`Collected chunk ${chunkCount}`, { + itemsInChunk: chunk.length, + }); } - return chunks; -}; - -/** - * Creates an array of indices from 0 to count-1 - * @param {number} count - Number of indices to create - * @returns {number[]} Array of indices - */ -export const createIndices = count => { - const indices = []; - - for (let i = 0; i < count; i++) { - indices.push(i); - } + streamingLogger.debug(`Collection complete`, { + totalItems: results.length, + chunks: chunkCount, + }); - return indices; + return results; }; /** - * Yields results from an array of promises as they complete. - * Uses Promise.race pattern to yield in completion order, not input order. - * @template T - * @param {Promise[]} promises - Array of promises to race - * @yields {T} - Results as they complete - */ -export async function* yieldAsCompleted(promises) { - const pending = new Map(promises.map((p, i) => [i, p])); - - while (pending.size > 0) { - const entries = [...pending.entries()]; - - const racingPromises = entries.map(([idx, promise]) => - promise.then(result => ({ idx, result })) - ); - - const { idx, result } = await Promise.race(racingPromises); - - pending.delete(idx); - - yield result; - } -} - -/** - * Creates a streaming processor that can run in either parallel or single-threaded mode. - * Provides a unified interface for generators to process chunks. - * - * @param {object} config - Configuration object - * @param {Function} config.processChunk - Function to process a chunk: (fullInput, indices, options) => Promise - * @param {number} config.chunkSize - Number of items per chunk - * @param {boolean} config.parallel - Whether to use parallel processing - * @param {Function} [config.runInWorker] - Function to run chunk in worker: (indices, fullInput, options) => Promise - */ -export const createStreamingProcessor = ({ - processChunk, - chunkSize, - parallel, - runInWorker, -}) => ({ - /** - * Process all items and return results as a single array. - * @template T, R - * @param {T[]} items - Items to process - * @param {T[]} fullInput - Full input for context - * @param {object} extra - Extra options for processChunk - * @returns {Promise} - */ - async map(items, fullInput, extra) { - const itemCount = items.length; - - if (itemCount === 0) { - return []; - } - - // Single chunk - process directly - if (!parallel || itemCount <= chunkSize) { - const indices = createIndices(itemCount); - - return processChunk(fullInput, indices, extra); - } - - // Multiple chunks - process in parallel - const indexChunks = createIndexChunks(itemCount, chunkSize); - - const chunkResults = await Promise.all( - indexChunks.map(indices => runInWorker(indices, fullInput, extra)) - ); - - return chunkResults.flat(); - }, - - /** - * Process items and yield results as each chunk completes. 
- * @template T, R - * @param {T[]} items - Items to process - * @param {T[]} fullInput - Full input for context - * @param {object} extra - Extra options for processChunk - * @yields {R[]} - Each chunk's results as they complete - */ - async *stream(items, fullInput, extra) { - const itemCount = items.length; - - if (itemCount === 0) { - return; - } - - // Single chunk - yield directly - if (!parallel || itemCount <= chunkSize) { - const indices = createIndices(itemCount); - const result = await processChunk(fullInput, indices, extra); - - yield result; - - return; - } - - // Multiple chunks - yield as they complete - const indexChunks = createIndexChunks(itemCount, chunkSize); - - const chunkPromises = indexChunks.map(indices => - runInWorker(indices, fullInput, extra) - ); - - yield* yieldAsCompleted(chunkPromises); - }, -}); - -/** - * Creates a cache system for collecting async generator results. + * Creates a cache for async generator collection results. * Ensures that when multiple consumers request the same async generator, * only one collection happens and all consumers share the result. */ export const createStreamingCache = () => { - /** @type {Map>} */ + /** @type {Map>} */ const cache = new Map(); return { /** - * Get the collected result for a generator, starting collection if needed. + * Gets the collected result for a generator, starting collection if needed. + * * @param {string} key - Cache key (usually generator name) - * @param {AsyncGenerator} generator - The async generator to collect - * @returns {Promise} - Promise resolving to collected results + * @param {AsyncGenerator} generator - The async generator to collect + * @returns {Promise} Promise resolving to collected results */ getOrCollect(key, generator) { - if (!cache.has(key)) { + const hasKey = cache.has(key); + + if (!hasKey) { cache.set(key, collectAsyncGenerator(generator)); } + streamingLogger.debug( + hasKey + ? `Using cached result for "${key}"` + : `Starting collection for "${key}"` + ); + return cache.get(key); }, /** - * Check if a key exists in the cache - * @param {string} key - * @returns {boolean} + * Checks if a key exists in the cache. + * + * @param {string} key - Cache key to check + * @returns {boolean} True if the key exists */ has(key) { return cache.has(key); }, /** - * Clear the cache + * Clears all cached results. 
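+     * Subsequent getOrCollect() calls will collect their generators again.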
*/ clear() { + streamingLogger.debug(`Clearing streaming cache`); + cache.clear(); }, }; diff --git a/src/threading/__tests__/WorkerPool.test.mjs b/src/threading/__tests__/WorkerPool.test.mjs index c878fe48..dda1a31f 100644 --- a/src/threading/__tests__/WorkerPool.test.mjs +++ b/src/threading/__tests__/WorkerPool.test.mjs @@ -1,4 +1,4 @@ -import { deepStrictEqual, ok, strictEqual } from 'node:assert'; +import { ok, rejects, strictEqual } from 'node:assert'; import { describe, it } from 'node:test'; import WorkerPool from '../index.mjs'; @@ -11,26 +11,14 @@ describe('WorkerPool', () => { const pool = new WorkerPool(workerPath, 4); strictEqual(pool.threads, 4); - strictEqual(pool.getActiveThreadCount(), 0); + strictEqual(pool.allWorkers.size, 0); }); - it('should initialize with zero active threads', () => { + it('should initialize with no workers', () => { const pool = new WorkerPool(workerPath, 2); - strictEqual(pool.getActiveThreadCount(), 0); - }); - - it('should change active thread count atomically', () => { - const pool = new WorkerPool(workerPath, 2); - - pool.changeActiveThreadCount(1); - strictEqual(pool.getActiveThreadCount(), 1); - - pool.changeActiveThreadCount(2); - strictEqual(pool.getActiveThreadCount(), 3); - - pool.changeActiveThreadCount(-1); - strictEqual(pool.getActiveThreadCount(), 2); + strictEqual(pool.allWorkers.size, 0); + strictEqual(pool.idleWorkers.length, 0); }); it('should queue tasks when thread limit is reached', async () => { @@ -54,9 +42,11 @@ describe('WorkerPool', () => { ok(Array.isArray(results)); strictEqual(results.length, 2); + + await pool.terminate(); }); - it('should run multiple tasks in parallel with runAll', async () => { + it('should run multiple tasks via individual run calls', async () => { const pool = new WorkerPool(workerPath, 2); const tasks = [ @@ -74,17 +64,158 @@ describe('WorkerPool', () => { }, ]; - const results = await pool.runAll(tasks); + const results = await Promise.all(tasks.map(task => pool.run(task))); ok(Array.isArray(results)); strictEqual(results.length, 2); + + await pool.terminate(); + }); + + it('should handle default thread count', () => { + const pool = new WorkerPool(workerPath); + + strictEqual(pool.threads, 1); + }); + + it('should accept URL for worker script', () => { + const url = new URL('./chunk-worker.mjs', import.meta.url); + const pool = new WorkerPool(url, 2); + + ok(pool.workerScript instanceof URL); + strictEqual(pool.threads, 2); + }); + + it('should process queued tasks after completion', async () => { + const pool = new WorkerPool(workerPath, 1); + + // Queue up 3 tasks with only 1 thread + const tasks = []; + + for (let i = 0; i < 3; i++) { + tasks.push( + pool.run({ + generatorName: 'ast-js', + fullInput: [], + itemIndices: [], + options: {}, + }) + ); + } + + // All should complete even with only 1 thread + const results = await Promise.all(tasks); + + strictEqual(results.length, 3); + + await pool.terminate(); + }); + + it('should reject on worker error with result.error', async () => { + const pool = new WorkerPool(workerPath, 1); + + // Using an invalid generator name should cause an error + await rejects(async () => { + await pool.run({ + generatorName: 'nonexistent-generator', + fullInput: [], + itemIndices: [0], + options: {}, + }); + }, Error); + + await pool.terminate(); + }); + + it('should handle concurrent tasks up to thread limit', async () => { + const pool = new WorkerPool(workerPath, 4); + + // Run 4 tasks concurrently (at thread limit) + const tasks = Array.from({ length: 4 }, () 
=> + pool.run({ + generatorName: 'ast-js', + fullInput: [], + itemIndices: [], + options: {}, + }) + ); + + const results = await Promise.all(tasks); + + strictEqual(results.length, 4); + results.forEach(r => ok(Array.isArray(r))); + + await pool.terminate(); }); - it('should handle empty task array', async () => { + it('should return results correctly from workers', async () => { const pool = new WorkerPool(workerPath, 2); - const results = await pool.runAll([]); + const result = await pool.run({ + generatorName: 'ast-js', + fullInput: [], + itemIndices: [], + options: {}, + }); + + ok(Array.isArray(result)); + + await pool.terminate(); + }); + + it('should reuse workers for multiple tasks', async () => { + const pool = new WorkerPool(workerPath, 2); + + // Run first batch + await pool.run({ + generatorName: 'ast-js', + fullInput: [], + itemIndices: [], + options: {}, + }); + + // Workers should now be idle + strictEqual(pool.idleWorkers.length, 1); + strictEqual(pool.allWorkers.size, 1); + + // Run another task - should reuse idle worker + await pool.run({ + generatorName: 'ast-js', + fullInput: [], + itemIndices: [], + options: {}, + }); + + // Still same number of workers + strictEqual(pool.allWorkers.size, 1); + + await pool.terminate(); + }); + + it('should terminate all workers', async () => { + const pool = new WorkerPool(workerPath, 2); + + // Spawn some workers + await Promise.all([ + pool.run({ + generatorName: 'ast-js', + fullInput: [], + itemIndices: [], + options: {}, + }), + pool.run({ + generatorName: 'ast-js', + fullInput: [], + itemIndices: [], + options: {}, + }), + ]); + + strictEqual(pool.allWorkers.size, 2); + + await pool.terminate(); - deepStrictEqual(results, []); + strictEqual(pool.allWorkers.size, 0); + strictEqual(pool.idleWorkers.length, 0); }); }); diff --git a/src/threading/__tests__/parallel.test.mjs b/src/threading/__tests__/parallel.test.mjs index 3090e234..6ac9285b 100644 --- a/src/threading/__tests__/parallel.test.mjs +++ b/src/threading/__tests__/parallel.test.mjs @@ -1,20 +1,56 @@ -import { deepStrictEqual, ok, strictEqual } from 'node:assert'; +import { deepStrictEqual, ok, rejects, strictEqual } from 'node:assert'; import { describe, it } from 'node:test'; import WorkerPool from '../index.mjs'; import createParallelWorker from '../parallel.mjs'; +/** + * Helper to collect all results from an async generator. 
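+ * Mirrors collectAsyncGenerator() from src/streaming.mjs.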
+ * + * @template T + * @param {AsyncGenerator} generator + * @returns {Promise} + */ +async function collectStream(generator) { + const results = []; + + for await (const chunk of generator) { + results.push(...chunk); + } + + return results; +} + +/** + * Helper to collect chunks (not flattened) + * + * @template T + * @param {AsyncGenerator} generator + * @returns {Promise} + */ +async function collectChunks(generator) { + const chunks = []; + + for await (const chunk of generator) { + chunks.push(chunk); + } + + return chunks; +} + describe('createParallelWorker', () => { // Use relative path from WorkerPool's location (src/threading/) const workerPath = './chunk-worker.mjs'; - it('should create a ParallelWorker with map method', () => { + it('should create a ParallelWorker with stream method', async () => { const pool = new WorkerPool(workerPath, 2); const worker = createParallelWorker('metadata', pool, { threads: 2 }); ok(worker); - strictEqual(typeof worker.map, 'function'); + strictEqual(typeof worker.stream, 'function'); + + await pool.terminate(); }); it('should use main thread for single-threaded execution', async () => { @@ -22,33 +58,39 @@ describe('createParallelWorker', () => { const worker = createParallelWorker('ast-js', pool, { threads: 1 }); const items = []; - const results = await worker.map(items, items, {}); + const results = await collectStream(worker.stream(items, items, {})); ok(Array.isArray(results)); strictEqual(results.length, 0); + + await pool.terminate(); }); - it('should use main thread for small item counts', async () => { + it('should use main thread when threads is 1', async () => { const pool = new WorkerPool(workerPath, 4); - const worker = createParallelWorker('ast-js', pool, { threads: 4 }); + const worker = createParallelWorker('ast-js', pool, { threads: 1 }); const items = []; - const results = await worker.map(items, items, {}); + const results = await collectStream(worker.stream(items, items, {})); ok(Array.isArray(results)); strictEqual(results.length, 0); + + await pool.terminate(); }); - it('should chunk items for parallel processing', async () => { + it('should stream chunks for parallel processing', async () => { const pool = new WorkerPool(workerPath, 2); const worker = createParallelWorker('ast-js', pool, { threads: 2 }); const items = []; - const results = await worker.map(items, items, {}); + const results = await collectStream(worker.stream(items, items, {})); strictEqual(results.length, 0); ok(Array.isArray(results)); + + await pool.terminate(); }); it('should pass extra options to worker', async () => { @@ -58,9 +100,11 @@ describe('createParallelWorker', () => { const extra = { gitRef: 'main', customOption: 'value' }; const items = []; - const results = await worker.map(items, items, extra); + const results = await collectStream(worker.stream(items, items, extra)); ok(Array.isArray(results)); + + await pool.terminate(); }); it('should serialize and deserialize data correctly', async () => { @@ -69,17 +113,143 @@ describe('createParallelWorker', () => { const worker = createParallelWorker('ast-js', pool, { threads: 2 }); const items = []; - const results = await worker.map(items, items, {}); + const results = await collectStream(worker.stream(items, items, {})); ok(Array.isArray(results)); + + await pool.terminate(); }); it('should handle empty items array', async () => { const pool = new WorkerPool(workerPath, 2); const worker = createParallelWorker('ast-js', pool, { threads: 2 }); - const results = await worker.map([], [], {}); + 
const results = await collectStream(worker.stream([], [], {})); deepStrictEqual(results, []); + + await pool.terminate(); + }); + + it('should throw for generators without processChunk', async () => { + const pool = new WorkerPool(workerPath, 2); + + // 'json-simple' doesn't have processChunk + const worker = createParallelWorker('json-simple', pool, { + threads: 2, + chunkSize: 5, + }); + + // Non-empty items array to trigger processChunk check + const items = [{ file: { stem: 'test' }, tree: {} }]; + + await rejects( + async () => { + await collectStream(worker.stream(items, items, {})); + }, + { + message: /does not support chunk processing/, + } + ); + + await pool.terminate(); + }); + + it('should distribute items to multiple worker threads', async () => { + const pool = new WorkerPool(workerPath, 4); + + const worker = createParallelWorker('metadata', pool, { + threads: 4, + chunkSize: 20, // Large chunk size, but optimal calculation will use 1 per thread + }); + + // Create mock input that matches expected shape for metadata generator + const mockInput = [ + { + file: { stem: 'test1', basename: 'test1.md' }, + tree: { type: 'root', children: [] }, + }, + { + file: { stem: 'test2', basename: 'test2.md' }, + tree: { type: 'root', children: [] }, + }, + { + file: { stem: 'test3', basename: 'test3.md' }, + tree: { type: 'root', children: [] }, + }, + { + file: { stem: 'test4', basename: 'test4.md' }, + tree: { type: 'root', children: [] }, + }, + ]; + + const chunks = await collectChunks( + worker.stream(mockInput, mockInput, { typeMap: {} }) + ); + + // With 4 items and 4 threads, optimal chunk size is 1, so we get 4 chunks + strictEqual(chunks.length, 4); + + // Each chunk should be an array + for (const chunk of chunks) { + ok(Array.isArray(chunk)); + } + + await pool.terminate(); + }); + + it('should yield results as chunks complete', async () => { + const pool = new WorkerPool(workerPath, 2); + + const worker = createParallelWorker('metadata', pool, { + threads: 2, + chunkSize: 1, + }); + + const mockInput = [ + { + file: { stem: 'test1', basename: 'test1.md' }, + tree: { type: 'root', children: [] }, + }, + { + file: { stem: 'test2', basename: 'test2.md' }, + tree: { type: 'root', children: [] }, + }, + ]; + + const chunks = await collectChunks( + worker.stream(mockInput, mockInput, { typeMap: {} }) + ); + + // With 2 items and chunkSize 1, should get 2 chunks + strictEqual(chunks.length, 2); + + await pool.terminate(); + }); + + it('should work with single thread and items', async () => { + const pool = new WorkerPool(workerPath, 1); + + const worker = createParallelWorker('metadata', pool, { + threads: 1, + chunkSize: 5, + }); + + const mockInput = [ + { + file: { stem: 'test1', basename: 'test1.md' }, + tree: { type: 'root', children: [] }, + }, + ]; + + const chunks = await collectChunks( + worker.stream(mockInput, mockInput, { typeMap: {} }) + ); + + // Single thread mode yields one chunk + strictEqual(chunks.length, 1); + ok(Array.isArray(chunks[0])); + + await pool.terminate(); }); }); diff --git a/src/threading/chunk-worker.mjs b/src/threading/chunk-worker.mjs index 31112109..bbfc4012 100644 --- a/src/threading/chunk-worker.mjs +++ b/src/threading/chunk-worker.mjs @@ -1,13 +1,35 @@ -import { parentPort, workerData } from 'node:worker_threads'; +import { parentPort } from 'node:worker_threads'; import { allGenerators } from '../generators/index.mjs'; -const { generatorName, fullInput, itemIndices, options } = workerData; +/** + * Handles incoming work requests from the 
parent thread. + * Processes a chunk of items using the specified generator's processChunk method. + * + * @param {{ + * generatorName: string, + * fullInput: unknown[], + * itemIndices: number[], + * options: object + * }} opts - Task options from parent thread + * @returns {Promise} + */ +const handleWork = async opts => { + const { generatorName, fullInput, itemIndices, options } = opts; -const generator = allGenerators[generatorName]; + try { + const generator = allGenerators[generatorName]; -// Generators must implement processChunk for item-level parallelization -generator - .processChunk(fullInput, itemIndices, options) - .then(result => parentPort.postMessage(result)) - .catch(error => parentPort.postMessage({ error: error.message })); + const result = await generator.processChunk( + fullInput, + itemIndices, + options + ); + + parentPort.postMessage(result); + } catch (error) { + parentPort.postMessage({ error: error.message }); + } +}; + +parentPort.on('message', handleWork); diff --git a/src/threading/index.mjs b/src/threading/index.mjs index a6c19fd2..ca21ac2e 100644 --- a/src/threading/index.mjs +++ b/src/threading/index.mjs @@ -1,19 +1,58 @@ import { Worker } from 'node:worker_threads'; +import logger from '../logger/index.mjs'; + +const poolLogger = logger.child('WorkerPool'); + /** - * WorkerPool class to manage a pool of worker threads + * WorkerPool manages a pool of reusable Node.js worker threads for parallel processing. + * Workers are spawned on-demand and kept alive to process multiple tasks, avoiding + * the overhead of creating new workers for each task. + * + * Tasks are distributed to available workers. If all workers are busy, tasks are + * queued and processed in FIFO order as workers become free. + * + * @example + * const pool = new WorkerPool('./my-worker.mjs', 4); + * const result = await pool.run({ task: 'process', data: [1, 2, 3] }); */ export default class WorkerPool { - /** @private {SharedArrayBuffer} - Shared memory for active thread count */ - sharedBuffer = new SharedArrayBuffer(Int32Array.BYTES_PER_ELEMENT); - /** @private {Int32Array} - A typed array to access shared memory */ - activeThreads = new Int32Array(this.sharedBuffer); - /** @private {Array} - Queue of pending tasks */ + /** + * Pool of idle workers ready to accept tasks. + * @type {Worker[]} + */ + idleWorkers = []; + + /** + * Set of all spawned workers (for cleanup). + * @type {Set} + */ + allWorkers = new Set(); + + /** + * Queue of pending tasks waiting for available workers. + * Each entry contains { workerData, resolve, reject }. + * @type {Array<{ workerData: object, resolve: Function, reject: Function }>} + */ queue = []; /** - * @param {string | URL} workerScript - Path to the worker script (relative to this file or absolute URL) - * @param {number} threads - Maximum number of concurrent worker threads + * URL to the worker script file. + * @type {URL} + */ + workerScript; + + /** + * Maximum number of concurrent worker threads. + * @type {number} + */ + threads; + + /** + * Creates a new WorkerPool instance. 
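+   * Workers are spawned lazily, once the first tasks are queued.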
+ * + * @param {string | URL} workerScript - Path to worker script file (relative to this module or absolute URL) + * @param {number} [threads=1] - Maximum concurrent worker threads */ constructor(workerScript = './generator-worker.mjs', threads = 1) { this.workerScript = @@ -22,87 +61,173 @@ export default class WorkerPool { : new URL(workerScript, import.meta.url); this.threads = threads; + + poolLogger.debug(`WorkerPool initialized`, { threads, workerScript }); } /** - * Gets the current active thread count. - * @returns {number} The current active thread count. + * Spawns a new worker and sets up message handling. + * The worker will be reused for multiple tasks. + * + * @private + * @returns {Worker} The newly spawned worker */ - getActiveThreadCount() { - return Atomics.load(this.activeThreads, 0); + spawnWorker() { + const worker = new Worker(this.workerScript); + + this.allWorkers.add(worker); + + worker.on('message', result => { + // Get the current task before clearing it + const currentTask = worker.currentTask; + + worker.currentTask = null; + + // Resolve/reject the completed task first + if (currentTask) { + if (result?.error) { + currentTask.reject(new Error(result.error)); + } else { + currentTask.resolve(result); + } + } + + // Mark worker as idle and process any queued work + this.idleWorkers.push(worker); + this.processQueue(); + }); + + worker.on('error', err => { + poolLogger.debug(`Worker error`, { error: err.message }); + + // Remove failed worker from pool + this.allWorkers.delete(worker); + + const idx = this.idleWorkers.indexOf(worker); + + if (idx !== -1) { + this.idleWorkers.splice(idx, 1); + } + + // Reject current task if any + if (worker.currentTask) { + worker.currentTask.reject(err); + + worker.currentTask = null; + } + }); + + return worker; } /** - * Changes the active thread count atomically by a given delta. - * @param {number} delta - The value to increment or decrement the active thread count by. + * Executes a task on a specific worker. + * + * @private + * @param {Worker} worker - Worker to execute the task + * @param {object} workerData - Data to send to the worker + * @param {Function} resolve - Promise resolve function + * @param {Function} reject - Promise reject function */ - changeActiveThreadCount(delta) { - Atomics.add(this.activeThreads, 0, delta); + executeTask(worker, workerData, resolve, reject) { + worker.currentTask = { resolve, reject }; + + worker.postMessage(workerData); } /** - * Runs a task in a worker thread with the given data. - * @param {Object} workerData - Data to pass to the worker thread - * @returns {Promise} Resolves with the worker result, or rejects with an error + * Runs a task in a worker thread. If all workers are busy, the task + * is queued and executed when a worker becomes available. + * + * Workers are reused across tasks for efficiency. + * + * @template T + * @param {object} workerData - Data to pass to the worker thread + * @param {string} workerData.generatorName - Name of the generator to run + * @param {unknown} workerData.fullInput - Full input data for context + * @param {number[]} workerData.itemIndices - Indices of items to process + * @param {object} workerData.options - Generator options + * @returns {Promise} Resolves with the worker result, rejects on error */ run(workerData) { return new Promise((resolve, reject) => { - /** - * Runs the worker thread and handles the result or error. 
-       * @private
-       */
-      const run = () => {
-        this.changeActiveThreadCount(1);
-
-        const worker = new Worker(this.workerScript, { workerData });
-
-        worker.on('message', result => {
-          this.changeActiveThreadCount(-1);
-          this.processQueue();
-
-          if (result?.error) {
-            reject(new Error(result.error));
-          } else {
-            resolve(result);
-          }
-        });
+      // Always queue the task first
+      this.queue.push({ workerData, resolve, reject });
 
-        worker.on('error', err => {
-          this.changeActiveThreadCount(-1);
-          this.processQueue();
-          reject(err);
-        });
-      };
-
-      if (this.getActiveThreadCount() >= this.threads) {
-        this.queue.push(run);
-      } else {
-        run();
-      }
+      // Then try to process the queue
+      this.processQueue();
     });
   }
 
   /**
-   * Run multiple tasks in parallel, distributing across worker threads.
-   * @template T, R
-   * @param {T[]} tasks - Array of task data to process
-   * @returns {Promise<R[]>} Results in same order as input tasks
-   */
-  async runAll(tasks) {
-    return Promise.all(tasks.map(task => this.run(task)));
-  }
-
-  /**
-   * Process the worker thread queue to start the next available task.
+   * Processes queued tasks by assigning them to available or new workers.
+   * Spawns additional workers as needed, up to the thread limit.
+   *
    * @private
    */
   processQueue() {
-    if (this.queue.length > 0 && this.getActiveThreadCount() < this.threads) {
-      const next = this.queue.shift();
+    // First, assign tasks to any idle workers
+    while (this.queue.length > 0 && this.idleWorkers.length > 0) {
+      const worker = this.idleWorkers.pop();
+      const { workerData, resolve, reject } = this.queue.shift();
+
+      poolLogger.debug(`Task started (reusing worker)`, {
+        idleWorkers: this.idleWorkers.length,
+        totalWorkers: this.allWorkers.size,
+        queueSize: this.queue.length,
+      });
 
-      if (next) {
-        next();
+      this.executeTask(worker, workerData, resolve, reject);
+    }
+
+    // Calculate how many new workers we need
+    const workersNeeded = Math.min(
+      this.queue.length,
+      this.threads - this.allWorkers.size
+    );
+
+    if (workersNeeded > 0) {
+      poolLogger.debug(`Spawning workers`, {
+        workersNeeded,
+        currentWorkers: this.allWorkers.size,
+        maxThreads: this.threads,
+        queueSize: this.queue.length,
+      });
+
+      // Spawn the needed workers synchronously: `new Worker()` returns
+      // immediately, and registering each worker in `allWorkers` in the
+      // same tick prevents re-entrant `processQueue()` calls from
+      // overshooting the thread limit (deferring spawns allowed overspawn)
+      for (let i = 0; i < workersNeeded; i++) {
+        const { workerData, resolve, reject } = this.queue.shift();
+
+        const worker = this.spawnWorker();
+
+        this.executeTask(worker, workerData, resolve, reject);
+      }
     }
+
+    if (this.queue.length > 0) {
+      poolLogger.debug(`Tasks queued (waiting for workers)`, {
+        queueSize: this.queue.length,
+        totalWorkers: this.allWorkers.size,
+      });
+    }
+  }
+
+  /**
+   * Terminates all workers in the pool.
+   * Should be called when the pool is no longer needed. 
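+   * Note that any tasks still waiting in the queue are dropped without
+   * being rejected, so callers should only terminate once work has settled.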
+ * + * @returns {Promise} + */ + async terminate() { + const terminations = [...this.allWorkers].map(worker => worker.terminate()); + + await Promise.all(terminations); + + this.allWorkers.clear(); + this.idleWorkers = []; + this.queue = []; } } diff --git a/src/threading/parallel.mjs b/src/threading/parallel.mjs index 818a825f..9a42c48b 100644 --- a/src/threading/parallel.mjs +++ b/src/threading/parallel.mjs @@ -1,19 +1,82 @@ 'use strict'; import { allGenerators } from '../generators/index.mjs'; -import { - createIndexChunks, - createIndices, - yieldAsCompleted, -} from '../streaming.mjs'; +import logger from '../logger/index.mjs'; + +const parallelLogger = logger.child('parallel'); + +/** + * Splits a count of items into chunks of specified size. + * + * @param {number} count - Total number of items + * @param {number} size - Maximum items per chunk + * @returns {number[][]} Array of index arrays, each representing a chunk + */ +const createIndexChunks = (count, size) => { + const chunks = []; + + for (let start = 0; start < count; start += size) { + const end = Math.min(start + size, count); + const chunk = []; + + for (let i = start; i < end; i++) { + chunk.push(i); + } + + chunks.push(chunk); + } + + return chunks; +}; /** - * Creates a ParallelWorker that uses real Node.js Worker threads - * for parallel processing of items. + * Creates an array of sequential indices from 0 to count-1. * - * @param {string} generatorName - Name of the generator (for chunk processing) - * @param {import('./index.mjs').default} pool - WorkerPool instance for spawning workers - * @param {object} options - Generator options + * @param {number} count - Number of indices to create + * @returns {number[]} Array of indices [0, 1, 2, ..., count-1] + */ +const createIndices = count => Array.from({ length: count }, (_, i) => i); + +/** + * Yields results from an array of promises as they complete. + * Results are yielded in completion order, not input order. + * + * @template T + * @param {Promise[]} promises - Array of promises to race + * @yields {T} Results as they complete + */ +async function* yieldAsCompleted(promises) { + if (promises.length === 0) { + return; + } + + // Wrap each promise to track completion and remove from pending set + const pending = new Set(); + + for (const promise of promises) { + const tagged = promise.then(result => { + pending.delete(tagged); + + return result; + }); + + pending.add(tagged); + } + + // Yield results as each promise completes + while (pending.size > 0) { + yield await Promise.race(pending); + } +} + +/** + * Creates a ParallelWorker that uses Node.js Worker threads for parallel + * processing of items. The worker distributes work across multiple threads + * and streams results as chunks complete. + * + * @param {string} generatorName - Name of the generator for chunk processing + * @param {import('./index.mjs').default} pool - WorkerPool instance + * @param {Partial} options - Generator options * @returns {ParallelWorker} */ export default function createParallelWorker(generatorName, pool, options) { @@ -22,8 +85,10 @@ export default function createParallelWorker(generatorName, pool, options) { const generator = allGenerators[generatorName]; /** - * Strips non-serializable properties from options for worker transfer + * Strips non-serializable properties from options for worker transfer. 
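
Aside: the completion-order behavior of yieldAsCompleted is easiest to see with staggered timers. A self-contained sketch using the function defined above:

    import { setTimeout as delay } from 'node:timers/promises';

    const promises = [
      delay(30).then(() => 'slow'),
      delay(10).then(() => 'fast'),
      delay(20).then(() => 'medium'),
    ];

    // Prints 'fast', then 'medium', then 'slow': completion order, not input order
    for await (const value of yieldAsCompleted(promises)) {
      console.log(value);
    }
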
+ * * @param {object} extra - Extra options to merge + * @returns {object} Serializable options object */ const serializeOptions = extra => { const serialized = { ...options, ...extra }; @@ -35,70 +100,15 @@ export default function createParallelWorker(generatorName, pool, options) { return { /** - * Process items in parallel using real worker threads. - * Items are split into chunks, each chunk processed by a separate worker. - * Waits for all chunks to complete before returning. + * Processes items in parallel and yields each chunk's results as they complete. + * Enables pipeline parallelism where downstream generators can start processing + * results while upstream chunks are still being processed. * * @template T, R - * @param {T[]} items - Items to process (must be serializable) + * @param {T[]} items - Items to process (determines chunk distribution) * @param {T[]} fullInput - Full input data for context rebuilding in workers - * @param {object} extra - Generator-specific context (e.g. apiTemplate, parsedSideNav) - * @returns {Promise} - Results in same order as input items - */ - async map(items, fullInput, extra) { - const itemCount = items.length; - - if (itemCount === 0) { - return []; - } - - if (!generator.processChunk) { - throw new Error( - `Generator "${generatorName}" does not support chunk processing` - ); - } - - // For single thread or small workloads - run in main thread - if (threads <= 1 || itemCount <= 2) { - const indices = createIndices(itemCount); - - return generator.processChunk(fullInput, indices, { - ...options, - ...extra, - }); - } - - // Divide items into chunks based on chunkSize - const indexChunks = createIndexChunks(itemCount, chunkSize); - - // Process chunks in parallel using worker threads - const chunkResults = await pool.runAll( - indexChunks.map(indices => ({ - generatorName, - fullInput, - itemIndices: indices, - options: serializeOptions(extra), - })) - ); - - // Flatten results - return chunkResults.flat(); - }, - - /** - * Process items in parallel and yield each chunk's results as they complete. - * This enables pipeline parallelism: downstream generators can start processing - * chunk results while upstream chunks are still being processed. - * - * Use this when the consuming generator also supports chunking, allowing it - * to begin work immediately on each completed chunk rather than waiting - * for all chunks to finish. 
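
Aside: in practice, "pipeline parallelism" just means a downstream generator re-yields chunks as workers finish, mirroring how ast-js consumes the stream elsewhere in this series. A minimal consumer sketch (shape assumed):

    const exampleGenerator = {
      async *generate(input, { worker }) {
        // Each chunk is yielded as soon as its worker completes, so a
        // dependent generator can start before the last chunk is done
        for await (const chunkResult of worker.stream(input, input, {})) {
          yield chunkResult;
        }
      },
    };
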
-     *
-     * @template T, R
-     * @param {T[]} items - Items to process (must be serializable)
-     * @param {T[]} fullInput - Full input data for context rebuilding in workers
-     * @param {object} extra - Generator-specific context
-     * @yields {R[]} - Each chunk's results as they complete
+     * @param {object} extra - Generator-specific context (e.g., apiTemplate)
+     * @yields {R[]} Each chunk's results as they complete
      */
     async *stream(items, fullInput, extra) {
       const itemCount = items.length;
@@ -113,8 +123,12 @@ export default function createParallelWorker(generatorName, pool, options) {
         );
       }
 
-      // For single thread or small workloads - yield single result
-      if (threads <= 1 || itemCount <= 2) {
+      // Single-threaded mode: process directly in main thread
+      if (threads <= 1) {
+        parallelLogger.debug(`Processing ${itemCount} items in main thread`, {
+          generator: generatorName,
+        });
+
         const indices = createIndices(itemCount);
 
         const result = await generator.processChunk(fullInput, indices, {
@@ -127,10 +141,22 @@ export default function createParallelWorker(generatorName, pool, options) {
         return;
       }
 
-      // Divide items into chunks based on chunkSize
-      const indexChunks = createIndexChunks(itemCount, chunkSize);
+      // Multi-threaded mode: distribute work across worker threads
+      // Calculate optimal chunk size to maximize thread utilization
+      // Use provided chunkSize as maximum, but create at least as many chunks as threads
+      const optimalChunkSize = Math.max(1, Math.ceil(itemCount / threads));
+      const effectiveChunkSize = Math.min(chunkSize, optimalChunkSize);
+      const indexChunks = createIndexChunks(itemCount, effectiveChunkSize);
+
+      parallelLogger.debug(
+        `Distributing ${itemCount} items across ${threads} threads`,
+        {
+          generator: generatorName,
+          chunks: indexChunks.length,
+          chunkSize: effectiveChunkSize,
+        }
+      );
 
-      // Create all chunk promises upfront for parallel execution
       const chunkPromises = indexChunks.map(indices =>
         pool.run({
           generatorName,
@@ -140,8 +166,19 @@ export default function createParallelWorker(generatorName, pool, options) {
           options: serializeOptions(extra),
         })
       );
 
-      // Yield chunks as they complete
-      yield* yieldAsCompleted(chunkPromises);
+      // Yield results as each chunk completes
+      let completedChunks = 0;
+
+      for await (const result of yieldAsCompleted(chunkPromises)) {
+        completedChunks++;
+
+        parallelLogger.debug(
+          `Chunk ${completedChunks}/${indexChunks.length} completed`,
+          { generator: generatorName }
+        );
+
+        yield result;
+      }
     },
   };
}

From 2c6a8ac3956e5735331a0713854d379dd3b266a0 Mon Sep 17 00:00:00 2001
From: Claudio Wunder
Date: Mon, 8 Dec 2025 19:57:21 +0100
Subject: [PATCH 03/25] fix: correct awaiting and worker spawning

---
 src/__tests__/generators.test.mjs             |  59 +++++----
 src/generators.mjs                            |  28 ++++-
 src/generators/metadata/index.mjs             |  21 ++--
 src/generators/types.d.ts                     |  14 ++-
 src/generators/web/index.mjs                  |  68 ++++------
 src/logger/__tests__/logger.test.mjs          |  73 +++++++++++
 .../__tests__/transports/console.test.mjs     | 113 +++++++++++++++++
 src/threading/__tests__/WorkerPool.test.mjs   | 115 +++++++++++++++++-
 src/threading/__tests__/parallel.test.mjs     |  30 +++++
 src/threading/index.mjs                       |  33 +++--
 src/threading/parallel.mjs                    |  20 ++-
 11 files changed, 471 insertions(+), 103 deletions(-)

diff --git a/src/__tests__/generators.test.mjs b/src/__tests__/generators.test.mjs
index 9b599ca1..c9a91d8a 100644
--- a/src/__tests__/generators.test.mjs
+++ b/src/__tests__/generators.test.mjs
@@ -2,7 +2,6 @@ import { ok, strictEqual } from 'node:assert';
 import { describe, it } from 'node:test';
 
 import createGenerator
from '../generators.mjs'; -import { isAsyncGenerator } from '../streaming.mjs'; describe('createGenerator', () => { // Simple mock input for testing @@ -37,88 +36,99 @@ describe('createGenerator', () => { it('should return the ast input directly when generators list is empty', async () => { const { runGenerators } = createGenerator(mockInput); - const result = await runGenerators({ + const results = await runGenerators({ ...mockOptions, generators: ['ast'], }); - // The 'ast' key should resolve to the original input - ok(result); + // Returns array of results, first element is the 'ast' result + ok(Array.isArray(results)); + strictEqual(results.length, 1); + ok(results[0]); }); it('should run metadata generator', async () => { const { runGenerators } = createGenerator(mockInput); - const result = await runGenerators({ + const results = await runGenerators({ ...mockOptions, generators: ['metadata'], }); - // metadata returns an async generator - ok(isAsyncGenerator(result)); + // Returns array with one element - the collected metadata array + ok(Array.isArray(results)); + strictEqual(results.length, 1); + ok(Array.isArray(results[0])); }); it('should handle generator with dependency', async () => { const { runGenerators } = createGenerator(mockInput); // legacy-html depends on metadata - const result = await runGenerators({ + const results = await runGenerators({ ...mockOptions, generators: ['legacy-html'], }); - // Should complete without error - ok(result !== undefined); + // Should complete without error - returns array of results + ok(Array.isArray(results)); + strictEqual(results.length, 1); }); it('should skip already scheduled generators', async () => { const { runGenerators } = createGenerator(mockInput); // Running with ['metadata', 'metadata'] should skip the second - const result = await runGenerators({ + const results = await runGenerators({ ...mockOptions, generators: ['metadata', 'metadata'], }); - ok(isAsyncGenerator(result)); + // Returns array with two elements (same result cached for both) + ok(Array.isArray(results)); + strictEqual(results.length, 2); }); it('should handle multiple generators in sequence', async () => { const { runGenerators } = createGenerator(mockInput); - // Run metadata twice - the system should skip the already scheduled one - // Avoid json-simple since it writes to disk - const result = await runGenerators({ + // Run metadata - just one generator + const results = await runGenerators({ ...mockOptions, generators: ['metadata'], }); - // Result should be from the last generator - ok(result !== undefined); + // Returns array of results + ok(Array.isArray(results)); + strictEqual(results.length, 1); }); it('should collect async generator results for dependents', async () => { const { runGenerators } = createGenerator(mockInput); // legacy-json depends on metadata (async generator) - const result = await runGenerators({ + const results = await runGenerators({ ...mockOptions, generators: ['legacy-json'], }); - ok(result !== undefined); + ok(Array.isArray(results)); + strictEqual(results.length, 1); }); it('should use multiple threads when specified', async () => { const { runGenerators } = createGenerator(mockInput); - const result = await runGenerators({ + const results = await runGenerators({ ...mockOptions, threads: 4, generators: ['metadata'], }); - ok(isAsyncGenerator(result)); + // Returns array of results + ok(Array.isArray(results)); + strictEqual(results.length, 1); + ok(Array.isArray(results[0])); }); it('should pass options to generators', 
async () => { @@ -126,12 +136,15 @@ describe('createGenerator', () => { const customTypeMap = { TestType: 'https://example.com/TestType' }; - const result = await runGenerators({ + const results = await runGenerators({ ...mockOptions, typeMap: customTypeMap, generators: ['metadata'], }); - ok(isAsyncGenerator(result)); + // Returns array of results + ok(Array.isArray(results)); + strictEqual(results.length, 1); + ok(Array.isArray(results[0])); }); }); diff --git a/src/generators.mjs b/src/generators.mjs index 49ca9ed5..37663314 100644 --- a/src/generators.mjs +++ b/src/generators.mjs @@ -120,7 +120,7 @@ const createGenerator = input => { * their dependencies to complete. * * @param {GeneratorOptions} options - Generator runtime options - * @returns {Promise} Result of the last generator in the pipeline + * @returns {Promise} Results of all requested generators */ const runGenerators = async options => { const { generators, threads } = options; @@ -138,15 +138,31 @@ const createGenerator = input => { // Schedule all generators using the shared pool scheduleGenerators(options, sharedPool); - // Wait for the last generator's result - const result = await cachedGenerators[generators[generators.length - 1]]; + // Wait for ALL requested generators to complete (not just the last one) + const results = []; - // Terminate workers after all work is complete - await sharedPool.terminate(); + for (const generatorName of generators) { + let result = await cachedGenerators[generatorName]; + + // If the generator returns an async generator, consume it + // to ensure all side effects (file writes, etc.) complete + if (isAsyncGenerator(result)) { + generatorsLogger.debug( + `Consuming async generator output from "${generatorName}"` + ); + + result = await streamingCache.getOrCollect(generatorName, result); + } + + results.push(result); + } + + // Terminate workers after all work is complete (fire-and-forget) + sharedPool.terminate(); sharedPool = null; - return result; + return results; }; return { runGenerators }; diff --git a/src/generators/metadata/index.mjs b/src/generators/metadata/index.mjs index 34d235d1..2fd88380 100644 --- a/src/generators/metadata/index.mjs +++ b/src/generators/metadata/index.mjs @@ -27,15 +27,18 @@ export default { * @param {Partial>} options - Serializable options * @returns {Promise} Metadata entries for processed files */ - async processChunk(fullInput, itemIndices, { typeMap }) { - const results = []; - - for (const idx of itemIndices) { - results.push(...parseApiDoc(fullInput[idx], typeMap)); - } - - return results; - }, + processChunk: Object.assign( + async (fullInput, itemIndices, { typeMap }) => { + const results = []; + + for (const idx of itemIndices) { + results.push(...parseApiDoc(fullInput[idx], typeMap)); + } + + return results; + }, + { sliceInput: true } // Only needs individual items, not full context + ), /** * @param {Input} inputs diff --git a/src/generators/types.d.ts b/src/generators/types.d.ts index caab0826..c899c96c 100644 --- a/src/generators/types.d.ts +++ b/src/generators/types.d.ts @@ -131,10 +131,20 @@ declare global { * @param options - Generator options (without worker, which isn't serializable) * @returns Array of results for the processed items */ - processChunk?: ( + processChunk?: (( fullInput: I, itemIndices: number[], options: Partial> - ) => Promise; + ) => Promise) & { + /** + * When true, only the items at the specified indices are sent to workers + * instead of the full input array. 
This reduces serialization overhead + * for generators that don't need full context to process individual items. + * + * Set this to true when processChunk only accesses `fullInput[idx]` for + * each index in itemIndices, and doesn't need the full array for context. + */ + sliceInput?: boolean; + }; } } diff --git a/src/generators/web/index.mjs b/src/generators/web/index.mjs index 2f072381..6521b84b 100644 --- a/src/generators/web/index.mjs +++ b/src/generators/web/index.mjs @@ -13,6 +13,9 @@ import { processJSXEntries } from './utils/processing.mjs'; * - Client-side JavaScript with code splitting * - Bundled CSS styles * + * Note: This generator does NOT support streaming/chunked processing because + * processJSXEntries needs all entries together to generate code-split bundles. + * * @type {GeneratorMetadata} */ export default { @@ -21,24 +24,6 @@ export default { description: 'Generates HTML/CSS/JS bundles from JSX AST entries', dependsOn: 'jsx-ast', - /** - * Process a chunk of JSX AST entries. - * This simply passes through the entries for aggregation in the main generate function. - * The actual processing happens in processJSXEntries which needs all entries together. - * - * @param {import('../jsx-ast/utils/buildContent.mjs').JSXContent[]} fullInput - * @param {number[]} itemIndices - */ - processChunk(fullInput, itemIndices) { - const results = []; - - for (const idx of itemIndices) { - results.push(fullInput[idx]); - } - - return results; - }, - /** * Main generation function that processes JSX AST entries into web bundles. * @@ -46,44 +31,31 @@ export default { * @param {Partial} options - Generator options. * @param {string} [options.output] - Output directory for generated files. * @param {string} options.version - Documentation version string. 
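
Aside: declaring the sliceInput flag described above is just an Object.assign on the chunk handler, and the renumbering consequence is worth spelling out: with sliceInput, a chunk covering original items 40-49 arrives as a ten-element array with itemIndices renumbered to 0-9. A minimal declaration sketch (`transform` is a stand-in, illustrative only):

    const transform = (item, options) => item; // stand-in for real per-item work

    const processChunk = Object.assign(
      // fullInput here is only this chunk's slice, so the renumbered
      // indices 0..n-1 index directly into it
      async (fullInput, itemIndices, options) =>
        itemIndices.map(idx => transform(fullInput[idx], options)),
      { sliceInput: true }
    );
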
-   * @returns {AsyncGenerator>}
+   * @returns {Promise}
    */
-  async *generate(entries, { output, version, worker }) {
-    // Start loading template while chunks stream in (parallel I/O)
-    const templatePromise = readFile(
+  async generate(entries, { output, version }) {
+    const template = await readFile(
       new URL('template.html', import.meta.url),
       'utf-8'
     );
 
-    // Collect all chunks as they stream in from jsx-ast
-    const allEntries = [];
+    // Create AST builders for server and client programs
+    const astBuilders = createASTBuilder();
 
-    for await (const chunkResult of worker.stream(entries, entries, {})) {
-      allEntries.push(...chunkResult);
+    // Create require function for resolving external packages in server code
+    const requireFn = createRequire(import.meta.url);
 
-      yield chunkResult;
-    }
+    // Process all entries: convert JSX to HTML/CSS/JS
+    const { results, css, chunks } = await processJSXEntries(
+      entries,
+      template,
+      astBuilders,
+      requireFn,
+      { version }
+    );
 
-    // Now that all chunks are collected, process them together
-    // (processJSXEntries needs all entries to generate code-split bundles)
+    // Write the generated assets only when an output directory is provided
     if (output) {
-      const template = await templatePromise;
-
-      // Create AST builders for server and client programs
-      const astBuilders = createASTBuilder();
-
-      // Create require function for resolving external packages in server code
-      const requireFn = createRequire(import.meta.url);
-
-      // Process all entries: convert JSX to HTML/CSS/JS
-      const { results, css, chunks } = await processJSXEntries(
-        allEntries,
-        template,
-        astBuilders,
-        requireFn,
-        { version }
-      );
-
       // Write HTML files
       for (const { html, api } of results) {
         await writeFile(join(output, `${api}.html`), html, 'utf-8');
@@ -97,5 +69,7 @@
       // Write CSS bundle
       await writeFile(join(output, 'styles.css'), css, 'utf-8');
     }
+
+    return results;
   },
 };
diff --git a/src/logger/__tests__/logger.test.mjs b/src/logger/__tests__/logger.test.mjs
index d00d9982..1673b453 100644
--- a/src/logger/__tests__/logger.test.mjs
+++ b/src/logger/__tests__/logger.test.mjs
@@ -306,5 +306,78 @@ describe('createLogger', () => {
     child.error('Error message');
     strictEqual(transport.mock.callCount(), 3);
   });
+
+  it('should propagate to nested child loggers', t => {
+    const transport = t.mock.fn();
+
+    const logger = createLogger(transport, LogLevel.error);
+    const child1 = logger.child('child1');
+    const child2 = child1.child('child2');
+    const child3 = child2.child('child3');
+
+    // None should log debug initially
+    logger.debug('root debug');
+    child1.debug('child1 debug');
+    child2.debug('child2 debug');
+    child3.debug('child3 debug');
+    strictEqual(transport.mock.callCount(), 0);
+
+    // Change root to debug level
+    logger.setLogLevel(LogLevel.debug);
+
+    // All should now log debug
+    child1.debug('child1 debug after');
+    strictEqual(transport.mock.callCount(), 1);
+
+    child2.debug('child2 debug after');
+    strictEqual(transport.mock.callCount(), 2);
+
+    child3.debug('child3 debug after');
+    strictEqual(transport.mock.callCount(), 3);
+  });
+
+  it('should propagate to multiple children at same level', t => {
+    const transport = t.mock.fn();
+
+    const logger = createLogger(transport, LogLevel.error);
+    const childA = logger.child('childA');
+    const childB = logger.child('childB');
+    const childC = logger.child('childC');
+
+    // None should log info
+    childA.info('A info');
+    childB.info('B info');
+    childC.info('C info');
+    strictEqual(transport.mock.callCount(), 0);
+
+    // 
Change root to info + logger.setLogLevel(LogLevel.info); + + // All children should now log info + childA.info('A info after'); + strictEqual(transport.mock.callCount(), 1); + + childB.info('B info after'); + strictEqual(transport.mock.callCount(), 2); + + childC.info('C info after'); + strictEqual(transport.mock.callCount(), 3); + }); + + it('should ignore invalid string level names', t => { + const transport = t.mock.fn(); + + const logger = createLogger(transport, LogLevel.info); + + // Try to set invalid level + logger.setLogLevel('invalid'); + + // Should still log at info level + logger.info('Info message'); + strictEqual(transport.mock.callCount(), 1); + + logger.debug('Debug message'); + strictEqual(transport.mock.callCount(), 1); // Debug should be filtered + }); }); }); diff --git a/src/logger/__tests__/transports/console.test.mjs b/src/logger/__tests__/transports/console.test.mjs index cf346707..0cf3bf14 100644 --- a/src/logger/__tests__/transports/console.test.mjs +++ b/src/logger/__tests__/transports/console.test.mjs @@ -219,4 +219,117 @@ describe('console', () => { '\n', ]); }); + + it('should print inline metadata in magenta', t => { + process.env.FORCE_COLOR = '1'; + + t.mock.timers.enable({ apis: ['Date'] }); + + const fn = t.mock.method(process.stdout, 'write'); + + fn.mock.mockImplementation(() => {}); + + console({ + level: LogLevel.info, + message: 'Test message', + metadata: { + threads: 4, + generator: 'metadata', + }, + timestamp: Date.now(), + }); + + const callsArgs = process.stdout.write.mock.calls.map( + call => call.arguments[0] + ); + + strictEqual(process.stdout.write.mock.callCount(), 5); + deepStrictEqual(callsArgs, [ + '[00:00:00.000]', + ' \x1B[32mINFO\x1B[39m', + ': Test message', + ' \x1B[35m{"threads":4,"generator":"metadata"}\x1B[39m', + '\n', + ]); + }); + + it('should not print metadata block if only file/stack present', t => { + process.env.FORCE_COLOR = '1'; + + t.mock.timers.enable({ apis: ['Date'] }); + + const fn = t.mock.method(process.stdout, 'write'); + + fn.mock.mockImplementation(() => {}); + + console({ + level: LogLevel.info, + message: 'Test message', + metadata: { + file: { + path: 'test.md', + }, + stack: 'Error: test\n at test.mjs:1:1', + }, + timestamp: Date.now(), + }); + + const callsArgs = process.stdout.write.mock.calls.map( + call => call.arguments[0] + ); + + // Should have: timestamp, level, message, file path, newline, stack + // But NOT a metadata JSON block (since only file/stack are present) + strictEqual(process.stdout.write.mock.callCount(), 6); + deepStrictEqual(callsArgs, [ + '[00:00:00.000]', + ' \x1B[32mINFO\x1B[39m', + ': Test message', + ' at test.md', + '\n', + 'Error: test\n at test.mjs:1:1', + ]); + }); + + it('should print both file info and extra metadata', t => { + process.env.FORCE_COLOR = '1'; + + t.mock.timers.enable({ apis: ['Date'] }); + + const fn = t.mock.method(process.stdout, 'write'); + + fn.mock.mockImplementation(() => {}); + + console({ + level: LogLevel.debug, + message: 'Processing chunk', + metadata: { + file: { + path: 'api.md', + position: { + start: { line: 10 }, + end: { line: 20 }, + }, + }, + chunkId: 3, + itemCount: 15, + }, + timestamp: Date.now(), + }); + + const callsArgs = process.stdout.write.mock.calls.map( + call => call.arguments[0] + ); + + strictEqual(process.stdout.write.mock.callCount(), 7); + deepStrictEqual(callsArgs, [ + '[00:00:00.000]', + ' \x1B[34mDEBUG\x1B[39m', + ': Processing chunk', + ' at api.md', + '(10:20)', + ' \x1B[35m{"chunkId":3,"itemCount":15}\x1B[39m', + 
'\n', + ]); + }); }); diff --git a/src/threading/__tests__/WorkerPool.test.mjs b/src/threading/__tests__/WorkerPool.test.mjs index dda1a31f..a0b4e41b 100644 --- a/src/threading/__tests__/WorkerPool.test.mjs +++ b/src/threading/__tests__/WorkerPool.test.mjs @@ -1,4 +1,4 @@ -import { ok, rejects, strictEqual } from 'node:assert'; +import { deepStrictEqual, ok, rejects, strictEqual } from 'node:assert'; import { describe, it } from 'node:test'; import WorkerPool from '../index.mjs'; @@ -218,4 +218,117 @@ describe('WorkerPool', () => { strictEqual(pool.allWorkers.size, 0); strictEqual(pool.idleWorkers.length, 0); }); + + it('should clear queue on terminate', async () => { + const pool = new WorkerPool(workerPath, 1); + + // Start one task to occupy the single worker + const runningTask = pool.run({ + generatorName: 'ast-js', + fullInput: [], + itemIndices: [], + options: {}, + }); + + // Queue more tasks than threads available + pool.run({ + generatorName: 'ast-js', + fullInput: [], + itemIndices: [], + options: {}, + }); + + pool.run({ + generatorName: 'ast-js', + fullInput: [], + itemIndices: [], + options: {}, + }); + + // Wait for first task to finish + await runningTask; + + // Terminate should clear any remaining queue + await pool.terminate(); + + strictEqual(pool.queue.length, 0); + }); + + it('should handle multiple terminates gracefully', async () => { + const pool = new WorkerPool(workerPath, 2); + + await pool.run({ + generatorName: 'ast-js', + fullInput: [], + itemIndices: [], + options: {}, + }); + + await pool.terminate(); + await pool.terminate(); // Second terminate should not throw + + strictEqual(pool.allWorkers.size, 0); + }); + + it('should spawn workers up to thread limit only', async () => { + const pool = new WorkerPool(workerPath, 2); + + // Queue 4 tasks with limit of 2 threads + const tasks = Array.from({ length: 4 }, () => + pool.run({ + generatorName: 'ast-js', + fullInput: [], + itemIndices: [], + options: {}, + }) + ); + + await Promise.all(tasks); + + // After all tasks complete, should have at most 2 workers + ok(pool.allWorkers.size <= 2); + + await pool.terminate(); + }); + + it('should process tasks in FIFO order when queued', async () => { + const pool = new WorkerPool(workerPath, 1); + + const order = []; + + // Queue 3 tasks with single thread + const task1 = pool + .run({ + generatorName: 'ast-js', + fullInput: [], + itemIndices: [], + options: {}, + }) + .then(() => order.push(1)); + + const task2 = pool + .run({ + generatorName: 'ast-js', + fullInput: [], + itemIndices: [], + options: {}, + }) + .then(() => order.push(2)); + + const task3 = pool + .run({ + generatorName: 'ast-js', + fullInput: [], + itemIndices: [], + options: {}, + }) + .then(() => order.push(3)); + + await Promise.all([task1, task2, task3]); + + // Tasks should complete in order they were queued + deepStrictEqual(order, [1, 2, 3]); + + await pool.terminate(); + }); }); diff --git a/src/threading/__tests__/parallel.test.mjs b/src/threading/__tests__/parallel.test.mjs index 6ac9285b..f09cfca6 100644 --- a/src/threading/__tests__/parallel.test.mjs +++ b/src/threading/__tests__/parallel.test.mjs @@ -252,4 +252,34 @@ describe('createParallelWorker', () => { await pool.terminate(); }); + + it('should use sliceInput for metadata generator', async () => { + const pool = new WorkerPool(workerPath, 2); + + // metadata generator also has sliceInput = true + const worker = createParallelWorker('metadata', pool, { + threads: 2, + chunkSize: 1, + }); + + const mockInput = [ + { + file: { stem: 
'test1', basename: 'test1.md' }, + tree: { type: 'root', children: [] }, + }, + { + file: { stem: 'test2', basename: 'test2.md' }, + tree: { type: 'root', children: [] }, + }, + ]; + + const chunks = await collectChunks( + worker.stream(mockInput, mockInput, { typeMap: {} }) + ); + + // Should process both items + strictEqual(chunks.length, 2); + + await pool.terminate(); + }); }); diff --git a/src/threading/index.mjs b/src/threading/index.mjs index ca21ac2e..91b58a6f 100644 --- a/src/threading/index.mjs +++ b/src/threading/index.mjs @@ -29,6 +29,12 @@ export default class WorkerPool { */ allWorkers = new Set(); + /** + * Number of workers currently being spawned (to prevent over-spawning). + * @type {number} + */ + spawningCount = 0; + /** * Queue of pending tasks waiting for available workers. * Each entry contains { workerData, resolve, reject }. @@ -169,9 +175,11 @@ export default class WorkerPool { // First, assign tasks to any idle workers while (this.queue.length > 0 && this.idleWorkers.length > 0) { const worker = this.idleWorkers.pop(); + const { workerData, resolve, reject } = this.queue.shift(); poolLogger.debug(`Task started (reusing worker)`, { + generator: workerData.generatorName, idleWorkers: this.idleWorkers.length, totalWorkers: this.allWorkers.size, queueSize: this.queue.length, @@ -180,16 +188,19 @@ export default class WorkerPool { this.executeTask(worker, workerData, resolve, reject); } - // Calculate how many new workers we need + // Calculate how many new workers we need (account for workers being spawned) + const totalPendingWorkers = this.allWorkers.size + this.spawningCount; + const workersNeeded = Math.min( this.queue.length, - this.threads - this.allWorkers.size + this.threads - totalPendingWorkers ); if (workersNeeded > 0) { poolLogger.debug(`Spawning workers in parallel`, { workersNeeded, currentWorkers: this.allWorkers.size, + spawning: this.spawningCount, maxThreads: this.threads, queueSize: this.queue.length, }); @@ -198,8 +209,13 @@ export default class WorkerPool { for (let i = 0; i < workersNeeded; i++) { const { workerData, resolve, reject } = this.queue.shift(); + // Track that we're spawning a worker + this.spawningCount++; + // Use setImmediate to spawn workers concurrently rather than blocking setImmediate(() => { + this.spawningCount--; + const worker = this.spawnWorker(); this.executeTask(worker, workerData, resolve, reject); @@ -217,17 +233,16 @@ export default class WorkerPool { /** * Terminates all workers in the pool. - * Should be called when the pool is no longer needed. - * - * @returns {Promise} + * Kills workers immediately without waiting for graceful shutdown. 
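
Aside: the spawningCount bookkeeping matters because spawning is deferred to setImmediate, so two processQueue() calls in the same tick would otherwise both read the same allWorkers.size and over-spawn. A standalone sketch of the accounting (an analogy, not the class itself):

    const threads = 2;
    let spawned = 0;  // stands in for allWorkers.size
    let spawning = 0; // stands in for spawningCount

    const requestWorkers = queued => {
      const needed = Math.min(queued, threads - (spawned + spawning));
      spawning += needed;

      for (let i = 0; i < needed; i++) {
        setImmediate(() => {
          spawning--;
          spawned++;
        });
      }

      return needed;
    };

    console.log(requestWorkers(2)); // 2 - schedules both spawns
    console.log(requestWorkers(2)); // 0 - pending spawns are already counted
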
   */
-  async terminate() {
-    const terminations = [...this.allWorkers].map(worker => worker.terminate());
-
-    await Promise.all(terminations);
+  terminate() {
+    for (const worker of this.allWorkers) {
+      worker.terminate();
+    }
 
     this.allWorkers.clear();
     this.idleWorkers = [];
     this.queue = [];
+    this.spawningCount = 0;
   }
 }
diff --git a/src/threading/parallel.mjs b/src/threading/parallel.mjs
index 9a42c48b..d54f8d1e 100644
--- a/src/threading/parallel.mjs
+++ b/src/threading/parallel.mjs
@@ -157,14 +157,22 @@ export default function createParallelWorker(generatorName, pool, options) {
         }
       );
 
-      const chunkPromises = indexChunks.map(indices =>
-        pool.run({
+      const chunkPromises = indexChunks.map(indices => {
+        // If generator's processChunk supports sliced input (doesn't need full context),
+        // send only the items at the specified indices to reduce serialization overhead
+        const inputData = generator.processChunk.sliceInput
+          ? indices.map(i => fullInput[i])
+          : fullInput;
+
+        return pool.run({
           generatorName,
-          fullInput,
-          itemIndices: indices,
+          fullInput: inputData,
+          itemIndices: generator.processChunk.sliceInput
+            ? indices.map((_, i) => i) // Renumber indices for sliced array
+            : indices,
           options: serializeOptions(extra),
-        })
-      );
+        });
+      });
 
       // Yield results as each chunk completes
       let completedChunks = 0;

From efcd27586119a29fba7172bca5f1ee93859a071c Mon Sep 17 00:00:00 2001
From: Claudio Wunder
Date: Tue, 9 Dec 2025 01:57:50 +0100
Subject: [PATCH 04/25] refactor: code fixes, adopted piscina, improved performance

---
 bin/commands/generate.mjs                   |  10 +-
 npm-shrinkwrap.json                         | 339 +++++++++++++++++-
 package.json                                |   1 +
 src/generators.mjs                          | 152 +++-----
 .../api-links/__tests__/fixtures.test.mjs   |   9 +-
 src/generators/ast-js/index.mjs             |  55 +--
 src/generators/jsx-ast/index.mjs            |  96 ++---
 src/generators/types.d.ts                   |  16 +-
 src/threading/__tests__/WorkerPool.test.mjs | 334 -----------------
 src/threading/__tests__/parallel.test.mjs   | 143 ++------
 src/threading/chunk-worker.mjs              |  35 +-
 src/threading/index.mjs                     | 258 +------------
 src/threading/parallel.mjs                  | 186 +++-------
 13 files changed, 594 insertions(+), 1040 deletions(-)
 delete mode 100644 src/threading/__tests__/WorkerPool.test.mjs

diff --git a/bin/commands/generate.mjs b/bin/commands/generate.mjs
index 535bfc87..7a4f04f5 100644
--- a/bin/commands/generate.mjs
+++ b/bin/commands/generate.mjs
@@ -18,7 +18,7 @@ const availableGenerators = Object.keys(publicGenerators);
 // When spawning more than a certain number of threads, the overhead of context switching
 // and CPU contention starts to degrade performance rather than improve it.
 // Therefore, we default to one worker thread per CPU core, with a minimum of two.
-const optimalThreads = Math.min(Math.floor(cpus().length / 2), 6); +const optimalThreads = Math.max(cpus().length, 2); /** * @typedef {Object} Options @@ -66,11 +66,11 @@ export default { }, threads: { flags: ['-p', '--threads '], - desc: 'Number of worker threads to use', + desc: 'Number of worker threads to use (minimum: 2)', prompt: { type: 'text', message: 'How many threads to allow', - initialValue: String(Math.max(optimalThreads, 1)), + initialValue: String(Math.max(optimalThreads, 2)), }, }, chunkSize: { @@ -79,7 +79,7 @@ export default { prompt: { type: 'text', message: 'Items per worker thread', - initialValue: '20', + initialValue: '10', }, }, version: { @@ -163,7 +163,7 @@ export default { version: coerce(opts.version), releases, gitRef: opts.gitRef, - threads: parseInt(opts.threads, 10), + threads: Math.max(parseInt(opts.threads, 10), 2), chunkSize: parseInt(opts.chunkSize, 10), index, typeMap, diff --git a/npm-shrinkwrap.json b/npm-shrinkwrap.json index 30894767..03faf578 100644 --- a/npm-shrinkwrap.json +++ b/npm-shrinkwrap.json @@ -28,6 +28,7 @@ "hastscript": "^9.0.1", "lightningcss": "^1.30.2", "mdast-util-slice-markdown": "^2.0.1", + "piscina": "^5.1.4", "preact": "^10.28.0", "preact-render-to-string": "^6.6.3", "reading-time": "^1.5.0", @@ -768,6 +769,311 @@ "win32" ] }, + "node_modules/@napi-rs/nice": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice/-/nice-1.1.1.tgz", + "integrity": "sha512-xJIPs+bYuc9ASBl+cvGsKbGrJmS6fAKaSZCnT0lhahT5rhA2VVy9/EcIgd2JhtEuFOJNx7UHNn/qiTPTY4nrQw==", + "license": "MIT", + "optional": true, + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + }, + "optionalDependencies": { + "@napi-rs/nice-android-arm-eabi": "1.1.1", + "@napi-rs/nice-android-arm64": "1.1.1", + "@napi-rs/nice-darwin-arm64": "1.1.1", + "@napi-rs/nice-darwin-x64": "1.1.1", + "@napi-rs/nice-freebsd-x64": "1.1.1", + "@napi-rs/nice-linux-arm-gnueabihf": "1.1.1", + "@napi-rs/nice-linux-arm64-gnu": "1.1.1", + "@napi-rs/nice-linux-arm64-musl": "1.1.1", + "@napi-rs/nice-linux-ppc64-gnu": "1.1.1", + "@napi-rs/nice-linux-riscv64-gnu": "1.1.1", + "@napi-rs/nice-linux-s390x-gnu": "1.1.1", + "@napi-rs/nice-linux-x64-gnu": "1.1.1", + "@napi-rs/nice-linux-x64-musl": "1.1.1", + "@napi-rs/nice-openharmony-arm64": "1.1.1", + "@napi-rs/nice-win32-arm64-msvc": "1.1.1", + "@napi-rs/nice-win32-ia32-msvc": "1.1.1", + "@napi-rs/nice-win32-x64-msvc": "1.1.1" + } + }, + "node_modules/@napi-rs/nice-android-arm-eabi": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-android-arm-eabi/-/nice-android-arm-eabi-1.1.1.tgz", + "integrity": "sha512-kjirL3N6TnRPv5iuHw36wnucNqXAO46dzK9oPb0wj076R5Xm8PfUVA9nAFB5ZNMmfJQJVKACAPd/Z2KYMppthw==", + "cpu": [ + "arm" + ], + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-android-arm64": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-android-arm64/-/nice-android-arm64-1.1.1.tgz", + "integrity": "sha512-blG0i7dXgbInN5urONoUCNf+DUEAavRffrO7fZSeoRMJc5qD+BJeNcpr54msPF6qfDD6kzs9AQJogZvT2KD5nw==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-darwin-arm64": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-darwin-arm64/-/nice-darwin-arm64-1.1.1.tgz", + "integrity": 
"sha512-s/E7w45NaLqTGuOjC2p96pct4jRfo61xb9bU1unM/MJ/RFkKlJyJDx7OJI/O0ll/hrfpqKopuAFDV8yo0hfT7A==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-darwin-x64": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-darwin-x64/-/nice-darwin-x64-1.1.1.tgz", + "integrity": "sha512-dGoEBnVpsdcC+oHHmW1LRK5eiyzLwdgNQq3BmZIav+9/5WTZwBYX7r5ZkQC07Nxd3KHOCkgbHSh4wPkH1N1LiQ==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-freebsd-x64": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-freebsd-x64/-/nice-freebsd-x64-1.1.1.tgz", + "integrity": "sha512-kHv4kEHAylMYmlNwcQcDtXjklYp4FCf0b05E+0h6nDHsZ+F0bDe04U/tXNOqrx5CmIAth4vwfkjjUmp4c4JktQ==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-linux-arm-gnueabihf": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-linux-arm-gnueabihf/-/nice-linux-arm-gnueabihf-1.1.1.tgz", + "integrity": "sha512-E1t7K0efyKXZDoZg1LzCOLxgolxV58HCkaEkEvIYQx12ht2pa8hoBo+4OB3qh7e+QiBlp1SRf+voWUZFxyhyqg==", + "cpu": [ + "arm" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-linux-arm64-gnu": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-linux-arm64-gnu/-/nice-linux-arm64-gnu-1.1.1.tgz", + "integrity": "sha512-CIKLA12DTIZlmTaaKhQP88R3Xao+gyJxNWEn04wZwC2wmRapNnxCUZkVwggInMJvtVElA+D4ZzOU5sX4jV+SmQ==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-linux-arm64-musl": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-linux-arm64-musl/-/nice-linux-arm64-musl-1.1.1.tgz", + "integrity": "sha512-+2Rzdb3nTIYZ0YJF43qf2twhqOCkiSrHx2Pg6DJaCPYhhaxbLcdlV8hCRMHghQ+EtZQWGNcS2xF4KxBhSGeutg==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-linux-ppc64-gnu": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-linux-ppc64-gnu/-/nice-linux-ppc64-gnu-1.1.1.tgz", + "integrity": "sha512-4FS8oc0GeHpwvv4tKciKkw3Y4jKsL7FRhaOeiPei0X9T4Jd619wHNe4xCLmN2EMgZoeGg+Q7GY7BsvwKpL22Tg==", + "cpu": [ + "ppc64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-linux-riscv64-gnu": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-linux-riscv64-gnu/-/nice-linux-riscv64-gnu-1.1.1.tgz", + "integrity": "sha512-HU0nw9uD4FO/oGCCk409tCi5IzIZpH2agE6nN4fqpwVlCn5BOq0MS1dXGjXaG17JaAvrlpV5ZeyZwSon10XOXw==", + "cpu": [ + "riscv64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-linux-s390x-gnu": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-linux-s390x-gnu/-/nice-linux-s390x-gnu-1.1.1.tgz", + "integrity": "sha512-2YqKJWWl24EwrX0DzCQgPLKQBxYDdBxOHot1KWEq7aY2uYeX+Uvtv4I8xFVVygJDgf6/92h9N3Y43WPx8+PAgQ==", + "cpu": [ + "s390x" + ], + "license": "MIT", + 
"optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-linux-x64-gnu": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-linux-x64-gnu/-/nice-linux-x64-gnu-1.1.1.tgz", + "integrity": "sha512-/gaNz3R92t+dcrfCw/96pDopcmec7oCcAQ3l/M+Zxr82KT4DljD37CpgrnXV+pJC263JkW572pdbP3hP+KjcIg==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-linux-x64-musl": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-linux-x64-musl/-/nice-linux-x64-musl-1.1.1.tgz", + "integrity": "sha512-xScCGnyj/oppsNPMnevsBe3pvNaoK7FGvMjT35riz9YdhB2WtTG47ZlbxtOLpjeO9SqqQ2J2igCmz6IJOD5JYw==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-openharmony-arm64": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-openharmony-arm64/-/nice-openharmony-arm64-1.1.1.tgz", + "integrity": "sha512-6uJPRVwVCLDeoOaNyeiW0gp2kFIM4r7PL2MczdZQHkFi9gVlgm+Vn+V6nTWRcu856mJ2WjYJiumEajfSm7arPQ==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "openharmony" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-win32-arm64-msvc": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-win32-arm64-msvc/-/nice-win32-arm64-msvc-1.1.1.tgz", + "integrity": "sha512-uoTb4eAvM5B2aj/z8j+Nv8OttPf2m+HVx3UjA5jcFxASvNhQriyCQF1OB1lHL43ZhW+VwZlgvjmP5qF3+59atA==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-win32-ia32-msvc": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-win32-ia32-msvc/-/nice-win32-ia32-msvc-1.1.1.tgz", + "integrity": "sha512-CNQqlQT9MwuCsg1Vd/oKXiuH+TcsSPJmlAFc5frFyX/KkOh0UpBLEj7aoY656d5UKZQMQFP7vJNa1DNUNORvug==", + "cpu": [ + "ia32" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-win32-x64-msvc": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-win32-x64-msvc/-/nice-win32-x64-msvc-1.1.1.tgz", + "integrity": "sha512-vB+4G/jBQCAh0jelMTY3+kgFy00Hlx2f2/1zjMoH821IbplbWZOkLiTYXQkygNTzQJTq5cvwBDgn2ppHD+bglQ==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, "node_modules/@napi-rs/wasm-runtime": { "version": "0.2.12", "resolved": "https://registry.npmjs.org/@napi-rs/wasm-runtime/-/wasm-runtime-0.2.12.tgz", @@ -937,6 +1243,7 @@ "resolved": "https://registry.npmjs.org/@orama/core/-/core-0.0.10.tgz", "integrity": "sha512-rZ4AHeHoFTxOXMhM0An2coO3OfR+FpL0ejXc1PPrNsGB4p6VNlky7FAGeuqOvS5gUYB5ywJsmDzCxeflPtgk4w==", "license": "AGPL-3.0", + "peer": true, "dependencies": { "@orama/cuid2": "2.2.3", "dedent": "1.5.3" @@ -947,6 +1254,7 @@ "resolved": "https://registry.npmjs.org/dedent/-/dedent-1.5.3.tgz", "integrity": "sha512-NHQtfOOW68WD8lgypbLA5oT+Bt0xXJhiYvoR6SmmNXZfpzOGXwdKWmcwG8N7PwVVWV3eF/68nmD9BaJSsTBhyQ==", "license": "MIT", + "peer": true, "peerDependencies": { "babel-plugin-macros": "^3.1.0" }, @@ -976,7 +1284,6 @@ "resolved": "https://registry.npmjs.org/@orama/orama/-/orama-3.1.16.tgz", "integrity": 
"sha512-scSmQBD8eANlMUOglxHrN1JdSW8tDghsPuS83otqealBiIeMukCQMOf/wc0JJjDXomqwNdEQFLXLGHrU6PGxuA==", "license": "Apache-2.0", - "peer": true, "engines": { "node": ">= 20.0.0" } @@ -985,7 +1292,8 @@ "version": "0.0.5", "resolved": "https://registry.npmjs.org/@orama/oramacore-events-parser/-/oramacore-events-parser-0.0.5.tgz", "integrity": "sha512-yAuSwog+HQBAXgZ60TNKEwu04y81/09mpbYBCmz1RCxnr4ObNY2JnPZI7HmALbjAhLJ8t5p+wc2JHRK93ubO4w==", - "license": "AGPL-3.0" + "license": "AGPL-3.0", + "peer": true }, "node_modules/@orama/react-components": { "version": "0.8.1", @@ -1207,7 +1515,6 @@ "resolved": "https://registry.npmjs.org/@oramacloud/client/-/client-2.1.4.tgz", "integrity": "sha512-uNPFs4wq/iOPbggCwTkVNbIr64Vfd7ZS/h+cricXVnzXWocjDTfJ3wLL4lr0qiSu41g8z+eCAGBqJ30RO2O4AA==", "license": "ISC", - "peer": true, "dependencies": { "@orama/cuid2": "^2.2.3", "@orama/orama": "^3.0.0", @@ -3642,7 +3949,6 @@ "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "license": "MIT", - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -4183,7 +4489,8 @@ "version": "3.1.3", "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.3.tgz", "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==", - "license": "MIT" + "license": "MIT", + "peer": true }, "node_modules/debug": { "version": "4.4.3", @@ -4465,7 +4772,6 @@ "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.1.tgz", "integrity": "sha512-BhHmn2yNOFA9H9JmmIVKJmd288g9hrVRDkdoIgRCRuSySRUHH7r/DI6aAXW9T1WwUuY3DFgrcaqB+deURBLR5g==", "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", @@ -6311,7 +6617,6 @@ "resolved": "https://registry.npmjs.org/marked/-/marked-13.0.3.tgz", "integrity": "sha512-rqRix3/TWzE9rIoFGIn8JmsVfhiuC8VIQ8IdX5TfzmeBucdY05/0UlzKaw0eVtpcN/OdVFpBk7CjKGo9iHJ/zA==", "license": "MIT", - "peer": true, "bin": { "marked": "bin/marked.js" }, @@ -7562,6 +7867,18 @@ "node": ">=0.10" } }, + "node_modules/piscina": { + "version": "5.1.4", + "resolved": "https://registry.npmjs.org/piscina/-/piscina-5.1.4.tgz", + "integrity": "sha512-7uU4ZnKeQq22t9AsmHGD2w4OYQGonwFnTypDypaWi7Qr2EvQIFVtG8J5D/3bE7W123Wdc9+v4CZDu5hJXVCtBg==", + "license": "MIT", + "engines": { + "node": ">=20.x" + }, + "optionalDependencies": { + "@napi-rs/nice": "^1.0.4" + } + }, "node_modules/postcss": { "version": "8.5.6", "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.6.tgz", @@ -7581,7 +7898,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "nanoid": "^3.3.11", "picocolors": "^1.1.1", @@ -7631,7 +7947,6 @@ "resolved": "https://registry.npmjs.org/preact/-/preact-10.28.0.tgz", "integrity": "sha512-rytDAoiXr3+t6OIP3WGlDd0ouCUG1iCWzkcY3++Nreuoi17y6T5i/zRhe6uYfoVcxq6YU+sBtJouuRDsq8vvqA==", "license": "MIT", - "peer": true, "funding": { "type": "opencollective", "url": "https://opencollective.com/preact" @@ -8199,7 +8514,8 @@ "version": "0.26.0", "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.26.0.tgz", "integrity": "sha512-NlHwttCI/l5gCPR3D1nNXtWABUmBwvZpEQiD4IXSbIDq8BzLIK/7Ir5gTFSGZDUu37K5cMNp0hFtzO38sC7gWA==", - "license": "MIT" + "license": "MIT", + "peer": true }, "node_modules/semver": { "version": "7.7.3", @@ -8800,7 +9116,6 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": 
"sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -9168,7 +9483,6 @@ "integrity": "sha512-VUyWiTNQD7itdiMuJy+EuLEErLj3uwX/EpHQF8EOf33Dq3Ju6VW1GXm+swk6+1h7a49uv9fKZ+dft9jU7esdLA==", "dev": true, "hasInstallScript": true, - "peer": true, "dependencies": { "napi-postinstall": "^0.2.4" }, @@ -9585,7 +9899,6 @@ "resolved": "https://registry.npmjs.org/zod/-/zod-3.24.3.tgz", "integrity": "sha512-HhY1oqzWCQWuUqvBFnsyrtZRhyPeR7SUGv+C4+MsisMuVfSPx8HpwWqH8tRahSlt6M3PiFAcoeFhZAqIXTxoSg==", "license": "MIT", - "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } diff --git a/package.json b/package.json index 7d9c2970..b92a40f4 100644 --- a/package.json +++ b/package.json @@ -60,6 +60,7 @@ "hastscript": "^9.0.1", "lightningcss": "^1.30.2", "mdast-util-slice-markdown": "^2.0.1", + "piscina": "^5.1.4", "preact": "^10.28.0", "preact-render-to-string": "^6.6.3", "reading-time": "^1.5.0", diff --git a/src/generators.mjs b/src/generators.mjs index 37663314..45131908 100644 --- a/src/generators.mjs +++ b/src/generators.mjs @@ -3,7 +3,7 @@ import { allGenerators } from './generators/index.mjs'; import logger from './logger/index.mjs'; import { isAsyncGenerator, createStreamingCache } from './streaming.mjs'; -import WorkerPool from './threading/index.mjs'; +import createWorkerPool from './threading/index.mjs'; import createParallelWorker from './threading/parallel.mjs'; const generatorsLogger = logger.child('generators'); @@ -13,52 +13,28 @@ const generatorsLogger = logger.child('generators'); * documentation generators in dependency order, with support for parallel * processing and streaming results. * - * Generators can output content consumed by other generators or write to files. - * The system handles dependency resolution, parallel scheduling, and result caching. - * - * @typedef {{ ast: GeneratorMetadata}} AstGenerator - * @typedef {AvailableGenerators & AstGenerator} AllGenerators - * * @param {ParserOutput} input - The API doc AST tree - * @returns {{ runGenerators: (options: GeneratorOptions) => Promise }} + * @returns {{ runGenerators: (options: GeneratorOptions) => Promise }} */ const createGenerator = input => { - /** - * Cache for generator results (Promises or AsyncGenerators). - * @type {{ [K in keyof AllGenerators]?: ReturnType }} - */ + /** @type {{ [key: string]: Promise | AsyncGenerator }} */ const cachedGenerators = { ast: Promise.resolve(input) }; - /** - * Cache for async generator collection results. - * Ensures collection happens only once when multiple generators depend on - * the same streaming generator. - */ const streamingCache = createStreamingCache(); - /** - * Shared WorkerPool instance for all generators. - * @type {WorkerPool | null} - */ - let sharedPool = null; + /** @type {import('piscina').Piscina} */ + let pool; /** - * Resolves the dependency input for a generator, handling both regular - * promises and async generators. For async generators, creates a shared - * collection so multiple dependents reuse the same result. + * Gets the collected input from a dependency generator. 
* - * @param {string} dependsOn - Name of the dependency generator - * @returns {Promise} Collected results from the dependency + * @param {string} dependsOn - Dependency generator name + * @returns {Promise} */ const getDependencyInput = async dependsOn => { const result = await cachedGenerators[dependsOn]; - // For async generators, collect all chunks (shared across dependents) if (isAsyncGenerator(result)) { - generatorsLogger.debug( - `Collecting async generator output from "${dependsOn}"` - ); - return streamingCache.getOrCollect(dependsOn, result); } @@ -66,101 +42,87 @@ const createGenerator = input => { }; /** - * Schedules generators for execution without creating new pools. - * Uses the shared pool for all parallel work. + * Schedules a generator and its dependencies for execution. * - * @param {GeneratorOptions} options - Generator runtime options - * @param {WorkerPool} pool - Shared worker pool + * @param {string} generatorName - Generator to schedule + * @param {GeneratorOptions} options - Runtime options */ - const scheduleGenerators = (options, pool) => { - const { generators } = options; - - for (const generatorName of generators) { - // Skip already scheduled generators - if (generatorName in cachedGenerators) { - generatorsLogger.debug(`Skipping "${generatorName}"`); - - continue; - } - - const { dependsOn, generate } = allGenerators[generatorName]; + const scheduleGenerator = (generatorName, options) => { + if (generatorName in cachedGenerators) { + return; + } - // Recursively schedule dependencies (without awaiting) - if (dependsOn && !(dependsOn in cachedGenerators)) { - generatorsLogger.debug(`Scheduling "${dependsOn}":"${generatorName}"`); + const { dependsOn, generate, processChunk } = allGenerators[generatorName]; - scheduleGenerators({ ...options, generators: [dependsOn] }, pool); - } + // Schedule dependency first + if (dependsOn && !(dependsOn in cachedGenerators)) { + scheduleGenerator(dependsOn, options); + } - // Create a ParallelWorker for this generator's chunk processing - const worker = createParallelWorker(generatorName, pool, options); + generatorsLogger.debug(`Scheduling "${generatorName}"`, { + dependsOn: dependsOn || 'none', + streaming: Boolean(processChunk), + }); - generatorsLogger.debug(`Scheduling generator "${generatorName}"`, { - dependsOn: dependsOn || 'none', - }); + // Schedule the generator + cachedGenerators[generatorName] = (async () => { + const dependencyInput = await getDependencyInput(dependsOn); - // Schedule the generator (awaits dependency internally) - cachedGenerators[generatorName] = (async () => { - const dependencyInput = await getDependencyInput(dependsOn); + generatorsLogger.debug(`Starting "${generatorName}"`); - generatorsLogger.debug(`Starting generator "${generatorName}"`); + // Create parallel worker for streaming generators + const worker = processChunk + ? createParallelWorker(generatorName, pool, options) + : null; - const result = await generate(dependencyInput, { ...options, worker }); + const result = await generate(dependencyInput, { ...options, worker }); - generatorsLogger.debug(`Completed generator "${generatorName}"`); + // For streaming generators, "Completed" is logged when collection finishes + // (in streamingCache.getOrCollect), not here when the generator returns + if (!isAsyncGenerator(result)) { + generatorsLogger.debug(`Completed "${generatorName}"`); + } - return result; - })(); - } + return result; + })(); }; /** - * Schedules and runs all requested generators with their dependencies. 
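
Aside: the collection contract that makes this sharing safe is that streamingCache.getOrCollect consumes the async generator exactly once and hands every dependent the same collected result. A sketch of the observable behavior (semantics inferred from its use here, with chunk flattening assumed from the tests in this series):

    async function* chunkedOutput() {
      yield [1, 2];
      yield [3];
    }

    const gen = chunkedOutput();

    const first = streamingCache.getOrCollect('example', gen);
    const second = streamingCache.getOrCollect('example', gen);

    console.log(await first); // [1, 2, 3] - chunks collected into one array
    console.log((await first) === (await second)); // true - collected once, shared
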
- * Independent generators run in parallel; dependent generators wait for - * their dependencies to complete. + * Runs all requested generators with their dependencies. * - * @param {GeneratorOptions} options - Generator runtime options + * @param {GeneratorOptions} options - Runtime options * @returns {Promise} Results of all requested generators */ const runGenerators = async options => { const { generators, threads } = options; - generatorsLogger.debug(`Starting generator pipeline`, { + generatorsLogger.debug(`Starting pipeline`, { generators: generators.join(', '), threads, }); - // Create shared WorkerPool for all generators (only once) - if (!sharedPool) { - sharedPool = new WorkerPool('./chunk-worker.mjs', threads); - } - - // Schedule all generators using the shared pool - scheduleGenerators(options, sharedPool); + // Create worker pool + pool = createWorkerPool(threads); - // Wait for ALL requested generators to complete (not just the last one) - const results = []; + // Schedule all generators + for (const name of generators) { + scheduleGenerator(name, options); + } - for (const generatorName of generators) { - let result = await cachedGenerators[generatorName]; + // Start all collections in parallel (don't await sequentially) + const resultPromises = generators.map(async name => { + let result = await cachedGenerators[name]; - // If the generator returns an async generator, consume it - // to ensure all side effects (file writes, etc.) complete if (isAsyncGenerator(result)) { - generatorsLogger.debug( - `Consuming async generator output from "${generatorName}"` - ); - - result = await streamingCache.getOrCollect(generatorName, result); + result = await streamingCache.getOrCollect(name, result); } - results.push(result); - } + return result; + }); - // Terminate workers after all work is complete (fire-and-forget) - sharedPool.terminate(); + const results = await Promise.all(resultPromises); - sharedPool = null; + await pool.destroy(); return results; }; diff --git a/src/generators/api-links/__tests__/fixtures.test.mjs b/src/generators/api-links/__tests__/fixtures.test.mjs index 171fb560..efd53de5 100644 --- a/src/generators/api-links/__tests__/fixtures.test.mjs +++ b/src/generators/api-links/__tests__/fixtures.test.mjs @@ -3,7 +3,7 @@ import { cpus } from 'node:os'; import { basename, extname, join } from 'node:path'; import { describe, it } from 'node:test'; -import WorkerPool from '../../../threading/index.mjs'; +import createWorkerPool from '../../../threading/index.mjs'; import createParallelWorker from '../../../threading/parallel.mjs'; import astJs from '../../ast-js/index.mjs'; import apiLinks from '../index.mjs'; @@ -19,10 +19,11 @@ describe('api links', () => { describe('should work correctly for all fixtures', () => { sourceFiles.forEach(sourceFile => { it(`${basename(sourceFile)}`, async t => { - const pool = new WorkerPool('../chunk-worker.mjs', cpus().length); + const threads = cpus().length; + const pool = createWorkerPool(threads); const worker = createParallelWorker('ast-js', pool, { - threads: 1, + threads, chunkSize: 10, }); @@ -45,6 +46,8 @@ describe('api links', () => { } t.assert.snapshot(actualOutput); + + await pool.destroy(); }); }); }); diff --git a/src/generators/ast-js/index.mjs b/src/generators/ast-js/index.mjs index 70cb2cb8..36d88a8e 100644 --- a/src/generators/ast-js/index.mjs +++ b/src/generators/ast-js/index.mjs @@ -5,6 +5,9 @@ import { globSync } from 'glob'; import createJsLoader from '../../loaders/javascript.mjs'; import createJsParser from 
'../../parsers/javascript.mjs'; +const { loadFiles } = createJsLoader(); +const { parseJsSource } = createJsParser(); + /** * This generator parses Javascript sources passed into the generator's input * field. This is separate from the Markdown parsing step since it's not as @@ -26,46 +29,44 @@ export default { dependsOn: 'metadata', - /** - * Process a chunk of JavaScript files in a worker thread. - * Parses JS source files into AST representations. - * - * @param {Input} _ - Unused (files loaded from input paths) - * @param {number[]} itemIndices - Indices of input paths to process - * @param {Partial>} options - Serializable options - * @returns {Promise} Parsed JS AST objects for each file - */ - async processChunk(_, itemIndices, { input }) { - const { loadFiles } = createJsLoader(); - const { parseJsSource } = createJsParser(); - - const results = []; + processChunk: Object.assign( + /** + * Process a chunk of JavaScript files in a worker thread. + * Parses JS source files into AST representations. + * + * @param {string[]} inputSlice - Sliced input paths for this chunk + * @param {number[]} itemIndices - Indices into the sliced array + * @returns {Promise} Parsed JS AST objects for each file + */ + async (inputSlice, itemIndices) => { + const results = []; - for (const idx of itemIndices) { - const [file] = loadFiles(input[idx]); + for (const idx of itemIndices) { + const [file] = loadFiles(inputSlice[idx]); - const parsedFile = await parseJsSource(file); + const parsedFile = await parseJsSource(file); - results.push(parsedFile); - } + results.push(parsedFile); + } - return results; - }, + return results; + }, + { sliceInput: true } + ), /** + * Generates a JavaScript AST from the input files. + * * @param {Input} _ - Unused (files loaded from input paths) * @param {Partial} options * @returns {AsyncGenerator>} */ async *generate(_, { input = [], worker }) { - const sourceFiles = globSync(input).filter( - filePath => extname(filePath) === '.js' - ); - - const deps = { input: sourceFiles }; + const source = globSync(input).filter(path => extname(path) === '.js'); // Parse the Javascript sources into ASTs in parallel using worker threads - for await (const chunkResult of worker.stream(sourceFiles, _, deps)) { + // source is both the items list and the fullInput since we use sliceInput + for await (const chunkResult of worker.stream(source, source)) { yield chunkResult; } }, diff --git a/src/generators/jsx-ast/index.mjs b/src/generators/jsx-ast/index.mjs index 0f0d283d..3a674ee6 100644 --- a/src/generators/jsx-ast/index.mjs +++ b/src/generators/jsx-ast/index.mjs @@ -4,6 +4,8 @@ import { getSortedHeadNodes } from './utils/getSortedHeadNodes.mjs'; import { groupNodesByModule } from '../../utils/generators.mjs'; import { getRemarkRecma } from '../../utils/remark.mjs'; +const remarkRecma = getRemarkRecma(); + /** * Generator for converting MDAST to JSX AST. * @@ -19,48 +21,49 @@ export default { dependsOn: 'metadata', - /** - * Process a chunk of items in a worker thread. - * Transforms metadata entries into JSX AST nodes. 
- * - * @param {Input} fullInput - Full metadata input for context rebuilding - * @param {number[]} itemIndices - Indices of head nodes to process - * @param {Partial>} options - Serializable options - * @returns {Promise>} JSX AST programs for each module - */ - async processChunk(fullInput, itemIndices, { index, releases, version }) { - const remarkRecma = getRemarkRecma(); - const groupedModules = groupNodesByModule(fullInput); - const headNodes = getSortedHeadNodes(fullInput); - - const docPages = index - ? index.map(({ section, api }) => [section, `${api}.html`]) - : headNodes.map(node => [node.heading.data.name, `${node.api}.html`]); - - const results = []; + processChunk: Object.assign( + /** + * Process a chunk of items in a worker thread. + * Transforms metadata entries into JSX AST nodes. + * + * With sliceInput, each item is a SlicedModuleInput containing the head node + * and all entries for that module - no need to recompute grouping. + * + * @param {Array<{head: ApiDocMetadataEntry, entries: Array}>} slicedInput - Pre-sliced module data + * @param {number[]} itemIndices - Indices of items to process + * @param {object} options - Serializable options + * @param {Array<[string, string]>} options.docPages - Pre-computed doc pages for sidebar + * @param {Array} options.releases - Release information + * @param {import('semver').SemVer} options.version - Target Node.js version + * @returns {Promise>} JSX AST programs for each module + */ + async (slicedInput, itemIndices, { docPages, releases, version }) => { + const results = []; - for (const idx of itemIndices) { - const entry = headNodes[idx]; + for (const idx of itemIndices) { + const { head, entries } = slicedInput[idx]; - const sideBarProps = buildSideBarProps( - entry, - releases, - version, - docPages - ); + const sideBarProps = buildSideBarProps( + head, + releases, + version, + docPages + ); - const content = await buildContent( - groupedModules.get(entry.api), - entry, - sideBarProps, - remarkRecma - ); + const content = await buildContent( + entries, + head, + sideBarProps, + remarkRecma + ); - results.push(content); - } + results.push(content); + } - return results; - }, + return results; + }, + { sliceInput: true } + ), /** * Generates a JSX AST @@ -70,11 +73,24 @@ export default { * @returns {AsyncGenerator>} */ async *generate(entries, { index, releases, version, worker }) { - const headNodes = entries.filter(node => node.heading.depth === 1); + const groupedModules = groupNodesByModule(entries); + const headNodes = getSortedHeadNodes(entries); + + // Pre-compute docPages once in main thread + const docPages = index + ? 
index.map(({ section, api }) => [section, `${api}.html`]) + : headNodes.map(node => [node.heading.data.name, `${node.api}.html`]); + + // Create sliced input: each item contains head + its module's entries + // This avoids sending all 4700+ entries to every worker + const input = headNodes.map(head => ({ + head, + entries: groupedModules.get(head.api), + })); - const deps = { index, releases, version }; + const deps = { docPages, releases, version }; - for await (const chunkResult of worker.stream(headNodes, entries, deps)) { + for await (const chunkResult of worker.stream(input, input, deps)) { yield chunkResult; } }, diff --git a/src/generators/types.d.ts b/src/generators/types.d.ts index c899c96c..d766afd7 100644 --- a/src/generators/types.d.ts +++ b/src/generators/types.d.ts @@ -1,11 +1,13 @@ import type { ApiDocReleaseEntry } from '../types'; -import type { publicGenerators } from './index.mjs'; +import type { publicGenerators, allGenerators } from './index.mjs'; declare global { - // All available generators as an inferable type, to allow Generator interfaces - // to be type complete and runtime friendly within `runGenerators` + // Public generators exposed to the CLI export type AvailableGenerators = typeof publicGenerators; + // All generators including internal ones (metadata, jsx-ast, ast-js) + export type AllGenerators = typeof allGenerators; + /** * ParallelWorker interface for distributing work across Node.js worker threads. * Streams results as chunks complete, enabling pipeline parallelism. @@ -23,7 +25,7 @@ declare global { */ stream( items: T[], - fullInput: unknown, + fullInput: T[], opts?: Record ): AsyncGenerator; } @@ -75,8 +77,8 @@ declare global { } export interface GeneratorMetadata { - // The name of the Generator. Must match the Key in the AvailableGenerators - name: keyof AvailableGenerators; + // The name of the Generator. Must match the Key in AllGenerators + name: keyof AllGenerators; version: string; @@ -106,7 +108,7 @@ declare global { * passes the ASTs for any JavaScript files given in the input. Like `ast`, * any generator depending on it is marked as a top-level generator. */ - dependsOn: keyof AvailableGenerators | 'ast' | 'ast-js'; + dependsOn: keyof AllGenerators | 'ast'; /** * Generators are abstract and the different generators have different sort of inputs and outputs. 
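As a reviewer aside (illustrative, not part of the patch): the stream()/sliceInput contract defined above is easiest to read as a short sketch. The expensiveTransform helper below is hypothetical; only the Object.assign tagging and the for await consumption pattern are taken from this series.

// A processChunk tagged with sliceInput receives only its chunk's slice of
// the input, with itemIndices renumbered against that slice (0..n-1).
const processChunk = Object.assign(
  async (inputSlice, itemIndices) => {
    // Placeholder for the real per-item work a generator would do
    const expensiveTransform = item => item;

    return itemIndices.map(idx => expensiveTransform(inputSlice[idx]));
  },
  { sliceInput: true }
);

// Consumers drain the stream with for await; chunks are yielded in
// completion order, and each chunk is an array of per-item results.
const collectStream = async (worker, items) => {
  const results = [];

  for await (const chunk of worker.stream(items, items)) {
    results.push(...chunk);
  }

  return results;
};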
diff --git a/src/threading/__tests__/WorkerPool.test.mjs b/src/threading/__tests__/WorkerPool.test.mjs deleted file mode 100644 index a0b4e41b..00000000 --- a/src/threading/__tests__/WorkerPool.test.mjs +++ /dev/null @@ -1,334 +0,0 @@ -import { deepStrictEqual, ok, rejects, strictEqual } from 'node:assert'; -import { describe, it } from 'node:test'; - -import WorkerPool from '../index.mjs'; - -describe('WorkerPool', () => { - // Use relative path from WorkerPool's location (src/threading/) - const workerPath = './chunk-worker.mjs'; - - it('should create a worker pool with specified thread count', () => { - const pool = new WorkerPool(workerPath, 4); - - strictEqual(pool.threads, 4); - strictEqual(pool.allWorkers.size, 0); - }); - - it('should initialize with no workers', () => { - const pool = new WorkerPool(workerPath, 2); - - strictEqual(pool.allWorkers.size, 0); - strictEqual(pool.idleWorkers.length, 0); - }); - - it('should queue tasks when thread limit is reached', async () => { - const pool = new WorkerPool(workerPath, 1); - - const task1 = pool.run({ - generatorName: 'ast-js', - fullInput: [], - itemIndices: [], - options: {}, - }); - - const task2 = pool.run({ - generatorName: 'ast-js', - fullInput: [], - itemIndices: [], - options: {}, - }); - - const results = await Promise.all([task1, task2]); - - ok(Array.isArray(results)); - strictEqual(results.length, 2); - - await pool.terminate(); - }); - - it('should run multiple tasks via individual run calls', async () => { - const pool = new WorkerPool(workerPath, 2); - - const tasks = [ - { - generatorName: 'ast-js', - fullInput: [], - itemIndices: [], - options: {}, - }, - { - generatorName: 'ast-js', - fullInput: [], - itemIndices: [], - options: {}, - }, - ]; - - const results = await Promise.all(tasks.map(task => pool.run(task))); - - ok(Array.isArray(results)); - strictEqual(results.length, 2); - - await pool.terminate(); - }); - - it('should handle default thread count', () => { - const pool = new WorkerPool(workerPath); - - strictEqual(pool.threads, 1); - }); - - it('should accept URL for worker script', () => { - const url = new URL('./chunk-worker.mjs', import.meta.url); - const pool = new WorkerPool(url, 2); - - ok(pool.workerScript instanceof URL); - strictEqual(pool.threads, 2); - }); - - it('should process queued tasks after completion', async () => { - const pool = new WorkerPool(workerPath, 1); - - // Queue up 3 tasks with only 1 thread - const tasks = []; - - for (let i = 0; i < 3; i++) { - tasks.push( - pool.run({ - generatorName: 'ast-js', - fullInput: [], - itemIndices: [], - options: {}, - }) - ); - } - - // All should complete even with only 1 thread - const results = await Promise.all(tasks); - - strictEqual(results.length, 3); - - await pool.terminate(); - }); - - it('should reject on worker error with result.error', async () => { - const pool = new WorkerPool(workerPath, 1); - - // Using an invalid generator name should cause an error - await rejects(async () => { - await pool.run({ - generatorName: 'nonexistent-generator', - fullInput: [], - itemIndices: [0], - options: {}, - }); - }, Error); - - await pool.terminate(); - }); - - it('should handle concurrent tasks up to thread limit', async () => { - const pool = new WorkerPool(workerPath, 4); - - // Run 4 tasks concurrently (at thread limit) - const tasks = Array.from({ length: 4 }, () => - pool.run({ - generatorName: 'ast-js', - fullInput: [], - itemIndices: [], - options: {}, - }) - ); - - const results = await Promise.all(tasks); - - 
strictEqual(results.length, 4); - results.forEach(r => ok(Array.isArray(r))); - - await pool.terminate(); - }); - - it('should return results correctly from workers', async () => { - const pool = new WorkerPool(workerPath, 2); - - const result = await pool.run({ - generatorName: 'ast-js', - fullInput: [], - itemIndices: [], - options: {}, - }); - - ok(Array.isArray(result)); - - await pool.terminate(); - }); - - it('should reuse workers for multiple tasks', async () => { - const pool = new WorkerPool(workerPath, 2); - - // Run first batch - await pool.run({ - generatorName: 'ast-js', - fullInput: [], - itemIndices: [], - options: {}, - }); - - // Workers should now be idle - strictEqual(pool.idleWorkers.length, 1); - strictEqual(pool.allWorkers.size, 1); - - // Run another task - should reuse idle worker - await pool.run({ - generatorName: 'ast-js', - fullInput: [], - itemIndices: [], - options: {}, - }); - - // Still same number of workers - strictEqual(pool.allWorkers.size, 1); - - await pool.terminate(); - }); - - it('should terminate all workers', async () => { - const pool = new WorkerPool(workerPath, 2); - - // Spawn some workers - await Promise.all([ - pool.run({ - generatorName: 'ast-js', - fullInput: [], - itemIndices: [], - options: {}, - }), - pool.run({ - generatorName: 'ast-js', - fullInput: [], - itemIndices: [], - options: {}, - }), - ]); - - strictEqual(pool.allWorkers.size, 2); - - await pool.terminate(); - - strictEqual(pool.allWorkers.size, 0); - strictEqual(pool.idleWorkers.length, 0); - }); - - it('should clear queue on terminate', async () => { - const pool = new WorkerPool(workerPath, 1); - - // Start one task to occupy the single worker - const runningTask = pool.run({ - generatorName: 'ast-js', - fullInput: [], - itemIndices: [], - options: {}, - }); - - // Queue more tasks than threads available - pool.run({ - generatorName: 'ast-js', - fullInput: [], - itemIndices: [], - options: {}, - }); - - pool.run({ - generatorName: 'ast-js', - fullInput: [], - itemIndices: [], - options: {}, - }); - - // Wait for first task to finish - await runningTask; - - // Terminate should clear any remaining queue - await pool.terminate(); - - strictEqual(pool.queue.length, 0); - }); - - it('should handle multiple terminates gracefully', async () => { - const pool = new WorkerPool(workerPath, 2); - - await pool.run({ - generatorName: 'ast-js', - fullInput: [], - itemIndices: [], - options: {}, - }); - - await pool.terminate(); - await pool.terminate(); // Second terminate should not throw - - strictEqual(pool.allWorkers.size, 0); - }); - - it('should spawn workers up to thread limit only', async () => { - const pool = new WorkerPool(workerPath, 2); - - // Queue 4 tasks with limit of 2 threads - const tasks = Array.from({ length: 4 }, () => - pool.run({ - generatorName: 'ast-js', - fullInput: [], - itemIndices: [], - options: {}, - }) - ); - - await Promise.all(tasks); - - // After all tasks complete, should have at most 2 workers - ok(pool.allWorkers.size <= 2); - - await pool.terminate(); - }); - - it('should process tasks in FIFO order when queued', async () => { - const pool = new WorkerPool(workerPath, 1); - - const order = []; - - // Queue 3 tasks with single thread - const task1 = pool - .run({ - generatorName: 'ast-js', - fullInput: [], - itemIndices: [], - options: {}, - }) - .then(() => order.push(1)); - - const task2 = pool - .run({ - generatorName: 'ast-js', - fullInput: [], - itemIndices: [], - options: {}, - }) - .then(() => order.push(2)); - - const task3 = pool - .run({ 
- generatorName: 'ast-js', - fullInput: [], - itemIndices: [], - options: {}, - }) - .then(() => order.push(3)); - - await Promise.all([task1, task2, task3]); - - // Tasks should complete in order they were queued - deepStrictEqual(order, [1, 2, 3]); - - await pool.terminate(); - }); -}); diff --git a/src/threading/__tests__/parallel.test.mjs b/src/threading/__tests__/parallel.test.mjs index f09cfca6..41b04c6f 100644 --- a/src/threading/__tests__/parallel.test.mjs +++ b/src/threading/__tests__/parallel.test.mjs @@ -1,7 +1,7 @@ -import { deepStrictEqual, ok, rejects, strictEqual } from 'node:assert'; +import { deepStrictEqual, ok, strictEqual } from 'node:assert'; import { describe, it } from 'node:test'; -import WorkerPool from '../index.mjs'; +import createWorkerPool from '../index.mjs'; import createParallelWorker from '../parallel.mjs'; /** @@ -39,131 +39,37 @@ async function collectChunks(generator) { } describe('createParallelWorker', () => { - // Use relative path from WorkerPool's location (src/threading/) - const workerPath = './chunk-worker.mjs'; - it('should create a ParallelWorker with stream method', async () => { - const pool = new WorkerPool(workerPath, 2); - + const pool = createWorkerPool(2); const worker = createParallelWorker('metadata', pool, { threads: 2 }); ok(worker); strictEqual(typeof worker.stream, 'function'); - await pool.terminate(); - }); - - it('should use main thread for single-threaded execution', async () => { - const pool = new WorkerPool(workerPath, 1); - - const worker = createParallelWorker('ast-js', pool, { threads: 1 }); - const items = []; - const results = await collectStream(worker.stream(items, items, {})); - - ok(Array.isArray(results)); - strictEqual(results.length, 0); - - await pool.terminate(); - }); - - it('should use main thread when threads is 1', async () => { - const pool = new WorkerPool(workerPath, 4); - - const worker = createParallelWorker('ast-js', pool, { threads: 1 }); - const items = []; - const results = await collectStream(worker.stream(items, items, {})); - - ok(Array.isArray(results)); - strictEqual(results.length, 0); - - await pool.terminate(); - }); - - it('should stream chunks for parallel processing', async () => { - const pool = new WorkerPool(workerPath, 2); - - const worker = createParallelWorker('ast-js', pool, { threads: 2 }); - const items = []; - - const results = await collectStream(worker.stream(items, items, {})); - - strictEqual(results.length, 0); - ok(Array.isArray(results)); - - await pool.terminate(); - }); - - it('should pass extra options to worker', async () => { - const pool = new WorkerPool(workerPath, 1); - - const worker = createParallelWorker('ast-js', pool, { threads: 1 }); - const extra = { gitRef: 'main', customOption: 'value' }; - const items = []; - - const results = await collectStream(worker.stream(items, items, extra)); - - ok(Array.isArray(results)); - - await pool.terminate(); - }); - - it('should serialize and deserialize data correctly', async () => { - const pool = new WorkerPool(workerPath, 2); - - const worker = createParallelWorker('ast-js', pool, { threads: 2 }); - const items = []; - - const results = await collectStream(worker.stream(items, items, {})); - - ok(Array.isArray(results)); - - await pool.terminate(); + await pool.destroy(); }); it('should handle empty items array', async () => { - const pool = new WorkerPool(workerPath, 2); - - const worker = createParallelWorker('ast-js', pool, { threads: 2 }); - const results = await collectStream(worker.stream([], [], {})); - - 
deepStrictEqual(results, []); - - await pool.terminate(); - }); - - it('should throw for generators without processChunk', async () => { - const pool = new WorkerPool(workerPath, 2); - - // 'json-simple' doesn't have processChunk - const worker = createParallelWorker('json-simple', pool, { + const pool = createWorkerPool(2); + const worker = createParallelWorker('ast-js', pool, { threads: 2, - chunkSize: 5, + chunkSize: 10, }); - // Non-empty items array to trigger processChunk check - const items = [{ file: { stem: 'test' }, tree: {} }]; + const results = await collectStream(worker.stream([], [], {})); - await rejects( - async () => { - await collectStream(worker.stream(items, items, {})); - }, - { - message: /does not support chunk processing/, - } - ); + deepStrictEqual(results, []); - await pool.terminate(); + await pool.destroy(); }); it('should distribute items to multiple worker threads', async () => { - const pool = new WorkerPool(workerPath, 4); - + const pool = createWorkerPool(4); const worker = createParallelWorker('metadata', pool, { threads: 4, - chunkSize: 20, // Large chunk size, but optimal calculation will use 1 per thread + chunkSize: 1, }); - // Create mock input that matches expected shape for metadata generator const mockInput = [ { file: { stem: 'test1', basename: 'test1.md' }, @@ -187,20 +93,17 @@ describe('createParallelWorker', () => { worker.stream(mockInput, mockInput, { typeMap: {} }) ); - // With 4 items and 4 threads, optimal chunk size is 1, so we get 4 chunks strictEqual(chunks.length, 4); - // Each chunk should be an array for (const chunk of chunks) { ok(Array.isArray(chunk)); } - await pool.terminate(); + await pool.destroy(); }); it('should yield results as chunks complete', async () => { - const pool = new WorkerPool(workerPath, 2); - + const pool = createWorkerPool(2); const worker = createParallelWorker('metadata', pool, { threads: 2, chunkSize: 1, @@ -221,17 +124,15 @@ describe('createParallelWorker', () => { worker.stream(mockInput, mockInput, { typeMap: {} }) ); - // With 2 items and chunkSize 1, should get 2 chunks strictEqual(chunks.length, 2); - await pool.terminate(); + await pool.destroy(); }); it('should work with single thread and items', async () => { - const pool = new WorkerPool(workerPath, 1); - + const pool = createWorkerPool(2); const worker = createParallelWorker('metadata', pool, { - threads: 1, + threads: 2, chunkSize: 5, }); @@ -246,17 +147,14 @@ describe('createParallelWorker', () => { worker.stream(mockInput, mockInput, { typeMap: {} }) ); - // Single thread mode yields one chunk strictEqual(chunks.length, 1); ok(Array.isArray(chunks[0])); - await pool.terminate(); + await pool.destroy(); }); it('should use sliceInput for metadata generator', async () => { - const pool = new WorkerPool(workerPath, 2); - - // metadata generator also has sliceInput = true + const pool = createWorkerPool(2); const worker = createParallelWorker('metadata', pool, { threads: 2, chunkSize: 1, @@ -277,9 +175,8 @@ describe('createParallelWorker', () => { worker.stream(mockInput, mockInput, { typeMap: {} }) ); - // Should process both items strictEqual(chunks.length, 2); - await pool.terminate(); + await pool.destroy(); }); }); diff --git a/src/threading/chunk-worker.mjs b/src/threading/chunk-worker.mjs index bbfc4012..4074d7b5 100644 --- a/src/threading/chunk-worker.mjs +++ b/src/threading/chunk-worker.mjs @@ -1,35 +1,24 @@ -import { parentPort } from 'node:worker_threads'; - import { allGenerators } from '../generators/index.mjs'; /** - * Handles 
incoming work requests from the parent thread. * Processes a chunk of items using the specified generator's processChunk method. + * This is the worker entry point for Piscina. * * @param {{ * generatorName: string, * fullInput: unknown[], * itemIndices: number[], * options: object - * }} opts - Task options from parent thread - * @returns {Promise} + * }} opts - Task options from Piscina + * @returns {Promise} The processed result */ -const handleWork = async opts => { - const { generatorName, fullInput, itemIndices, options } = opts; - - try { - const generator = allGenerators[generatorName]; - - const result = await generator.processChunk( - fullInput, - itemIndices, - options - ); - - parentPort.postMessage(result); - } catch (error) { - parentPort.postMessage({ error: error.message }); - } -}; +export default async function processChunk({ + generatorName, + fullInput, + itemIndices, + options, +}) { + const generator = allGenerators[generatorName]; -parentPort.on('message', handleWork); + return generator.processChunk(fullInput, itemIndices, options); +} diff --git a/src/threading/index.mjs b/src/threading/index.mjs index 91b58a6f..d1ba88d6 100644 --- a/src/threading/index.mjs +++ b/src/threading/index.mjs @@ -1,248 +1,28 @@ -import { Worker } from 'node:worker_threads'; +import Piscina from 'piscina'; import logger from '../logger/index.mjs'; const poolLogger = logger.child('WorkerPool'); +const workerScript = new URL('./chunk-worker.mjs', import.meta.url).href; + /** - * WorkerPool manages a pool of reusable Node.js worker threads for parallel processing. - * Workers are spawned on-demand and kept alive to process multiple tasks, avoiding - * the overhead of creating new workers for each task. - * - * Tasks are distributed to available workers. If all workers are busy, tasks are - * queued and processed in FIFO order as workers become free. + * Creates a Piscina worker pool for parallel processing. * - * @example - * const pool = new WorkerPool('./my-worker.mjs', 4); - * const result = await pool.run({ task: 'process', data: [1, 2, 3] }); + * @param {number} threads - Maximum number of worker threads + * @returns {import('piscina').Piscina} Configured Piscina instance */ -export default class WorkerPool { - /** - * Pool of idle workers ready to accept tasks. - * @type {Worker[]} - */ - idleWorkers = []; - - /** - * Set of all spawned workers (for cleanup). - * @type {Set} - */ - allWorkers = new Set(); - - /** - * Number of workers currently being spawned (to prevent over-spawning). - * @type {number} - */ - spawningCount = 0; - - /** - * Queue of pending tasks waiting for available workers. - * Each entry contains { workerData, resolve, reject }. - * @type {Array<{ workerData: object, resolve: Function, reject: Function }>} - */ - queue = []; - - /** - * URL to the worker script file. - * @type {URL} - */ - workerScript; - - /** - * Maximum number of concurrent worker threads. - * @type {number} - */ - threads; - - /** - * Creates a new WorkerPool instance. - * - * @param {string | URL} workerScript - Path to worker script file (relative to this module or absolute URL) - * @param {number} [threads=1] - Maximum concurrent worker threads - */ - constructor(workerScript = './generator-worker.mjs', threads = 1) { - this.workerScript = - workerScript instanceof URL - ? 
workerScript - : new URL(workerScript, import.meta.url); - - this.threads = threads; - - poolLogger.debug(`WorkerPool initialized`, { threads, workerScript }); - } - - /** - * Spawns a new worker and sets up message handling. - * The worker will be reused for multiple tasks. - * - * @private - * @returns {Worker} The newly spawned worker - */ - spawnWorker() { - const worker = new Worker(this.workerScript); - - this.allWorkers.add(worker); - - worker.on('message', result => { - // Get the current task before clearing it - const currentTask = worker.currentTask; - - worker.currentTask = null; - - // Resolve/reject the completed task first - if (currentTask) { - if (result?.error) { - currentTask.reject(new Error(result.error)); - } else { - currentTask.resolve(result); - } - } - - // Mark worker as idle and process any queued work - this.idleWorkers.push(worker); - this.processQueue(); - }); - - worker.on('error', err => { - poolLogger.debug(`Worker error`, { error: err.message }); - - // Remove failed worker from pool - this.allWorkers.delete(worker); - - const idx = this.idleWorkers.indexOf(worker); - - if (idx !== -1) { - this.idleWorkers.splice(idx, 1); - } - - // Reject current task if any - if (worker.currentTask) { - worker.currentTask.reject(err); - - worker.currentTask = null; - } - }); - - return worker; - } - - /** - * Executes a task on a specific worker. - * - * @private - * @param {Worker} worker - Worker to execute the task - * @param {object} workerData - Data to send to the worker - * @param {Function} resolve - Promise resolve function - * @param {Function} reject - Promise reject function - */ - executeTask(worker, workerData, resolve, reject) { - worker.currentTask = { resolve, reject }; - - worker.postMessage(workerData); - } - - /** - * Runs a task in a worker thread. If all workers are busy, the task - * is queued and executed when a worker becomes available. - * - * Workers are reused across tasks for efficiency. - * - * @template T - * @param {object} workerData - Data to pass to the worker thread - * @param {string} workerData.generatorName - Name of the generator to run - * @param {unknown} workerData.fullInput - Full input data for context - * @param {number[]} workerData.itemIndices - Indices of items to process - * @param {object} workerData.options - Generator options - * @returns {Promise} Resolves with the worker result, rejects on error - */ - run(workerData) { - return new Promise((resolve, reject) => { - // Always queue the task first - this.queue.push({ workerData, resolve, reject }); - - // Then try to process the queue - this.processQueue(); - }); - } - - /** - * Processes queued tasks by assigning them to available or new workers. - * Spawns all needed workers in parallel to minimize startup latency. 
- * - * @private - */ - processQueue() { - // First, assign tasks to any idle workers - while (this.queue.length > 0 && this.idleWorkers.length > 0) { - const worker = this.idleWorkers.pop(); - - const { workerData, resolve, reject } = this.queue.shift(); - - poolLogger.debug(`Task started (reusing worker)`, { - generator: workerData.generatorName, - idleWorkers: this.idleWorkers.length, - totalWorkers: this.allWorkers.size, - queueSize: this.queue.length, - }); - - this.executeTask(worker, workerData, resolve, reject); - } - - // Calculate how many new workers we need (account for workers being spawned) - const totalPendingWorkers = this.allWorkers.size + this.spawningCount; - - const workersNeeded = Math.min( - this.queue.length, - this.threads - totalPendingWorkers - ); - - if (workersNeeded > 0) { - poolLogger.debug(`Spawning workers in parallel`, { - workersNeeded, - currentWorkers: this.allWorkers.size, - spawning: this.spawningCount, - maxThreads: this.threads, - queueSize: this.queue.length, - }); - - // Spawn all needed workers in parallel (don't await, just fire them off) - for (let i = 0; i < workersNeeded; i++) { - const { workerData, resolve, reject } = this.queue.shift(); - - // Track that we're spawning a worker - this.spawningCount++; - - // Use setImmediate to spawn workers concurrently rather than blocking - setImmediate(() => { - this.spawningCount--; - - const worker = this.spawnWorker(); - - this.executeTask(worker, workerData, resolve, reject); - }); - } - } - - if (this.queue.length > 0) { - poolLogger.debug(`Tasks queued (waiting for workers)`, { - queueSize: this.queue.length, - totalWorkers: this.allWorkers.size, - }); - } - } - - /** - * Terminates all workers in the pool. - * Kills workers immediately without waiting for graceful shutdown. - */ - terminate() { - for (const worker of this.allWorkers) { - worker.terminate(); - } - - this.allWorkers.clear(); - this.idleWorkers = []; - this.queue = []; - this.spawningCount = 0; - } +export default function createWorkerPool(threads) { + poolLogger.debug(`WorkerPool initialized`, { + threads, + workerScript: './chunk-worker.mjs', + }); + + return new Piscina({ + filename: workerScript, + minThreads: threads, + maxThreads: threads, + maxQueue: threads * 2, + idleTimeout: Infinity, // Keep workers alive + }); } diff --git a/src/threading/parallel.mjs b/src/threading/parallel.mjs index d54f8d1e..08ea97c9 100644 --- a/src/threading/parallel.mjs +++ b/src/threading/parallel.mjs @@ -6,182 +6,106 @@ import logger from '../logger/index.mjs'; const parallelLogger = logger.child('parallel'); /** - * Splits a count of items into chunks of specified size. + * Splits items into chunks of specified size. * * @param {number} count - Total number of items * @param {number} size - Maximum items per chunk - * @returns {number[][]} Array of index arrays, each representing a chunk + * @returns {number[][]} Array of index arrays for each chunk */ -const createIndexChunks = (count, size) => { +const createChunks = (count, size) => { const chunks = []; - for (let start = 0; start < count; start += size) { - const end = Math.min(start + size, count); - const chunk = []; - - for (let i = start; i < end; i++) { - chunk.push(i); - } - - chunks.push(chunk); + for (let i = 0; i < count; i += size) { + chunks.push( + Array.from({ length: Math.min(size, count - i) }, (_, j) => i + j) + ); } return chunks; }; /** - * Creates an array of sequential indices from 0 to count-1. 
- * - * @param {number} count - Number of indices to create - * @returns {number[]} Array of indices [0, 1, 2, ..., count-1] - */ -const createIndices = count => Array.from({ length: count }, (_, i) => i); - -/** - * Yields results from an array of promises as they complete. - * Results are yielded in completion order, not input order. + * Prepares task data for a chunk, handling sliceInput optimization. * - * @template T - * @param {Promise[]} promises - Array of promises to race - * @yields {T} Results as they complete + * @param {object} generator - Generator with processChunk method + * @param {unknown[]} fullInput - Full input array + * @param {number[]} indices - Indices to process + * @param {object} options - Serialized options + * @param {string} generatorName - Name of the generator + * @returns {object} Task data for Piscina */ -async function* yieldAsCompleted(promises) { - if (promises.length === 0) { - return; - } - - // Wrap each promise to track completion and remove from pending set - const pending = new Set(); - - for (const promise of promises) { - const tagged = promise.then(result => { - pending.delete(tagged); - - return result; - }); - - pending.add(tagged); - } - - // Yield results as each promise completes - while (pending.size > 0) { - yield await Promise.race(pending); - } -} +const createTask = (generator, fullInput, indices, options, generatorName) => ({ + generatorName, + fullInput: generator.processChunk.sliceInput + ? indices.map(i => fullInput[i]) + : fullInput, + itemIndices: generator.processChunk.sliceInput + ? indices.map((_, i) => i) + : indices, + options, +}); /** - * Creates a ParallelWorker that uses Node.js Worker threads for parallel - * processing of items. The worker distributes work across multiple threads - * and streams results as chunks complete. + * Creates a parallel worker that distributes work across a Piscina thread pool. * - * @param {string} generatorName - Name of the generator for chunk processing - * @param {import('./index.mjs').default} pool - WorkerPool instance + * @param {keyof AllGenerators} generatorName - Generator name + * @param {import('piscina').Piscina} pool - Piscina instance * @param {Partial} options - Generator options * @returns {ParallelWorker} */ export default function createParallelWorker(generatorName, pool, options) { const { threads, chunkSize } = options; - const generator = allGenerators[generatorName]; - /** - * Strips non-serializable properties from options for worker transfer. - * - * @param {object} extra - Extra options to merge - * @returns {object} Serializable options object - */ + /** @param {object} extra */ const serializeOptions = extra => { - const serialized = { ...options, ...extra }; + const opts = { ...options, ...extra }; - delete serialized.worker; + delete opts.worker; - return serialized; + return opts; }; return { /** - * Processes items in parallel and yields each chunk's results as they complete. - * Enables pipeline parallelism where downstream generators can start processing - * results while upstream chunks are still being processed. + * Processes items in parallel, yielding results as chunks complete. 
* * @template T, R - * @param {T[]} items - Items to process (determines chunk distribution) - * @param {T[]} fullInput - Full input data for context rebuilding in workers - * @param {object} extra - Generator-specific context (e.g., apiTemplate) - * @yields {R[]} Each chunk's results as they complete + * @param {T[]} items - Items to process + * @param {T[]} fullInput - Full input for context + * @param {object} extra - Extra options + * @yields {R[]} Chunk results as they complete */ async *stream(items, fullInput, extra) { - const itemCount = items.length; - - if (itemCount === 0) { + if (items.length === 0) { return; } - if (!generator.processChunk) { - throw new Error( - `Generator "${generatorName}" does not support chunk processing` - ); - } - - // Single-threaded mode: process directly in main thread - if (threads <= 1) { - parallelLogger.debug(`Processing ${itemCount} items in main thread`, { - generator: generatorName, - }); - - const indices = createIndices(itemCount); + const chunks = createChunks(items.length, chunkSize); - const result = await generator.processChunk(fullInput, indices, { - ...options, - ...extra, - }); - - yield result; - - return; - } - - // Multi-threaded mode: distribute work across worker threads - // Calculate optimal chunk size to maximize thread utilization - // Use provided chunkSize as maximum, but create at least as many chunks as threads - const optimalChunkSize = Math.max(1, Math.ceil(itemCount / threads)); - const effectiveChunkSize = Math.min(chunkSize, optimalChunkSize); - const indexChunks = createIndexChunks(itemCount, effectiveChunkSize); + const opts = serializeOptions(extra); parallelLogger.debug( - `Distributing ${itemCount} items across ${threads} threads`, - { - generator: generatorName, - chunks: indexChunks.length, - chunkSize: effectiveChunkSize, - } + `Distributing ${items.length} items across ${chunks.length} chunks`, + { generator: generatorName, chunks: chunks.length, chunkSize, threads } ); - const chunkPromises = indexChunks.map(indices => { - // If generator's processChunk supports sliced input (doesn't need full context), - // send only the items at the specified indices to reduce serialization overhead - const inputData = generator.processChunk.sliceInput - ? indices.map(i => fullInput[i]) - : fullInput; - - return pool.run({ - generatorName, - fullInput: inputData, - itemIndices: generator.processChunk.sliceInput - ? 
indices.map((_, i) => i) // Renumber indices for sliced array - : indices, - options: serializeOptions(extra), - }); - }); - - // Yield results as each chunk completes - let completedChunks = 0; - - for await (const result of yieldAsCompleted(chunkPromises)) { - completedChunks++; + // Submit all tasks to Piscina and wrap with index tracking + const pending = chunks.map((indices, i) => + pool + .run(createTask(generator, fullInput, indices, opts, generatorName)) + .then(result => ({ i, result })) + ); + + // Yield results as they complete (true parallel collection) + for (let completed = 0; completed < chunks.length; completed++) { + const { i, result } = await Promise.race(pending); + + // Replace completed promise with one that never resolves + pending[i] = new Promise(() => {}); parallelLogger.debug( - `Chunk ${completedChunks}/${indexChunks.length} completed`, + `Chunk ${completed + 1}/${chunks.length} completed`, { generator: generatorName } ); From a5222d60768bb1f3ec42fb6227bb0d796f72e1d5 Mon Sep 17 00:00:00 2001 From: Claudio Wunder Date: Tue, 9 Dec 2025 02:02:37 +0100 Subject: [PATCH 05/25] fix: backpressure --- src/threading/index.mjs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/threading/index.mjs b/src/threading/index.mjs index d1ba88d6..81744c02 100644 --- a/src/threading/index.mjs +++ b/src/threading/index.mjs @@ -22,7 +22,6 @@ export default function createWorkerPool(threads) { filename: workerScript, minThreads: threads, maxThreads: threads, - maxQueue: threads * 2, idleTimeout: Infinity, // Keep workers alive }); } From c92ac397b4fde9f23c8f64924db445b9a5a2e1fb Mon Sep 17 00:00:00 2001 From: Claudio Wunder Date: Tue, 9 Dec 2025 02:05:04 +0100 Subject: [PATCH 06/25] chore: updated build script --- scripts/vercel-build.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/vercel-build.sh b/scripts/vercel-build.sh index 862d5340..720880a6 100755 --- a/scripts/vercel-build.sh +++ b/scripts/vercel-build.sh @@ -5,6 +5,9 @@ node bin/cli.mjs generate \ -t web \ -i "./node/doc/api/*.md" \ -o "./out" \ - --index "./node/doc/api/index.md" + -c "./node/CHANGELOG.md" \ + --index "./node/doc/api/index.md" \ + --type-map "./node/doc/type-map.json" \ + --log-level debug rm -rf node/ From 40cead832b8e97f4684b65ba6aab9abdc3f478f3 Mon Sep 17 00:00:00 2001 From: Claudio Wunder Date: Tue, 9 Dec 2025 02:08:34 +0100 Subject: [PATCH 07/25] chore: fallback type-map --- scripts/vercel-build.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/vercel-build.sh b/scripts/vercel-build.sh index 720880a6..6ca6cb2c 100755 --- a/scripts/vercel-build.sh +++ b/scripts/vercel-build.sh @@ -7,7 +7,6 @@ node bin/cli.mjs generate \ -o "./out" \ -c "./node/CHANGELOG.md" \ --index "./node/doc/api/index.md" \ - --type-map "./node/doc/type-map.json" \ --log-level debug rm -rf node/ From 9f42aa5dee92a032827a471b540853226df33092 Mon Sep 17 00:00:00 2001 From: Claudio Wunder Date: Tue, 9 Dec 2025 02:34:41 +0100 Subject: [PATCH 08/25] chore: streamlined legacy-json and ast generation --- bin/commands/generate.mjs | 18 ++-- src/generators.mjs | 11 ++- src/generators/__tests__/index.test.mjs | 13 ++- .../api-links/__tests__/fixtures.test.mjs | 18 ++-- src/generators/ast/index.mjs | 90 +++++++++++++++++++ src/generators/index.mjs | 2 + src/generators/jsx-ast/utils/buildContent.mjs | 5 +- src/generators/legacy-json/index.mjs | 59 ++++++------ src/generators/types.d.ts | 8 +- 9 files changed, 166 insertions(+), 58 deletions(-) create mode 100644 src/generators/ast/index.mjs diff --git 
a/bin/commands/generate.mjs b/bin/commands/generate.mjs index 7a4f04f5..69926bfa 100644 --- a/bin/commands/generate.mjs +++ b/bin/commands/generate.mjs @@ -6,20 +6,13 @@ import { coerce } from 'semver'; import { NODE_CHANGELOG_URL, NODE_VERSION } from '../../src/constants.mjs'; import { publicGenerators } from '../../src/generators/index.mjs'; import createGenerator from '../../src/generators.mjs'; +import logger from '../../src/logger/index.mjs'; import { parseChangelog, parseIndex } from '../../src/parsers/markdown.mjs'; import { DEFAULT_TYPE_MAP } from '../../src/utils/parser/constants.mjs'; import { loadFromURL } from '../../src/utils/parser.mjs'; -import { loadAndParse } from '../utils.mjs'; const availableGenerators = Object.keys(publicGenerators); -// Half of available logical CPUs guarantees in general all physical CPUs are being used -// which in most scenarios is the best way to maximize performance -// When spawning more than a said number of threads, the overhead of context switching -// and CPU contention starts to degrade performance rather than improve it. -// Therefore, we set the optimal threads to half the number of CPU cores, with a minimum of 6. -const optimalThreads = Math.max(cpus().length, 2); - /** * @typedef {Object} Options * @property {Array|string} input - Specifies the glob/path for input files. @@ -70,7 +63,7 @@ export default { prompt: { type: 'text', message: 'How many threads to allow', - initialValue: String(Math.max(optimalThreads, 2)), + initialValue: String(cpus().length), }, }, chunkSize: { @@ -146,7 +139,10 @@ export default { * @returns {Promise} */ async action(opts) { - const docs = await loadAndParse(opts.input, opts.ignore); + logger.debug('Starting doc-kit', opts); + + const { runGenerators } = createGenerator(); + const releases = await parseChangelog(opts.changelog); const rawTypeMap = await loadFromURL(opts.typeMap); @@ -154,7 +150,7 @@ export default { const index = opts.index && (await parseIndex(opts.index)); - const { runGenerators } = createGenerator(docs); + logger.debug(`Starting generation with targets: ${opts.target.join(', ')}`); await runGenerators({ generators: opts.target, diff --git a/src/generators.mjs b/src/generators.mjs index 45131908..0a76a29e 100644 --- a/src/generators.mjs +++ b/src/generators.mjs @@ -13,12 +13,11 @@ const generatorsLogger = logger.child('generators'); * documentation generators in dependency order, with support for parallel * processing and streaming results. * - * @param {ParserOutput} input - The API doc AST tree * @returns {{ runGenerators: (options: GeneratorOptions) => Promise }} */ -const createGenerator = input => { +const createGenerator = () => { /** @type {{ [key: string]: Promise | AsyncGenerator }} */ - const cachedGenerators = { ast: Promise.resolve(input) }; + const cachedGenerators = {}; const streamingCache = createStreamingCache(); @@ -28,10 +27,14 @@ const createGenerator = input => { /** * Gets the collected input from a dependency generator. 
* - * @param {string} dependsOn - Dependency generator name + * @param {string | undefined} dependsOn - Dependency generator name * @returns {Promise} */ const getDependencyInput = async dependsOn => { + if (!dependsOn) { + return undefined; + } + const result = await cachedGenerators[dependsOn]; if (isAsyncGenerator(result)) { diff --git a/src/generators/__tests__/index.test.mjs b/src/generators/__tests__/index.test.mjs index abcb5851..1e456b0a 100644 --- a/src/generators/__tests__/index.test.mjs +++ b/src/generators/__tests__/index.test.mjs @@ -5,7 +5,7 @@ import semver from 'semver'; import { allGenerators } from '../index.mjs'; -const validDependencies = [...Object.keys(allGenerators), 'ast']; +const validDependencies = Object.keys(allGenerators); const generatorEntries = Object.entries(allGenerators); describe('All Generators', () => { @@ -34,9 +34,18 @@ describe('All Generators', () => { if (generator.dependsOn) { assert.ok( validDependencies.includes(generator.dependsOn), - `Generator "${key}" depends on "${generator.dependsOn}" which is not a valid generator or 'ast'` + `Generator "${key}" depends on "${generator.dependsOn}" which is not a valid generator` ); } }); }); + + it('should have ast generator as a top-level generator with no dependencies', () => { + assert.ok(allGenerators.ast, 'ast generator should exist'); + assert.equal( + allGenerators.ast.dependsOn, + undefined, + 'ast generator should have no dependencies' + ); + }); }); diff --git a/src/generators/api-links/__tests__/fixtures.test.mjs b/src/generators/api-links/__tests__/fixtures.test.mjs index efd53de5..d20fdfd5 100644 --- a/src/generators/api-links/__tests__/fixtures.test.mjs +++ b/src/generators/api-links/__tests__/fixtures.test.mjs @@ -1,7 +1,7 @@ import { readdir } from 'node:fs/promises'; import { cpus } from 'node:os'; import { basename, extname, join } from 'node:path'; -import { describe, it } from 'node:test'; +import { after, before, describe, it } from 'node:test'; import createWorkerPool from '../../../threading/index.mjs'; import createParallelWorker from '../../../threading/parallel.mjs'; @@ -16,12 +16,20 @@ const sourceFiles = fixtures .map(fixture => join(FIXTURES_DIRECTORY, fixture)); describe('api links', () => { + const threads = cpus().length; + let pool; + + before(() => { + pool = createWorkerPool(threads); + }); + + after(async () => { + await pool.destroy(); + }); + describe('should work correctly for all fixtures', () => { sourceFiles.forEach(sourceFile => { it(`${basename(sourceFile)}`, async t => { - const threads = cpus().length; - const pool = createWorkerPool(threads); - const worker = createParallelWorker('ast-js', pool, { threads, chunkSize: 10, @@ -46,8 +54,6 @@ describe('api links', () => { } t.assert.snapshot(actualOutput); - - await pool.destroy(); }); }); }); diff --git a/src/generators/ast/index.mjs b/src/generators/ast/index.mjs new file mode 100644 index 00000000..67b3c6f0 --- /dev/null +++ b/src/generators/ast/index.mjs @@ -0,0 +1,90 @@ +'use strict'; + +import { readFile } from 'node:fs/promises'; +import { extname } from 'node:path'; + +import { globSync } from 'glob'; +import { VFile } from 'vfile'; + +import createQueries from '../../utils/queries/index.mjs'; +import { getRemark } from '../../utils/remark.mjs'; + +const remarkProcessor = getRemark(); + +const { updateStabilityPrefixToLink } = createQueries(); + +/** + * Parses a single markdown file into an AST. 
+ *
+ * @param {string} filePath - Path to the markdown file
+ * @returns {Promise<ParserOutput>}
+ */
+const parseMarkdownFile = async filePath => {
+  const fileContents = await readFile(filePath, 'utf-8');
+  const vfile = new VFile({ path: filePath, value: fileContents });
+
+  // Normalizes all the Stability Index prefixes with Markdown links
+  updateStabilityPrefixToLink(vfile);
+
+  // Parses the API doc into an AST tree using `unified` and `remark`
+  const tree = remarkProcessor.parse(vfile);
+
+  return { file: { stem: vfile.stem, basename: vfile.basename }, tree };
+};
+
+/**
+ * This generator parses Markdown API doc files into AST trees.
+ * It parallelizes the parsing across worker threads for better performance.
+ *
+ * @typedef {undefined} Input
+ *
+ * @type {GeneratorMetadata<Input, AsyncGenerator<Array<ParserOutput>>>}
+ */
+export default {
+  name: 'ast',
+
+  version: '1.0.0',
+
+  description: 'Parses Markdown API doc files into AST trees',
+
+  dependsOn: undefined,
+
+  processChunk: Object.assign(
+    /**
+     * Process a chunk of markdown files in a worker thread.
+     * Loads and parses markdown files into AST representations.
+     *
+     * @param {string[]} inputSlice - Sliced input paths for this chunk
+     * @param {number[]} itemIndices - Indices into the sliced array
+     * @returns {Promise<Array<ParserOutput>>}
+     */
+    async (inputSlice, itemIndices) => {
+      const results = [];
+
+      for (const idx of itemIndices) {
+        const parsed = await parseMarkdownFile(inputSlice[idx]);
+
+        results.push(parsed);
+      }
+
+      return results;
+    },
+    { sliceInput: true }
+  ),
+
+  /**
+   * Generates AST trees from markdown input files.
+   *
+   * @param {Input} _ - Unused (top-level generator)
+   * @param {Partial<GeneratorOptions>} options
+   * @returns {AsyncGenerator<Array<ParserOutput>>}
+   */
+  async *generate(_, { input = [], worker }) {
+    const files = globSync(input).filter(path => extname(path) === '.md');
+
+    // Parse markdown files in parallel using worker threads
+    for await (const chunkResult of worker.stream(files, files)) {
+      yield chunkResult;
+    }
+  },
+};
diff --git a/src/generators/index.mjs b/src/generators/index.mjs
index cca7767e..09a4c1a0 100644
--- a/src/generators/index.mjs
+++ b/src/generators/index.mjs
@@ -2,6 +2,7 @@
 
 import addonVerify from './addon-verify/index.mjs';
 import apiLinks from './api-links/index.mjs';
+import ast from './ast/index.mjs';
 import astJs from './ast-js/index.mjs';
 import jsonSimple from './json-simple/index.mjs';
 import jsxAst from './jsx-ast/index.mjs';
@@ -32,6 +33,7 @@ export const publicGenerators = {
 // These ones are special since they don't produce standard output,
 // and hence, we don't expose them to the CLI.
const internalGenerators = { + ast, metadata, 'jsx-ast': jsxAst, 'ast-js': astJs, diff --git a/src/generators/jsx-ast/utils/buildContent.mjs b/src/generators/jsx-ast/utils/buildContent.mjs index 17b48bf9..5d92801c 100644 --- a/src/generators/jsx-ast/utils/buildContent.mjs +++ b/src/generators/jsx-ast/utils/buildContent.mjs @@ -295,10 +295,7 @@ const buildContent = async (metadataEntries, head, sideBarProps, remark) => { const ast = await remark.run(root); // The final MDX content is the expression in the Program's first body node - return { - ...ast.body[0].expression, - data: head, - }; + return { ...ast.body[0].expression, data: head }; }; export default buildContent; diff --git a/src/generators/legacy-json/index.mjs b/src/generators/legacy-json/index.mjs index 92f14c75..326fcaef 100644 --- a/src/generators/legacy-json/index.mjs +++ b/src/generators/legacy-json/index.mjs @@ -30,31 +30,31 @@ export default { dependsOn: 'metadata', - /** - * Process a chunk of items in a worker thread. - * Builds JSON sections - FS operations happen in generate(). - * - * @param {Input} fullInput - Full metadata input for context rebuilding - * @param {number[]} itemIndices - Indices of head nodes to process - * @param {Partial>} _options - Serializable options (unused) - * @returns {Promise} JSON sections for each processed module - */ - async processChunk(fullInput, itemIndices) { - const groupedModules = groupNodesByModule(fullInput); - - const headNodes = fullInput.filter(node => node.heading.depth === 1); - - const results = []; - - for (const idx of itemIndices) { - const head = headNodes[idx]; - const nodes = groupedModules.get(head.api); + processChunk: Object.assign( + /** + * Process a chunk of items in a worker thread. + * Builds JSON sections - FS operations happen in generate(). + * + * With sliceInput, each item is pre-grouped {head, nodes} - no need to + * recompute groupNodesByModule for every chunk. + * + * @param {Array<{head: ApiDocMetadataEntry, nodes: ApiDocMetadataEntry[]}>} slicedInput - Pre-sliced module data + * @param {number[]} itemIndices - Indices into the sliced array + * @returns {Promise} JSON sections for each processed module + */ + async (slicedInput, itemIndices) => { + const results = []; + + for (const idx of itemIndices) { + const { head, nodes } = slicedInput[idx]; + + results.push(buildSection(head, nodes)); + } - results.push(buildSection(head, nodes)); - } - - return results; - }, + return results; + }, + { sliceInput: true } + ), /** * Generates a legacy JSON file. 
@@ -64,11 +64,18 @@ export default { * @returns {AsyncGenerator>} */ async *generate(input, { output, worker }) { + const groupedModules = groupNodesByModule(input); + const headNodes = input.filter(node => node.heading.depth === 1); - const deps = { output }; + // Create sliced input: each item contains head + its module's entries + // This avoids sending all 4900+ entries to every worker + const slicedInput = headNodes.map(head => ({ + head, + nodes: groupedModules.get(head.api), + })); - for await (const chunkResult of worker.stream(headNodes, input, deps)) { + for await (const chunkResult of worker.stream(slicedInput, slicedInput)) { if (output) { for (const section of chunkResult) { const out = join(output, `${section.api}.json`); diff --git a/src/generators/types.d.ts b/src/generators/types.d.ts index d766afd7..f994f452 100644 --- a/src/generators/types.d.ts +++ b/src/generators/types.d.ts @@ -101,14 +101,12 @@ declare global { * If you pass `createGenerator` with ['react', 'html'], the 'react' generator will be executed first, * as it is a top level generator and then the 'html' generator would be executed after the 'react' generator. * - * The 'ast' generator is the top-level parser, and if 'ast' is passed to `dependsOn`, then the generator - * will be marked as a top-level generator. + * The 'ast' generator is the top-level parser for markdown files. It has no dependencies. * * The `ast-js` generator is the top-level parser for JavaScript files. It - * passes the ASTs for any JavaScript files given in the input. Like `ast`, - * any generator depending on it is marked as a top-level generator. + * passes the ASTs for any JavaScript files given in the input. */ - dependsOn: keyof AllGenerators | 'ast'; + dependsOn: keyof AllGenerators | undefined; /** * Generators are abstract and the different generators have different sort of inputs and outputs. 
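As a reviewer aside (illustrative, not part of the patch): both jsx-ast and legacy-json now pre-slice their input per module, so each worker chunk serializes only the entries it actually needs. A minimal sketch of that shape, assuming groupNodesByModule returns a Map keyed by module name as in the diffs above:

// Build one self-contained work item per depth-1 head node: the head plus
// only its module's entries, instead of shipping all ~4900 entries to
// every worker as the old fullInput approach did.
const buildSlicedInput = entries => {
  const groupedModules = groupNodesByModule(entries);

  return entries
    .filter(node => node.heading.depth === 1)
    .map(head => ({ head, nodes: groupedModules.get(head.api) }));
};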
From d049a41cdcc8daf9e5b2585a9d1315cb0e86322e Mon Sep 17 00:00:00 2001 From: Claudio Wunder Date: Tue, 9 Dec 2025 02:39:01 +0100 Subject: [PATCH 09/25] chore: tiny console improvements --- bin/commands/generate.mjs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/bin/commands/generate.mjs b/bin/commands/generate.mjs index 69926bfa..6f3064a8 100644 --- a/bin/commands/generate.mjs +++ b/bin/commands/generate.mjs @@ -145,12 +145,11 @@ export default { const releases = await parseChangelog(opts.changelog); - const rawTypeMap = await loadFromURL(opts.typeMap); - const typeMap = JSON.parse(rawTypeMap); + const typeMap = JSON.parse(await loadFromURL(opts.typeMap)); const index = opts.index && (await parseIndex(opts.index)); - logger.debug(`Starting generation with targets: ${opts.target.join(', ')}`); + logger.debug('Starting generation', { targets: opts.target }); await runGenerators({ generators: opts.target, From bdedcb29d1c51bdf7cd6404ac7c39a58e8f576f1 Mon Sep 17 00:00:00 2001 From: Claudio Wunder Date: Tue, 9 Dec 2025 02:41:16 +0100 Subject: [PATCH 10/25] chore: delete resolved promises --- src/threading/parallel.mjs | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/src/threading/parallel.mjs b/src/threading/parallel.mjs index 08ea97c9..41706b1c 100644 --- a/src/threading/parallel.mjs +++ b/src/threading/parallel.mjs @@ -90,24 +90,30 @@ export default function createParallelWorker(generatorName, pool, options) { { generator: generatorName, chunks: chunks.length, chunkSize, threads } ); - // Submit all tasks to Piscina and wrap with index tracking - const pending = chunks.map((indices, i) => - pool - .run(createTask(generator, fullInput, indices, opts, generatorName)) - .then(result => ({ i, result })) + // Submit all tasks to Piscina - each promise resolves to itself for removal + const pending = new Set( + chunks.map(indices => { + const promise = pool + .run(createTask(generator, fullInput, indices, opts, generatorName)) + .then(result => ({ promise, result })); + + return promise; + }) ); // Yield results as they complete (true parallel collection) - for (let completed = 0; completed < chunks.length; completed++) { - const { i, result } = await Promise.race(pending); + let completed = 0; - // Replace completed promise with one that never resolves - pending[i] = new Promise(() => {}); + while (pending.size > 0) { + const { promise, result } = await Promise.race(pending); - parallelLogger.debug( - `Chunk ${completed + 1}/${chunks.length} completed`, - { generator: generatorName } - ); + pending.delete(promise); + + completed++; + + parallelLogger.debug(`Chunk ${completed}/${chunks.length} completed`, { + generator: generatorName, + }); yield result; } From 3676bb4ddc8a116f6265fd65cc1c5ca890027ef9 Mon Sep 17 00:00:00 2001 From: Claudio Wunder Date: Tue, 9 Dec 2025 03:01:52 +0100 Subject: [PATCH 11/25] chore: perf improvement for web gen --- src/generators/web/utils/processing.mjs | 71 ++++++++++++++++--------- 1 file changed, 46 insertions(+), 25 deletions(-) diff --git a/src/generators/web/utils/processing.mjs b/src/generators/web/utils/processing.mjs index 2c61211f..a2839f6f 100644 --- a/src/generators/web/utils/processing.mjs +++ b/src/generators/web/utils/processing.mjs @@ -7,6 +7,38 @@ import { SPECULATION_RULES } from '../constants.mjs'; import bundleCode from './bundle.mjs'; import { createChunkedRequire } from './chunks.mjs'; +/** + * Converts JSX AST entries to server and client JavaScript code. 
+ * This is the CPU-intensive step that can be parallelized.
+ *
+ * @param {Array} entries - JSX AST entries
+ * @param {function} buildServerProgram - Wraps code for server execution
+ * @param {function} buildClientProgram - Wraps code for client hydration
+ * @returns {{serverCodeMap: Map<string, string>, clientCodeMap: Map<string, string>}}
+ */
+export function convertJSXToCode(
+  entries,
+  { buildServerProgram, buildClientProgram }
+) {
+  const serverCodeMap = new Map();
+  const clientCodeMap = new Map();
+
+  for (const entry of entries) {
+    const fileName = `${entry.data.api}.jsx`;
+
+    // Convert AST to JavaScript string with JSX syntax
+    const { value: code } = toJs(entry, { handlers: jsx });
+
+    // Prepare code for server-side execution (wrapped for SSR)
+    serverCodeMap.set(fileName, buildServerProgram(code));
+
+    // Prepare code for client-side execution (wrapped for hydration)
+    clientCodeMap.set(fileName, buildClientProgram(code));
+  }
+
+  return { serverCodeMap, clientCodeMap };
+}
+
 /**
  * Executes server-side JavaScript code in an isolated context with virtual module support.
  *
@@ -56,38 +88,27 @@ export async function executeServerCode(serverCodeMap, requireFn) {
 export async function processJSXEntries(
   entries,
   template,
-  { buildServerProgram, buildClientProgram },
+  astBuilders,
   requireFn,
   { version }
 ) {
-  const serverCodeMap = new Map();
-  const clientCodeMap = new Map();
-
-  // Convert JSX AST to JavaScript for both server and client
-  for (const entry of entries) {
-    const fileName = `${entry.data.api}.jsx`;
-
-    // Convert AST to JavaScript string with JSX syntax
-    const { value: code } = toJs(entry, { handlers: jsx });
-
-    // Prepare code for server-side execution (wrapped for SSR)
-    serverCodeMap.set(fileName, buildServerProgram(code));
-
-    // Prepare code for client-side execution (wrapped for hydration)
-    clientCodeMap.set(fileName, buildClientProgram(code));
-  }
-
-  // Execute all server code at once to get dehydrated HTML
-  const serverBundle = await executeServerCode(serverCodeMap, requireFn);
-
-  // Bundle all client code at once (with code splitting for shared chunks)
-  const clientBundle = await bundleCode(clientCodeMap);
+  // Step 1: Convert JSX AST to JavaScript (CPU-intensive, could be parallelized)
+  const { serverCodeMap, clientCodeMap } = convertJSXToCode(
+    entries,
+    astBuilders
+  );
+
+  // Step 2: Bundle server and client code IN PARALLEL
+  // Both need all entries for code-splitting, but are independent of each other
+  const [serverBundle, clientBundle] = await Promise.all([
+    executeServerCode(serverCodeMap, requireFn),
+    bundleCode(clientCodeMap),
+  ]);
 
   const titleSuffix = `Node.js v${version.version} Documentation`;
+
   const speculationRulesString = JSON.stringify(SPECULATION_RULES, null, 2);
 
-  // Process each entry to create final HTML
+  // Step 3: Create final HTML (could be parallelized in workers)
   const results = entries.map(({ data: { api, heading } }) => {
     const fileName = `${api}.js`;
 

From 583564a676c91572c97094f6b1beaeaa0028b4fa Mon Sep 17 00:00:00 2001
From: Claudio Wunder
Date: Tue, 9 Dec 2025 03:06:52 +0100
Subject: [PATCH 12/25] chore: cleanup

---
 src/__tests__/generators.test.mjs    | 26 +++++++++-----------------
 src/generators/legacy-html/index.mjs |  4 ----
 2 files changed, 9 insertions(+), 21 deletions(-)

diff --git a/src/__tests__/generators.test.mjs b/src/__tests__/generators.test.mjs
index c9a91d8a..7464784e 100644
--- a/src/__tests__/generators.test.mjs
+++ b/src/__tests__/generators.test.mjs
@@ -4,14 +4,6 @@ import { describe, it } from 'node:test';
 
 import
createGenerator from '../generators.mjs'; describe('createGenerator', () => { - // Simple mock input for testing - const mockInput = [ - { - file: { stem: 'test', basename: 'test.md' }, - tree: { type: 'root', children: [] }, - }, - ]; - // Mock options with minimal required fields const mockOptions = { input: '/tmp/test', @@ -27,14 +19,14 @@ describe('createGenerator', () => { }; it('should create a generator orchestrator with runGenerators method', () => { - const { runGenerators } = createGenerator(mockInput); + const { runGenerators } = createGenerator(); ok(runGenerators); strictEqual(typeof runGenerators, 'function'); }); it('should return the ast input directly when generators list is empty', async () => { - const { runGenerators } = createGenerator(mockInput); + const { runGenerators } = createGenerator(); const results = await runGenerators({ ...mockOptions, @@ -48,7 +40,7 @@ describe('createGenerator', () => { }); it('should run metadata generator', async () => { - const { runGenerators } = createGenerator(mockInput); + const { runGenerators } = createGenerator(); const results = await runGenerators({ ...mockOptions, @@ -62,7 +54,7 @@ describe('createGenerator', () => { }); it('should handle generator with dependency', async () => { - const { runGenerators } = createGenerator(mockInput); + const { runGenerators } = createGenerator(); // legacy-html depends on metadata const results = await runGenerators({ @@ -76,7 +68,7 @@ describe('createGenerator', () => { }); it('should skip already scheduled generators', async () => { - const { runGenerators } = createGenerator(mockInput); + const { runGenerators } = createGenerator(); // Running with ['metadata', 'metadata'] should skip the second const results = await runGenerators({ @@ -90,7 +82,7 @@ describe('createGenerator', () => { }); it('should handle multiple generators in sequence', async () => { - const { runGenerators } = createGenerator(mockInput); + const { runGenerators } = createGenerator(); // Run metadata - just one generator const results = await runGenerators({ @@ -104,7 +96,7 @@ describe('createGenerator', () => { }); it('should collect async generator results for dependents', async () => { - const { runGenerators } = createGenerator(mockInput); + const { runGenerators } = createGenerator(); // legacy-json depends on metadata (async generator) const results = await runGenerators({ @@ -117,7 +109,7 @@ describe('createGenerator', () => { }); it('should use multiple threads when specified', async () => { - const { runGenerators } = createGenerator(mockInput); + const { runGenerators } = createGenerator(); const results = await runGenerators({ ...mockOptions, @@ -132,7 +124,7 @@ describe('createGenerator', () => { }); it('should pass options to generators', async () => { - const { runGenerators } = createGenerator(mockInput); + const { runGenerators } = createGenerator(); const customTypeMap = { TestType: 'https://example.com/TestType' }; diff --git a/src/generators/legacy-html/index.mjs b/src/generators/legacy-html/index.mjs index 6877f179..b7e589b5 100644 --- a/src/generators/legacy-html/index.mjs +++ b/src/generators/legacy-html/index.mjs @@ -137,11 +137,7 @@ export default { ); const deps = { - index, - releases, version, - output, - apiTemplate, parsedSideNav: String(parsedSideNav), }; From 8292801e4e14da6777a5578d2f0dedacdf88e521 Mon Sep 17 00:00:00 2001 From: Claudio Wunder Date: Tue, 9 Dec 2025 03:09:46 +0100 Subject: [PATCH 13/25] chore: slice input approach on legacy-html --- src/generators/legacy-html/index.mjs | 
128 +++++++++++++++------------ 1 file changed, 71 insertions(+), 57 deletions(-) diff --git a/src/generators/legacy-html/index.mjs b/src/generators/legacy-html/index.mjs index b7e589b5..61287702 100644 --- a/src/generators/legacy-html/index.mjs +++ b/src/generators/legacy-html/index.mjs @@ -52,63 +52,63 @@ export default { dependsOn: 'metadata', - /** - * Process a chunk of items in a worker thread. - * Builds HTML template objects - FS operations happen in generate(). - * - * @param {Input} fullInput - Full metadata input for context rebuilding - * @param {number[]} itemIndices - Indices of head nodes to process - * @param {Partial>} options - Serializable options - * @returns {Promise} Template objects for each processed module - */ - async processChunk(fullInput, itemIndices, { version, parsedSideNav }) { - const groupedModules = groupNodesByModule(fullInput); - - const headNodes = fullInput - .filter(node => node.heading.depth === 1) - .sort((a, b) => a.heading.data.name.localeCompare(b.heading.data.name)); - - const results = []; - - for (const idx of itemIndices) { - const head = headNodes[idx]; - const nodes = groupedModules.get(head.api); - - const activeSideNav = String(parsedSideNav).replace( - `class="nav-${head.api}`, - `class="nav-${head.api} active` - ); - - const parsedToC = remarkRehypeProcessor.processSync( - tableOfContents(nodes, { - maxDepth: 4, - parser: tableOfContents.parseToCNode, - }) - ); - - const parsedContent = buildContent( - headNodes, - nodes, - remarkRehypeProcessor - ); - - const apiAsHeading = head.api.charAt(0).toUpperCase() + head.api.slice(1); - - const template = { - api: head.api, - added: head.introduced_in ?? '', - section: head.heading.data.name || apiAsHeading, - version: `v${version.version}`, - toc: String(parsedToC), - nav: String(activeSideNav), - content: parsedContent, - }; - - results.push(template); - } + processChunk: Object.assign( + /** + * Process a chunk of items in a worker thread. + * Builds HTML template objects - FS operations happen in generate(). + * + * With sliceInput, each item is pre-grouped {head, nodes, headNodes} - no need to + * recompute groupNodesByModule for every chunk. + * + * @param {Array<{head: ApiDocMetadataEntry, nodes: ApiDocMetadataEntry[], headNodes: ApiDocMetadataEntry[]}>} slicedInput - Pre-sliced module data + * @param {number[]} itemIndices - Indices into the sliced array + * @param {{version: string, parsedSideNav: string}} options - Dependencies passed from generate() + * @returns {Promise} Template objects for each processed module + */ + async (slicedInput, itemIndices, { version, parsedSideNav }) => { + const results = []; + + for (const idx of itemIndices) { + const { head, nodes, headNodes } = slicedInput[idx]; + + const activeSideNav = String(parsedSideNav).replace( + `class="nav-${head.api}`, + `class="nav-${head.api} active` + ); + + const parsedToC = remarkRehypeProcessor.processSync( + tableOfContents(nodes, { + maxDepth: 4, + parser: tableOfContents.parseToCNode, + }) + ); + + const parsedContent = buildContent( + headNodes, + nodes, + remarkRehypeProcessor + ); + + const apiAsHeading = + head.api.charAt(0).toUpperCase() + head.api.slice(1); + + const template = { + api: head.api, + added: head.introduced_in ?? 
'', + section: head.heading.data.name || apiAsHeading, + version: `v${version.version}`, + toc: String(parsedToC), + nav: String(activeSideNav), + content: parsedContent, + }; + + results.push(template); + } - return results; - }, + return results; + }, + { sliceInput: true } + ), /** * Generates the legacy version of the API docs in HTML @@ -121,6 +121,8 @@ export default { const apiTemplate = await readFile(join(baseDir, 'template.html'), 'utf-8'); + const groupedModules = groupNodesByModule(input); + const headNodes = input .filter(node => node.heading.depth === 1) .sort((a, b) => a.heading.data.name.localeCompare(b.heading.data.name)); @@ -136,6 +138,14 @@ export default { }) ); + // Create sliced input: each item contains head + its module's entries + headNodes reference + // This avoids sending all ~4900 entries to every worker and recomputing groupings + const slicedInput = headNodes.map(head => ({ + head, + nodes: groupedModules.get(head.api), + headNodes, + })); + const deps = { version, parsedSideNav: String(parsedSideNav), @@ -156,7 +166,11 @@ export default { } // Stream chunks as they complete - HTML files are written immediately - for await (const chunkResult of worker.stream(headNodes, input, deps)) { + for await (const chunkResult of worker.stream( + slicedInput, + slicedInput, + deps + )) { // Write files for this chunk in the generate method (main thread) if (output) { for (const template of chunkResult) { From e97f09ecc0b37ed8c14567d534f5c6200ee25185 Mon Sep 17 00:00:00 2001 From: Claudio Wunder Date: Tue, 9 Dec 2025 03:34:02 +0100 Subject: [PATCH 14/25] chore: code cleanup --- src/generators/legacy-html/index.mjs | 2 +- src/generators/metadata/index.mjs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/generators/legacy-html/index.mjs b/src/generators/legacy-html/index.mjs index 61287702..cbbd7514 100644 --- a/src/generators/legacy-html/index.mjs +++ b/src/generators/legacy-html/index.mjs @@ -62,7 +62,7 @@ export default { * * @param {Array<{head: ApiDocMetadataEntry, nodes: ApiDocMetadataEntry[], headNodes: ApiDocMetadataEntry[]}>} slicedInput - Pre-sliced module data * @param {number[]} itemIndices - Indices into the sliced array - * @param {{version: string, parsedSideNav: string}} options - Dependencies passed from generate() + * @param {{version: SemVer, parsedSideNav: string}} deps - Dependencies passed from generate() * @returns {Promise} Template objects for each processed module */ async (slicedInput, itemIndices, { version, parsedSideNav }) => { diff --git a/src/generators/metadata/index.mjs b/src/generators/metadata/index.mjs index 2fd88380..4fcb6912 100644 --- a/src/generators/metadata/index.mjs +++ b/src/generators/metadata/index.mjs @@ -24,7 +24,7 @@ export default { * * @param {Input} fullInput - Full input array (parsed API doc files) * @param {number[]} itemIndices - Indices of files to process - * @param {Partial>} options - Serializable options + * @param {{typeMap: Record}} deps - Dependencies passed from generate() * @returns {Promise} Metadata entries for processed files */ processChunk: Object.assign( From 9828ed259a2118c225c218262dc9977e23b65c7e Mon Sep 17 00:00:00 2001 From: Claudio Wunder Date: Tue, 9 Dec 2025 11:40:58 +0100 Subject: [PATCH 15/25] chore: self review and improvements --- bin/cli.mjs | 13 +- bin/commands/generate.mjs | 21 ++- bin/commands/index.mjs | 3 +- bin/commands/interactive.mjs | 231 ++++++++++++----------- bin/utils.mjs | 54 ------ src/generators/ast-js/index.mjs | 37 ++-- 
src/generators/ast/index.mjs | 44 ++--- src/generators/jsx-ast/index.mjs | 82 ++++---- src/generators/legacy-html/index.mjs | 108 +++++------ src/generators/legacy-json-all/index.mjs | 15 +- src/generators/legacy-json/index.mjs | 45 +++-- src/generators/metadata/index.mjs | 21 +-- src/generators/web/index.mjs | 2 +- src/loaders/markdown.mjs | 47 ----- src/parsers/markdown.mjs | 55 ------ src/threading/chunk-worker.mjs | 9 +- src/threading/parallel.mjs | 19 +- 17 files changed, 312 insertions(+), 494 deletions(-) delete mode 100644 src/loaders/markdown.mjs diff --git a/bin/cli.mjs b/bin/cli.mjs index 70bc2727..4a1c4769 100755 --- a/bin/cli.mjs +++ b/bin/cli.mjs @@ -5,7 +5,6 @@ import process from 'node:process'; import { Command, Option } from 'commander'; import commands from './commands/index.mjs'; -import interactive from './commands/interactive.mjs'; import { errorWrap } from './utils.mjs'; import logger from '../src/logger/index.mjs'; @@ -22,11 +21,9 @@ program.addOption( // Set log level before any command runs program.hook('preAction', thisCommand => { - const logLevel = thisCommand.opts().logLevel; + const { logLevel } = thisCommand.opts(); - if (logLevel) { - logger.setLogLevel(logLevel); - } + logger.setLogLevel(logLevel); }); // Registering commands @@ -54,11 +51,5 @@ commands.forEach(({ name, description, options, action }) => { cmd.action(errorWrap(action)); }); -// Register the interactive command -program - .command('interactive') - .description('Launch guided CLI wizard') - .action(errorWrap(interactive)); - // Parse and execute command-line arguments program.parse(process.argv); diff --git a/bin/commands/generate.mjs b/bin/commands/generate.mjs index 6f3064a8..9ff34a75 100644 --- a/bin/commands/generate.mjs +++ b/bin/commands/generate.mjs @@ -14,16 +14,17 @@ import { loadFromURL } from '../../src/utils/parser.mjs'; const availableGenerators = Object.keys(publicGenerators); /** - * @typedef {Object} Options - * @property {Array|string} input - Specifies the glob/path for input files. - * @property {Array|string} [ignore] - Specifies the glob/path for ignoring files. - * @property {Array} target - Specifies the generator target mode. - * @property {string} version - Specifies the target Node.js version. - * @property {string} changelog - Specifies the path to the Node.js CHANGELOG.md file. - * @property {string} typeMap - Specifies the path to the Node.js Type Map. - * @property {string} [gitRef] - Git ref/commit URL. - * @property {number} [threads] - Number of threads to allow. - * @property {number} [chunkSize] - Number of items to process per worker thread. 
+ * @typedef {{ + * input: Array | string; + * ignore?: Array | string; + * target: Array; + * version: string; + * changelog: string; + * typeMap: string; + * gitRef?: string; + * threads?: number; + * chunkSize?: number; + * }} Options */ /** diff --git a/bin/commands/index.mjs b/bin/commands/index.mjs index 3e6d9d97..ece48157 100644 --- a/bin/commands/index.mjs +++ b/bin/commands/index.mjs @@ -1,3 +1,4 @@ import generate from './generate.mjs'; +import interactive from './interactive.mjs'; -export default [generate]; +export default [generate, interactive]; diff --git a/bin/commands/interactive.mjs b/bin/commands/interactive.mjs index 618c9593..fef401cb 100644 --- a/bin/commands/interactive.mjs +++ b/bin/commands/interactive.mjs @@ -12,7 +12,6 @@ import { cancel, } from '@clack/prompts'; -import commands from './index.mjs'; import logger from '../../src/logger/index.mjs'; /** @@ -53,127 +52,143 @@ function escapeShellArg(arg) { } /** - * Main interactive function for the API Docs Tooling command line interface. - * Guides the user through a series of prompts, validates inputs, and generates a command to run. - * @returns {Promise} Resolves once the command is generated and executed. + * @type {import('../utils.mjs').Command} */ -export default async function interactive() { - // Step 1: Introduction to the tool - intro('Welcome to API Docs Tooling'); - - // Step 2: Choose the action based on available command definitions - const actionOptions = commands.map(({ description }, i) => ({ - label: description, - value: i, - })); - - const selectedAction = await select({ - message: 'What would you like to do?', - options: actionOptions, - }); - - if (isCancel(selectedAction)) { - cancel('Cancelled.'); - process.exit(0); - } - - // Retrieve the options for the selected action - const { options, name } = commands[selectedAction]; - const answers = {}; // Store answers from user prompts - - // Step 3: Collect input for each option - for (const [key, { prompt }] of Object.entries(options)) { - let response; - const promptMessage = getMessage(prompt); - - switch (prompt.type) { - case 'text': - response = await text({ - message: promptMessage, - initialValue: prompt.initialValue || '', - validate: prompt.required ? requireValue : undefined, - }); - if (response) { - // Store response; split into an array if variadic - answers[key] = prompt.variadic - ? response.split(',').map(s => s.trim()) - : response; - } - break; - - case 'confirm': - response = await confirm({ - message: promptMessage, - initialValue: prompt.initialValue, - }); - answers[key] = response; - break; - - case 'multiselect': - response = await multiselect({ - message: promptMessage, - options: prompt.options, - required: !!prompt.required, - }); - answers[key] = response; - break; - - case 'select': - response = await select({ - message: promptMessage, - options: prompt.options, - }); - answers[key] = response; - break; - } +export default { + name: 'interactive', + description: 'Launch guided CLI wizard', + options: {}, + /** + * Main interactive function for the API Docs Tooling command line interface. + * Guides the user through a series of prompts, validates inputs, and generates a command to run. + * @returns {Promise} Resolves once the command is generated and executed. 
+ */ + async action() { + // Import commands dynamically to avoid circular dependency + const { default: commands } = await import('./index.mjs'); + + // Filter out the interactive command itself + const availableCommands = commands.filter( + cmd => cmd.name !== 'interactive' + ); + + // Step 1: Introduction to the tool + intro('Welcome to API Docs Tooling'); + + // Step 2: Choose the action based on available command definitions + const actionOptions = availableCommands.map((cmd, i) => ({ + label: cmd.description, + value: i, + })); + + const selectedAction = await select({ + message: 'What would you like to do?', + options: actionOptions, + }); - // Handle cancellation - if (isCancel(response)) { + if (isCancel(selectedAction)) { cancel('Cancelled.'); process.exit(0); } - } - // Step 4: Build the final command by escaping values - const cmdParts = ['npx', 'doc-kit', name]; - const executionArgs = [name]; + // Retrieve the options for the selected action + const { options, name } = availableCommands[selectedAction]; + const answers = {}; // Store answers from user prompts + + // Step 3: Collect input for each option + for (const [key, { prompt }] of Object.entries(options)) { + let response; + const promptMessage = getMessage(prompt); + + switch (prompt.type) { + case 'text': + response = await text({ + message: promptMessage, + initialValue: prompt.initialValue || '', + validate: prompt.required ? requireValue : undefined, + }); + if (response) { + // Store response; split into an array if variadic + answers[key] = prompt.variadic + ? response.split(',').map(s => s.trim()) + : response; + } + break; + + case 'confirm': + response = await confirm({ + message: promptMessage, + initialValue: prompt.initialValue, + }); + answers[key] = response; + break; + + case 'multiselect': + response = await multiselect({ + message: promptMessage, + options: prompt.options, + required: !!prompt.required, + }); + answers[key] = response; + break; + + case 'select': + response = await select({ + message: promptMessage, + options: prompt.options, + }); + answers[key] = response; + break; + } - for (const [key, { flags }] of Object.entries(options)) { - const value = answers[key]; - // Skip empty values - if (value == null || (Array.isArray(value) && value.length === 0)) { - continue; + // Handle cancellation + if (isCancel(response)) { + cancel('Cancelled.'); + process.exit(0); + } } - const flag = flags[0].split(/[\s,]+/)[0]; // Use the first flag + // Step 4: Build the final command by escaping values + const cmdParts = ['npx', 'doc-kit', name]; + const executionArgs = [name]; - // Handle different value types (boolean, array, string) - if (typeof value === 'boolean') { - if (value) { - cmdParts.push(flag); - executionArgs.push(flag); + for (const [key, { flags }] of Object.entries(options)) { + const value = answers[key]; + // Skip empty values + if (value == null || (Array.isArray(value) && value.length === 0)) { + continue; } - } else if (Array.isArray(value)) { - for (const item of value) { - cmdParts.push(flag, escapeShellArg(item)); - executionArgs.push(flag, item); + + const flag = flags[0].split(/[\s,]+/)[0]; // Use the first flag + + // Handle different value types (boolean, array, string) + if (typeof value === 'boolean') { + if (value) { + cmdParts.push(flag); + executionArgs.push(flag); + } + } else if (Array.isArray(value)) { + for (const item of value) { + cmdParts.push(flag, escapeShellArg(item)); + executionArgs.push(flag, item); + } + } else { + cmdParts.push(flag, 
escapeShellArg(value)); + executionArgs.push(flag, value); } - } else { - cmdParts.push(flag, escapeShellArg(value)); - executionArgs.push(flag, value); } - } - const finalCommand = cmdParts.join(' '); + const finalCommand = cmdParts.join(' '); - logger.info(`\nGenerated command:\n${finalCommand}\n`); + logger.info(`\nGenerated command:\n${finalCommand}\n`); - // Step 5: Confirm and execute the generated command - if (await confirm({ message: 'Run now?', initialValue: true })) { - spawnSync(process.execPath, [process.argv[1], ...executionArgs], { - stdio: 'inherit', - }); - } + // Step 5: Confirm and execute the generated command + if (await confirm({ message: 'Run now?', initialValue: true })) { + spawnSync(process.execPath, [process.argv[1], ...executionArgs], { + stdio: 'inherit', + }); + } - outro('Done!'); -} + outro('Done!'); + }, +}; diff --git a/bin/utils.mjs b/bin/utils.mjs index 8d0df2eb..561d9098 100644 --- a/bin/utils.mjs +++ b/bin/utils.mjs @@ -1,35 +1,4 @@ -import createMarkdownLoader from '../src/loaders/markdown.mjs'; import logger from '../src/logger/index.mjs'; -import createMarkdownParser from '../src/parsers/markdown.mjs'; - -/** - * Generic lazy initializer. - * @template T - * @param {() => T} factory - Function to create the instance. - * @returns {() => T} - A function that returns the singleton instance. - */ -export const lazy = factory => { - let instance; - return args => (instance ??= factory(args)); -}; - -// Instantiate loader and parser once to reuse, -// but only if/when we actually need them. No need -// to create these objects just to load a different -// utility. -const loader = lazy(createMarkdownLoader); -const parser = lazy(createMarkdownParser); - -/** - * Load and parse markdown API docs. - * @param {string[]} input - Glob patterns for input files. - * @param {string[]} [ignore] - Glob patterns to ignore. - * @returns {Promise>>} - */ -export async function loadAndParse(input, ignore) { - const files = await loader().loadFiles(input, ignore); - return parser().parseApiDocs(files); -} /** * Wraps a function to catch both synchronous and asynchronous errors. @@ -47,26 +16,3 @@ export const errorWrap = process.exit(1); } }; - -/** - * Represents a command-line option for the CLI. - * @typedef {Object} Option - * @property {string[]} flags - Command-line flags, e.g., ['-i, --input ']. - * @property {string} desc - Description of the option. - * @property {Object} [prompt] - Optional prompt configuration. - * @property {'text'|'confirm'|'select'|'multiselect'} prompt.type - Type of the prompt. - * @property {string} prompt.message - Message displayed in the prompt. - * @property {boolean} [prompt.variadic] - Indicates if the prompt accepts multiple values. - * @property {boolean} [prompt.required] - Whether the prompt is required. - * @property {boolean} [prompt.initialValue] - Default value for confirm prompts. - * @property {{label: string, value: string}[]} [prompt.options] - Options for select/multiselect prompts. 
- */ - -/** - * Represents a command-line subcommand - * @typedef {Object} Command - * @property {{ [key: string]: Option }} options - * @property {string} name - * @property {string} description - * @property {Function} action - */ diff --git a/src/generators/ast-js/index.mjs b/src/generators/ast-js/index.mjs index 36d88a8e..8bdba115 100644 --- a/src/generators/ast-js/index.mjs +++ b/src/generators/ast-js/index.mjs @@ -29,30 +29,27 @@ export default { dependsOn: 'metadata', - processChunk: Object.assign( - /** - * Process a chunk of JavaScript files in a worker thread. - * Parses JS source files into AST representations. - * - * @param {string[]} inputSlice - Sliced input paths for this chunk - * @param {number[]} itemIndices - Indices into the sliced array - * @returns {Promise} Parsed JS AST objects for each file - */ - async (inputSlice, itemIndices) => { - const results = []; + /** + * Process a chunk of JavaScript files in a worker thread. + * Parses JS source files into AST representations. + * + * @param {string[]} inputSlice - Sliced input paths for this chunk + * @param {number[]} itemIndices - Indices into the sliced array + * @returns {Promise} Parsed JS AST objects for each file + */ + async processChunk(inputSlice, itemIndices) { + const results = []; - for (const idx of itemIndices) { - const [file] = loadFiles(inputSlice[idx]); + for (const idx of itemIndices) { + const [file] = loadFiles(inputSlice[idx]); - const parsedFile = await parseJsSource(file); + const parsedFile = await parseJsSource(file); - results.push(parsedFile); - } + results.push(parsedFile); + } - return results; - }, - { sliceInput: true } - ), + return results; + }, /** * Generates a JavaScript AST from the input files. diff --git a/src/generators/ast/index.mjs b/src/generators/ast/index.mjs index 67b3c6f0..8d39d450 100644 --- a/src/generators/ast/index.mjs +++ b/src/generators/ast/index.mjs @@ -21,6 +21,7 @@ const { updateStabilityPrefixToLink } = createQueries(); */ const parseMarkdownFile = async filePath => { const fileContents = await readFile(filePath, 'utf-8'); + const vfile = new VFile({ path: filePath, value: fileContents }); // Normalizes all the Stability Index prefixes with Markdown links @@ -47,30 +48,25 @@ export default { description: 'Parses Markdown API doc files into AST trees', - dependsOn: undefined, - - processChunk: Object.assign( - /** - * Process a chunk of markdown files in a worker thread. - * Loads and parses markdown files into AST representations. - * - * @param {string[]} inputSlice - Sliced input paths for this chunk - * @param {number[]} itemIndices - Indices into the sliced array - * @returns {Promise>>} - */ - async (inputSlice, itemIndices) => { - const results = []; - - for (const idx of itemIndices) { - const parsed = await parseMarkdownFile(inputSlice[idx]); - - results.push(parsed); - } - - return results; - }, - { sliceInput: true } - ), + /** + * Process a chunk of markdown files in a worker thread. + * Loads and parses markdown files into AST representations. + * + * @param {string[]} inputSlice - Sliced input paths for this chunk + * @param {number[]} itemIndices - Indices into the sliced array + * @returns {Promise>>} + */ + async processChunk(inputSlice, itemIndices) { + const results = []; + + for (const idx of itemIndices) { + const parsed = await parseMarkdownFile(inputSlice[idx]); + + results.push(parsed); + } + + return results; + }, /** * Generates AST trees from markdown input files. 
diff --git a/src/generators/jsx-ast/index.mjs b/src/generators/jsx-ast/index.mjs index 3a674ee6..1c9cd3a0 100644 --- a/src/generators/jsx-ast/index.mjs +++ b/src/generators/jsx-ast/index.mjs @@ -21,49 +21,45 @@ export default { dependsOn: 'metadata', - processChunk: Object.assign( - /** - * Process a chunk of items in a worker thread. - * Transforms metadata entries into JSX AST nodes. - * - * With sliceInput, each item is a SlicedModuleInput containing the head node - * and all entries for that module - no need to recompute grouping. - * - * @param {Array<{head: ApiDocMetadataEntry, entries: Array}>} slicedInput - Pre-sliced module data - * @param {number[]} itemIndices - Indices of items to process - * @param {object} options - Serializable options - * @param {Array<[string, string]>} options.docPages - Pre-computed doc pages for sidebar - * @param {Array} options.releases - Release information - * @param {import('semver').SemVer} options.version - Target Node.js version - * @returns {Promise>} JSX AST programs for each module - */ - async (slicedInput, itemIndices, { docPages, releases, version }) => { - const results = []; - - for (const idx of itemIndices) { - const { head, entries } = slicedInput[idx]; - - const sideBarProps = buildSideBarProps( - head, - releases, - version, - docPages - ); - - const content = await buildContent( - entries, - head, - sideBarProps, - remarkRecma - ); - - results.push(content); - } - - return results; - }, - { sliceInput: true } - ), + /** + * Process a chunk of items in a worker thread. + * Transforms metadata entries into JSX AST nodes. + * + * Each item is a SlicedModuleInput containing the head node + * and all entries for that module - no need to recompute grouping. + * + * @param {Array<{head: ApiDocMetadataEntry, entries: Array}>} slicedInput - Pre-sliced module data + * @param {number[]} itemIndices - Indices of items to process + * @param {object} options - Serializable options + * @param {Array<[string, string]>} options.docPages - Pre-computed doc pages for sidebar + * @param {Array} options.releases - Release information + * @param {import('semver').SemVer} options.version - Target Node.js version + * @returns {Promise>} JSX AST programs for each module + */ + async processChunk( + slicedInput, + itemIndices, + { docPages, releases, version } + ) { + const results = []; + + for (const idx of itemIndices) { + const { head, entries } = slicedInput[idx]; + + const sideBarProps = buildSideBarProps(head, releases, version, docPages); + + const content = await buildContent( + entries, + head, + sideBarProps, + remarkRecma + ); + + results.push(content); + } + + return results; + }, /** * Generates a JSX AST diff --git a/src/generators/legacy-html/index.mjs b/src/generators/legacy-html/index.mjs index cbbd7514..d87b5921 100644 --- a/src/generators/legacy-html/index.mjs +++ b/src/generators/legacy-html/index.mjs @@ -52,63 +52,59 @@ export default { dependsOn: 'metadata', - processChunk: Object.assign( - /** - * Process a chunk of items in a worker thread. - * Builds HTML template objects - FS operations happen in generate(). - * - * With sliceInput, each item is pre-grouped {head, nodes, headNodes} - no need to - * recompute groupNodesByModule for every chunk. 
- * - * @param {Array<{head: ApiDocMetadataEntry, nodes: ApiDocMetadataEntry[], headNodes: ApiDocMetadataEntry[]}>} slicedInput - Pre-sliced module data - * @param {number[]} itemIndices - Indices into the sliced array - * @param {{version: SemVer, parsedSideNav: string}} deps - Dependencies passed from generate() - * @returns {Promise} Template objects for each processed module - */ - async (slicedInput, itemIndices, { version, parsedSideNav }) => { - const results = []; - - for (const idx of itemIndices) { - const { head, nodes, headNodes } = slicedInput[idx]; - - const activeSideNav = String(parsedSideNav).replace( - `class="nav-${head.api}`, - `class="nav-${head.api} active` - ); - - const parsedToC = remarkRehypeProcessor.processSync( - tableOfContents(nodes, { - maxDepth: 4, - parser: tableOfContents.parseToCNode, - }) - ); - - const parsedContent = buildContent( - headNodes, - nodes, - remarkRehypeProcessor - ); - - const apiAsHeading = - head.api.charAt(0).toUpperCase() + head.api.slice(1); - - const template = { - api: head.api, - added: head.introduced_in ?? '', - section: head.heading.data.name || apiAsHeading, - version: `v${version.version}`, - toc: String(parsedToC), - nav: String(activeSideNav), - content: parsedContent, - }; - - results.push(template); - } + /** + * Process a chunk of items in a worker thread. + * Builds HTML template objects - FS operations happen in generate(). + * + * Each item is pre-grouped {head, nodes, headNodes} - no need to + * recompute groupNodesByModule for every chunk. + * + * @param {Array<{head: ApiDocMetadataEntry, nodes: ApiDocMetadataEntry[], headNodes: ApiDocMetadataEntry[]}>} slicedInput - Pre-sliced module data + * @param {number[]} itemIndices - Indices into the sliced array + * @param {{version: SemVer, parsedSideNav: string}} deps - Dependencies passed from generate() + * @returns {Promise} Template objects for each processed module + */ + async processChunk(slicedInput, itemIndices, { version, parsedSideNav }) { + const results = []; + + for (const idx of itemIndices) { + const { head, nodes, headNodes } = slicedInput[idx]; + + const activeSideNav = String(parsedSideNav).replace( + `class="nav-${head.api}`, + `class="nav-${head.api} active` + ); + + const parsedToC = remarkRehypeProcessor.processSync( + tableOfContents(nodes, { + maxDepth: 4, + parser: tableOfContents.parseToCNode, + }) + ); + + const parsedContent = buildContent( + headNodes, + nodes, + remarkRehypeProcessor + ); + + const apiAsHeading = head.api.charAt(0).toUpperCase() + head.api.slice(1); + + const template = { + api: head.api, + added: head.introduced_in ?? '', + section: head.heading.data.name || apiAsHeading, + version: `v${version.version}`, + toc: String(parsedToC), + nav: String(activeSideNav), + content: parsedContent, + }; + + results.push(template); + } - return results; - }, - { sliceInput: true } - ), + return results; + }, /** * Generates the legacy version of the API docs in HTML diff --git a/src/generators/legacy-json-all/index.mjs b/src/generators/legacy-json-all/index.mjs index 01aaf255..1e9c51cf 100644 --- a/src/generators/legacy-json-all/index.mjs +++ b/src/generators/legacy-json-all/index.mjs @@ -3,14 +3,6 @@ import { writeFile } from 'node:fs/promises'; import { join } from 'node:path'; -const PROPERTIES_TO_COPY = [ - 'miscs', - 'modules', - 'classes', - 'globals', - 'methods', -]; - /** * This generator consolidates data from the `legacy-json` generator into a single * JSON file (`all.json`). 
@@ -51,9 +43,14 @@ export default { methods: [], }; + /** + * The properties to copy from each section in the input + */ + const propertiesToCopy = Object.keys(generatedValue); + // Aggregate all sections into the output for (const section of input) { - for (const property of PROPERTIES_TO_COPY) { + for (const property of propertiesToCopy) { const items = section[property]; if (Array.isArray(items)) { diff --git a/src/generators/legacy-json/index.mjs b/src/generators/legacy-json/index.mjs index 326fcaef..80e9680f 100644 --- a/src/generators/legacy-json/index.mjs +++ b/src/generators/legacy-json/index.mjs @@ -30,31 +30,28 @@ export default { dependsOn: 'metadata', - processChunk: Object.assign( - /** - * Process a chunk of items in a worker thread. - * Builds JSON sections - FS operations happen in generate(). - * - * With sliceInput, each item is pre-grouped {head, nodes} - no need to - * recompute groupNodesByModule for every chunk. - * - * @param {Array<{head: ApiDocMetadataEntry, nodes: ApiDocMetadataEntry[]}>} slicedInput - Pre-sliced module data - * @param {number[]} itemIndices - Indices into the sliced array - * @returns {Promise} JSON sections for each processed module - */ - async (slicedInput, itemIndices) => { - const results = []; - - for (const idx of itemIndices) { - const { head, nodes } = slicedInput[idx]; - - results.push(buildSection(head, nodes)); - } + /** + * Process a chunk of items in a worker thread. + * Builds JSON sections - FS operations happen in generate(). + * + * Each item is pre-grouped {head, nodes} - no need to + * recompute groupNodesByModule for every chunk. + * + * @param {Array<{head: ApiDocMetadataEntry, nodes: ApiDocMetadataEntry[]}>} slicedInput - Pre-sliced module data + * @param {number[]} itemIndices - Indices into the sliced array + * @returns {Promise} JSON sections for each processed module + */ + async processChunk(slicedInput, itemIndices) { + const results = []; - return results; - }, - { sliceInput: true } - ), + for (const idx of itemIndices) { + const { head, nodes } = slicedInput[idx]; + + results.push(buildSection(head, nodes)); + } + + return results; + }, /** * Generates a legacy JSON file. 
diff --git a/src/generators/metadata/index.mjs b/src/generators/metadata/index.mjs
index 4fcb6912..c026df70 100644
--- a/src/generators/metadata/index.mjs
+++ b/src/generators/metadata/index.mjs
@@ -27,18 +27,15 @@ export default {
    * @param {{typeMap: Record<string, string>}} deps - Dependencies passed from generate()
    * @returns {Promise<Array<ApiDocMetadataEntry>>} Metadata entries for processed files
    */
-  processChunk: Object.assign(
-    async (fullInput, itemIndices, { typeMap }) => {
-      const results = [];
-
-      for (const idx of itemIndices) {
-        results.push(...parseApiDoc(fullInput[idx], typeMap));
-      }
-
-      return results;
-    },
-    { sliceInput: true } // Only needs individual items, not full context
-  ),
+  async processChunk(fullInput, itemIndices, { typeMap }) {
+    const results = [];
+
+    for (const idx of itemIndices) {
+      results.push(...parseApiDoc(fullInput[idx], typeMap));
+    }
+
+    return results;
+  },
 
   /**
    * @param {Input} inputs
diff --git a/src/generators/web/index.mjs b/src/generators/web/index.mjs
index 6521b84b..cdae1185 100644
--- a/src/generators/web/index.mjs
+++ b/src/generators/web/index.mjs
@@ -70,6 +70,6 @@ export default {
       await writeFile(join(output, 'styles.css'), css, 'utf-8');
     }
 
-    return results;
+    return results.map(({ html }) => ({ html, css }));
   },
 };
diff --git a/src/loaders/markdown.mjs b/src/loaders/markdown.mjs
deleted file mode 100644
index 5e92904c..00000000
--- a/src/loaders/markdown.mjs
+++ /dev/null
@@ -1,47 +0,0 @@
-'use strict';
-
-import { readFile } from 'node:fs/promises';
-import { extname } from 'node:path';
-
-import { globSync } from 'glob';
-import { VFile } from 'vfile';
-
-/**
- * This method creates a simple abstract "Loader", which technically
- * could be used for different things, but here we want to use it to load
- * Markdown files and transform them into VFiles
- */
-const createLoader = () => {
-  /**
-   * Loads API Doc files and transforms it into VFiles
-   *
-   * @param {Array} searchPath A glob/path for API docs to be loaded
-   * @param {Array | undefined} [ignorePath] A glob/path of files to ignore
-   * The input string can be a simple path (relative or absolute)
-   * The input string can also be any allowed glob string
-   *
-   * @see https://code.visualstudio.com/docs/editor/glob-patterns
-   */
-  const loadFiles = async (searchPath, ignorePath) => {
-    const ignoredFiles = ignorePath
-      ?
globSync(ignorePath).filter(filePath => extname(filePath) === '.md') - : []; - - const resolvedFiles = globSync(searchPath).filter( - filePath => - extname(filePath) === '.md' && !ignoredFiles.includes(filePath) - ); - - return Promise.all( - resolvedFiles.map(async filePath => { - const fileContents = await readFile(filePath, 'utf-8'); - - return new VFile({ path: filePath, value: fileContents }); - }) - ); - }; - - return { loadFiles }; -}; - -export default createLoader; diff --git a/src/parsers/markdown.mjs b/src/parsers/markdown.mjs index cce3618b..318df30f 100644 --- a/src/parsers/markdown.mjs +++ b/src/parsers/markdown.mjs @@ -3,8 +3,6 @@ import { coerce } from 'semver'; import { loadFromURL } from '../utils/parser.mjs'; -import createQueries from '../utils/queries/index.mjs'; -import { getRemark } from '../utils/remark.mjs'; // A ReGeX for retrieving Node.js version headers from the CHANGELOG.md const NODE_VERSIONS_REGEX = /\* \[Node\.js ([0-9.]+)\]\S+ (.*)\r?\n/g; @@ -15,57 +13,6 @@ const LIST_ITEM_REGEX = /\* \[(.*?)\]\((.*?)\.md\)/g; // A ReGeX for checking if a Node.js version is an LTS release const NODE_LTS_VERSION_REGEX = /Long Term Support/i; -/** - * Creates an API doc parser for a given Markdown API doc file - */ -const createParser = () => { - // Creates an instance of the Remark processor with GFM support - const remarkProcessor = getRemark(); - - const { updateStabilityPrefixToLink } = createQueries(); - - /** - * Parses a given API doc file into a AST tree - * - * @param {import('vfile').VFile | Promise} apiDoc - * @returns {Promise>} - */ - const parseApiDoc = async apiDoc => { - // We allow the API doc VFile to be a Promise of a VFile also, - // hence we want to ensure that it first resolves before we pass it to the parser - const resolvedApiDoc = await Promise.resolve(apiDoc); - - // Normalizes all the Stability Index prefixes with Markdown links - updateStabilityPrefixToLink(resolvedApiDoc); - - // Parses the API doc into an AST tree using `unified` and `remark` - const apiDocTree = remarkProcessor.parse(resolvedApiDoc); - - return { - file: { - stem: resolvedApiDoc.stem, - basename: resolvedApiDoc.basename, - }, - tree: apiDocTree, - }; - }; - - /** - * This method allows to parse multiple API doc files at once - * and it simply wraps parseApiDoc with the given API docs - * - * @param {Array>} apiDocs List of API doc files to be parsed - * @returns {Promise>>} - */ - const parseApiDocs = async apiDocs => { - // We do a Promise.all, to ensure that each API doc is resolved asynchronously - // but all need to be resolved first before we return the result to the caller - return Promise.all(apiDocs.map(parseApiDoc)); - }; - - return { parseApiDocs, parseApiDoc }; -}; - /** * Retrieves all Node.js major versions from the provided CHANGELOG.md file * and returns an array of objects containing the version and LTS status. 
@@ -96,5 +43,3 @@ export const parseIndex = async path => { return items.map(([, section, api]) => ({ section, api })); }; - -export default createParser; diff --git a/src/threading/chunk-worker.mjs b/src/threading/chunk-worker.mjs index 4074d7b5..f46bc005 100644 --- a/src/threading/chunk-worker.mjs +++ b/src/threading/chunk-worker.mjs @@ -12,13 +12,8 @@ import { allGenerators } from '../generators/index.mjs'; * }} opts - Task options from Piscina * @returns {Promise} The processed result */ -export default async function processChunk({ - generatorName, - fullInput, - itemIndices, - options, -}) { +export default async ({ generatorName, fullInput, itemIndices, options }) => { const generator = allGenerators[generatorName]; return generator.processChunk(fullInput, itemIndices, options); -} +}; diff --git a/src/threading/parallel.mjs b/src/threading/parallel.mjs index 41706b1c..71491671 100644 --- a/src/threading/parallel.mjs +++ b/src/threading/parallel.mjs @@ -1,6 +1,5 @@ 'use strict'; -import { allGenerators } from '../generators/index.mjs'; import logger from '../logger/index.mjs'; const parallelLogger = logger.child('parallel'); @@ -25,23 +24,20 @@ const createChunks = (count, size) => { }; /** - * Prepares task data for a chunk, handling sliceInput optimization. + * Prepares task data for a chunk, slicing input to only include relevant items. * - * @param {object} generator - Generator with processChunk method * @param {unknown[]} fullInput - Full input array * @param {number[]} indices - Indices to process * @param {object} options - Serialized options * @param {string} generatorName - Name of the generator * @returns {object} Task data for Piscina */ -const createTask = (generator, fullInput, indices, options, generatorName) => ({ +const createTask = (fullInput, indices, options, generatorName) => ({ generatorName, - fullInput: generator.processChunk.sliceInput - ? indices.map(i => fullInput[i]) - : fullInput, - itemIndices: generator.processChunk.sliceInput - ? 
indices.map((_, i) => i) - : indices, + // Only send the items needed for this chunk (reduces serialization overhead) + fullInput: indices.map(i => fullInput[i]), + // Remap indices to 0-based for the sliced array + itemIndices: indices.map((_, i) => i), options, }); @@ -55,7 +51,6 @@ const createTask = (generator, fullInput, indices, options, generatorName) => ({ */ export default function createParallelWorker(generatorName, pool, options) { const { threads, chunkSize } = options; - const generator = allGenerators[generatorName]; /** @param {object} extra */ const serializeOptions = extra => { @@ -94,7 +89,7 @@ export default function createParallelWorker(generatorName, pool, options) { const pending = new Set( chunks.map(indices => { const promise = pool - .run(createTask(generator, fullInput, indices, opts, generatorName)) + .run(createTask(fullInput, indices, opts, generatorName)) .then(result => ({ promise, result })); return promise; From 3a705777020f1e0a86328fd674f1c9ece2d8316f Mon Sep 17 00:00:00 2001 From: Claudio Wunder Date: Tue, 9 Dec 2025 16:05:27 +0100 Subject: [PATCH 16/25] chore: minor improvements --- src/generators/ast-js/index.mjs | 13 +++------- src/generators/ast/index.mjs | 42 +++++++++---------------------- src/loaders/markdown.mjs | 44 +++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 40 deletions(-) create mode 100644 src/loaders/markdown.mjs diff --git a/src/generators/ast-js/index.mjs b/src/generators/ast-js/index.mjs index 8bdba115..ee6825d8 100644 --- a/src/generators/ast-js/index.mjs +++ b/src/generators/ast-js/index.mjs @@ -6,6 +6,7 @@ import createJsLoader from '../../loaders/javascript.mjs'; import createJsParser from '../../parsers/javascript.mjs'; const { loadFiles } = createJsLoader(); + const { parseJsSource } = createJsParser(); /** @@ -38,17 +39,11 @@ export default { * @returns {Promise} Parsed JS AST objects for each file */ async processChunk(inputSlice, itemIndices) { - const results = []; - - for (const idx of itemIndices) { - const [file] = loadFiles(inputSlice[idx]); + const filePaths = itemIndices.map(idx => inputSlice[idx]); - const parsedFile = await parseJsSource(file); - - results.push(parsedFile); - } + const vfiles = await Promise.all(loadFiles(filePaths)); - return results; + return Promise.all(vfiles.map(parseJsSource)); }, /** diff --git a/src/generators/ast/index.mjs b/src/generators/ast/index.mjs index 8d39d450..f429cf41 100644 --- a/src/generators/ast/index.mjs +++ b/src/generators/ast/index.mjs @@ -1,37 +1,15 @@ 'use strict'; -import { readFile } from 'node:fs/promises'; import { extname } from 'node:path'; import { globSync } from 'glob'; -import { VFile } from 'vfile'; -import createQueries from '../../utils/queries/index.mjs'; +import createLoader from '../../loaders/markdown.mjs'; import { getRemark } from '../../utils/remark.mjs'; -const remarkProcessor = getRemark(); - -const { updateStabilityPrefixToLink } = createQueries(); - -/** - * Parses a single markdown file into an AST. 
- * - * @param {string} filePath - Path to the markdown file - * @returns {Promise>} - */ -const parseMarkdownFile = async filePath => { - const fileContents = await readFile(filePath, 'utf-8'); - - const vfile = new VFile({ path: filePath, value: fileContents }); +const { loadFiles } = createLoader(); - // Normalizes all the Stability Index prefixes with Markdown links - updateStabilityPrefixToLink(vfile); - - // Parses the API doc into an AST tree using `unified` and `remark` - const tree = remarkProcessor.parse(vfile); - - return { file: { stem: vfile.stem, basename: vfile.basename }, tree }; -}; +const remarkProcessor = getRemark(); /** * This generator parses Markdown API doc files into AST trees. @@ -57,15 +35,17 @@ export default { * @returns {Promise>>} */ async processChunk(inputSlice, itemIndices) { - const results = []; + const filePaths = itemIndices.map(idx => inputSlice[idx]); - for (const idx of itemIndices) { - const parsed = await parseMarkdownFile(inputSlice[idx]); + const vfiles = await Promise.all(loadFiles(filePaths)); - results.push(parsed); - } + return vfiles.map(vfile => { + const tree = remarkProcessor.parse(vfile); + + const minimalVfile = { stem: vfile.stem, basename: vfile.basename }; - return results; + return { file: minimalVfile, tree }; + }); }, /** diff --git a/src/loaders/markdown.mjs b/src/loaders/markdown.mjs new file mode 100644 index 00000000..26dae7c6 --- /dev/null +++ b/src/loaders/markdown.mjs @@ -0,0 +1,44 @@ +'use strict'; + +import { readFile } from 'node:fs/promises'; +import { extname } from 'node:path'; + +import { globSync } from 'glob'; +import { VFile } from 'vfile'; + +import createQueries from '../utils/queries/index.mjs'; + +const { updateStabilityPrefixToLink } = createQueries(); + +/** + * This creates a "loader" for loading Markdown API doc files into VFiles. + */ +const createLoader = () => { + /** + * Loads Markdown source files and transforms them into VFiles. + * Applies stability index normalization during load. 
+   * @param {string | string[]} searchPath - Glob pattern(s) or file paths
+   * @returns {Promise<VFile>[]} Array of promises resolving to VFiles
+   */
+  const loadFiles = searchPath => {
+    const resolvedFiles = globSync(searchPath).filter(
+      filePath => extname(filePath) === '.md'
+    );
+
+    return resolvedFiles.map(async filePath => {
+      const fileContents = await readFile(filePath, 'utf-8');
+
+      const vfile = new VFile({ path: filePath, value: fileContents });
+
+      // Normalizes all the Stability Index prefixes with Markdown links
+      updateStabilityPrefixToLink(vfile);
+
+      return vfile;
+    });
+  };
+
+  return { loadFiles };
+};
+
+export default createLoader;

From 9f7148e490cf40924beef6830b1fc9668b4db2c6 Mon Sep 17 00:00:00 2001
From: Claudio Wunder
Date: Wed, 10 Dec 2025 01:25:35 +0100
Subject: [PATCH 17/25] chore: code review changes

---
 .github/workflows/generate.yml |  4 +++-
 bin/cli.mjs                    | 24 ++++++++----------------
 2 files changed, 11 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/generate.yml b/.github/workflows/generate.yml
index 44af8650..b63b7d38 100644
--- a/.github/workflows/generate.yml
+++ b/.github/workflows/generate.yml
@@ -79,7 +79,9 @@ jobs:
             -t ${{ matrix.target }} \
             -i "${{ matrix.input }}" \
             -o "out/${{ matrix.target }}" \
-            --index ./node/doc/api/index.md
+            -c ./node/CHANGELOG.md \
+            --index ./node/doc/api/index.md \
+            --log-level debug
 
       - name: Upload ${{ matrix.target }} artifacts
         uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
diff --git a/bin/cli.mjs b/bin/cli.mjs
index 4a1c4769..d53e9536 100755
--- a/bin/cli.mjs
+++ b/bin/cli.mjs
@@ -6,25 +6,17 @@ import { Command, Option } from 'commander';
 
 import commands from './commands/index.mjs';
 import { errorWrap } from './utils.mjs';
-import logger from '../src/logger/index.mjs';
+import logger, { LogLevel } from '../src/logger/index.mjs';
+
+const logLevelOption = new Option('--log-level <level>', 'Log level')
+  .choices(Object.keys(LogLevel))
+  .default('info');
 
 const program = new Command()
   .name('@nodejs/doc-kit')
-  .description('CLI tool to generate the Node.js API documentation');
-
-// Add global log level option
-program.addOption(
-  new Option('--log-level <level>', 'Log level')
-    .choices(['debug', 'info', 'warn', 'error', 'fatal'])
-    .default('info')
-);
-
-// Set log level before any command runs
-program.hook('preAction', thisCommand => {
-  const { logLevel } = thisCommand.opts();
-
-  logger.setLogLevel(logLevel);
-});
+  .description('CLI tool to generate the Node.js API documentation')
+  .addOption(logLevelOption)
+  .hook('preAction', cmd => logger.setLogLevel(cmd.opts().logLevel));
 
 // Registering commands

From bb7bc556a31f55b331c0adb062601131b73d29d3 Mon Sep 17 00:00:00 2001
From: Claudio Wunder
Date: Wed, 10 Dec 2025 01:32:04 +0100
Subject: [PATCH 18/25] chore: sparse checkout main dir

---
 .github/workflows/generate.yml | 1 +
 scripts/vercel-prepare.sh      | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/generate.yml b/.github/workflows/generate.yml
index b63b7d38..30765597 100644
--- a/.github/workflows/generate.yml
+++ b/.github/workflows/generate.yml
@@ -59,6 +59,7 @@ jobs:
           sparse-checkout: |
             doc/api
             lib
+            .
path: node - name: Setup Node.js diff --git a/scripts/vercel-prepare.sh b/scripts/vercel-prepare.sh index a4ae5b15..80b40220 100755 --- a/scripts/vercel-prepare.sh +++ b/scripts/vercel-prepare.sh @@ -5,7 +5,9 @@ git clone --depth 1 --filter=blob:none --sparse https://github.com/nodejs/node.g cd node # Enable sparse checkout and specify the folder -git sparse-checkout set doc/ +git sparse-checkout set lib +git sparse-checkout set doc/api +git sparse-checkout set . # Move back out cd .. From 1da6a67b661b4d53505e420f3a812fbd6abc5be7 Mon Sep 17 00:00:00 2001 From: Claudio Wunder Date: Wed, 10 Dec 2025 01:37:31 +0100 Subject: [PATCH 19/25] chore: misconfig of sparse-checkout --- scripts/vercel-prepare.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/vercel-prepare.sh b/scripts/vercel-prepare.sh index 80b40220..31b29afe 100755 --- a/scripts/vercel-prepare.sh +++ b/scripts/vercel-prepare.sh @@ -5,9 +5,7 @@ git clone --depth 1 --filter=blob:none --sparse https://github.com/nodejs/node.g cd node # Enable sparse checkout and specify the folder -git sparse-checkout set lib -git sparse-checkout set doc/api -git sparse-checkout set . +git sparse-checkout set lib doc/api . # Move back out cd .. From e34f773433fc576f6ce3c11d8da2bcc0f027b773 Mon Sep 17 00:00:00 2001 From: Claudio Wunder Date: Wed, 10 Dec 2025 01:50:26 +0100 Subject: [PATCH 20/25] chore: ast-js doesn't need dependency --- src/generators/ast-js/index.mjs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/generators/ast-js/index.mjs b/src/generators/ast-js/index.mjs index ee6825d8..95b4f460 100644 --- a/src/generators/ast-js/index.mjs +++ b/src/generators/ast-js/index.mjs @@ -28,8 +28,6 @@ export default { description: 'Parses Javascript source files passed into the input.', - dependsOn: 'metadata', - /** * Process a chunk of JavaScript files in a worker thread. * Parses JS source files into AST representations. 
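The completion-order streaming that patch 10 added to src/threading/parallel.mjs underlies every `worker.stream` loop above, so here is the pattern distilled into a standalone sketch. The `streamInCompletionOrder` helper and `tasks` argument are hypothetical (`tasks` stands in for the Piscina `pool.run` calls); the pending-set mechanics mirror the diff from patch 10.

  // Yield each task's result as soon as it settles, in completion order.
  // Every promise resolves to an object carrying itself, so the winner of
  // Promise.race can be removed from the Set in O(1).
  async function* streamInCompletionOrder(tasks) {
    const pending = new Set(
      tasks.map(task => {
        const promise = task().then(result => ({ promise, result }));
        return promise;
      })
    );

    while (pending.size > 0) {
      const { promise, result } = await Promise.race(pending);
      pending.delete(promise);
      yield result;
    }
  }

Relative to the earlier array-based version, which swapped completed slots for never-resolving promises, the Set drops settled promises immediately instead of leaving placeholders behind, so Promise.race races fewer entries on each iteration and finished work can be garbage-collected while the stream drains.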
From 9d590066501118d39afe1d949803eb30c087e22a Mon Sep 17 00:00:00 2001 From: Claudio Wunder Date: Wed, 10 Dec 2025 01:53:17 +0100 Subject: [PATCH 21/25] chore: other minor var renames --- src/generators/jsx-ast/index.mjs | 12 ++++++------ src/generators/llms-txt/index.mjs | 7 +++---- src/generators/orama-db/index.mjs | 2 ++ src/generators/web/index.mjs | 6 +++--- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/generators/jsx-ast/index.mjs b/src/generators/jsx-ast/index.mjs index 1c9cd3a0..8e0e42bb 100644 --- a/src/generators/jsx-ast/index.mjs +++ b/src/generators/jsx-ast/index.mjs @@ -64,13 +64,13 @@ export default { /** * Generates a JSX AST * - * @param {Input} entries + * @param {Input} input * @param {Partial} options * @returns {AsyncGenerator>} */ - async *generate(entries, { index, releases, version, worker }) { - const groupedModules = groupNodesByModule(entries); - const headNodes = getSortedHeadNodes(entries); + async *generate(input, { index, releases, version, worker }) { + const groupedModules = groupNodesByModule(input); + const headNodes = getSortedHeadNodes(input); // Pre-compute docPages once in main thread const docPages = index @@ -79,14 +79,14 @@ export default { // Create sliced input: each item contains head + its module's entries // This avoids sending all 4700+ entries to every worker - const input = headNodes.map(head => ({ + const entries = headNodes.map(head => ({ head, entries: groupedModules.get(head.api), })); const deps = { docPages, releases, version }; - for await (const chunkResult of worker.stream(input, input, deps)) { + for await (const chunkResult of worker.stream(entries, entries, deps)) { yield chunkResult; } }, diff --git a/src/generators/llms-txt/index.mjs b/src/generators/llms-txt/index.mjs index b80c2348..5806ca24 100644 --- a/src/generators/llms-txt/index.mjs +++ b/src/generators/llms-txt/index.mjs @@ -24,18 +24,17 @@ export default { /** * Generates a llms.txt file * - * @param {Input} entries + * @param {Input} input * @param {Partial} options * @returns {Promise} */ - async generate(entries, { output }) { + async generate(input, { output }) { const template = await readFile( join(import.meta.dirname, 'template.txt'), 'utf-8' ); - const apiDocsLinks = entries - // Filter non top-level headings + const apiDocsLinks = input .filter(entry => entry.heading.depth === 1) .map(entry => `- ${buildApiDocLink(entry)}`) .join('\n'); diff --git a/src/generators/orama-db/index.mjs b/src/generators/orama-db/index.mjs index 6d6b047a..7d59f678 100644 --- a/src/generators/orama-db/index.mjs +++ b/src/generators/orama-db/index.mjs @@ -44,7 +44,9 @@ export function buildHierarchicalTitle(headings, currentIndex) { */ export default { name: 'orama-db', + version: '1.0.0', + description: 'Generates the Orama database for the API docs.', dependsOn: 'metadata', diff --git a/src/generators/web/index.mjs b/src/generators/web/index.mjs index cdae1185..98eceb0a 100644 --- a/src/generators/web/index.mjs +++ b/src/generators/web/index.mjs @@ -27,13 +27,13 @@ export default { /** * Main generation function that processes JSX AST entries into web bundles. * - * @param {import('../jsx-ast/utils/buildContent.mjs').JSXContent[]} entries - JSX AST entries to process. + * @param {import('../jsx-ast/utils/buildContent.mjs').JSXContent[]} input - JSX AST entries to process. * @param {Partial} options - Generator options. * @param {string} [options.output] - Output directory for generated files. * @param {string} options.version - Documentation version string. 
* @returns {Promise} */ - async generate(entries, { output, version }) { + async generate(input, { output, version }) { const template = await readFile( new URL('template.html', import.meta.url), 'utf-8' @@ -47,7 +47,7 @@ export default { // Process all entries: convert JSX to HTML/CSS/JS const { results, css, chunks } = await processJSXEntries( - entries, + input, template, astBuilders, requireFn, From 57c0d73a3a33924e989a6ade69bca43e5b5b915f Mon Sep 17 00:00:00 2001 From: Claudio Wunder Date: Wed, 10 Dec 2025 16:22:29 +0100 Subject: [PATCH 22/25] chore: code review --- bin/cli.mjs | 3 +- bin/commands/generate.mjs | 49 ++++++++----------- bin/commands/types.d.ts | 25 ++++++++++ src/generators/ast-js/index.mjs | 21 ++++++-- src/generators/ast/index.mjs | 24 +++++---- src/generators/jsx-ast/index.mjs | 12 ++--- src/generators/legacy-html-all/index.mjs | 12 +---- src/generators/legacy-html/index.mjs | 49 +++++++------------ src/generators/legacy-html/types.d.ts | 9 ++++ .../legacy-html/utils/buildDropdowns.mjs | 15 ++---- .../utils/replaceTemplateValues.mjs | 42 +++++----------- src/generators/legacy-json-all/index.mjs | 5 +- src/generators/legacy-json/index.mjs | 13 ++--- src/generators/man-page/index.mjs | 1 + src/generators/metadata/index.mjs | 11 +++-- src/generators/types.d.ts | 36 ++++++++------ src/generators/web/index.mjs | 16 +++--- src/logger/index.mjs | 3 -- src/parsers/json.mjs | 19 +++++++ src/parsers/markdown.mjs | 4 ++ src/streaming.mjs | 19 ------- src/threading/chunk-worker.mjs | 11 ++--- src/threading/index.mjs | 4 +- src/threading/parallel.mjs | 21 ++++++-- src/types.d.ts | 5 +- 25 files changed, 222 insertions(+), 207 deletions(-) create mode 100644 bin/commands/types.d.ts create mode 100644 src/generators/legacy-html/types.d.ts create mode 100644 src/parsers/json.mjs diff --git a/bin/cli.mjs b/bin/cli.mjs index d53e9536..8100125f 100755 --- a/bin/cli.mjs +++ b/bin/cli.mjs @@ -6,7 +6,8 @@ import { Command, Option } from 'commander'; import commands from './commands/index.mjs'; import { errorWrap } from './utils.mjs'; -import logger, { LogLevel } from '../src/logger/index.mjs'; +import { LogLevel } from '../src/logger/constants.mjs'; +import logger from '../src/logger/index.mjs'; const logLevelOption = new Option('--log-level ', 'Log level') .choices(Object.keys(LogLevel)) diff --git a/bin/commands/generate.mjs b/bin/commands/generate.mjs index 9ff34a75..2ba3e8cd 100644 --- a/bin/commands/generate.mjs +++ b/bin/commands/generate.mjs @@ -7,28 +7,14 @@ import { NODE_CHANGELOG_URL, NODE_VERSION } from '../../src/constants.mjs'; import { publicGenerators } from '../../src/generators/index.mjs'; import createGenerator from '../../src/generators.mjs'; import logger from '../../src/logger/index.mjs'; +import { parseTypeMap } from '../../src/parsers/json.mjs'; import { parseChangelog, parseIndex } from '../../src/parsers/markdown.mjs'; import { DEFAULT_TYPE_MAP } from '../../src/utils/parser/constants.mjs'; -import { loadFromURL } from '../../src/utils/parser.mjs'; const availableGenerators = Object.keys(publicGenerators); /** - * @typedef {{ - * input: Array | string; - * ignore?: Array | string; - * target: Array; - * version: string; - * changelog: string; - * typeMap: string; - * gitRef?: string; - * threads?: number; - * chunkSize?: number; - * }} Options - */ - -/** - * @type {import('../utils.mjs').Command} + * @type {import('./types').Command} */ export default { description: 'Generate API docs', @@ -60,7 +46,7 @@ export default { }, threads: { flags: ['-p', '--threads '], 
- desc: 'Number of worker threads to use (minimum: 2)', + desc: 'Number of threads to use (minimum: 1)', prompt: { type: 'text', message: 'How many threads to allow', @@ -134,7 +120,20 @@ export default { }, }, }, + /** + * @typedef {Object} Options + * @property {Array|string} input - Specifies the glob/path for input files. + * @property {Array|string} [ignore] - Specifies the glob/path for ignoring files. + * @property {Array} target - Specifies the generator target mode. + * @property {string} version - Specifies the target Node.js version. + * @property {string} changelog - Specifies the path to the Node.js CHANGELOG.md file. + * @property {string} typeMap - Specifies the path to the Node.js Type Map. + * @property {string} index - Specifies the path to the index document. + * @property {string} [gitRef] - Git ref/commit URL. + * @property {number} [threads] - Number of threads to allow. + * @property {number} [chunkSize] - Number of items to process per worker thread. + * * Handles the action for generating API docs * @param {Options} opts - The options to generate API docs. * @returns {Promise} @@ -144,12 +143,6 @@ export default { const { runGenerators } = createGenerator(); - const releases = await parseChangelog(opts.changelog); - - const typeMap = JSON.parse(await loadFromURL(opts.typeMap)); - - const index = opts.index && (await parseIndex(opts.index)); - logger.debug('Starting generation', { targets: opts.target }); await runGenerators({ @@ -157,12 +150,12 @@ export default { input: opts.input, output: opts.output && resolve(opts.output), version: coerce(opts.version), - releases, + releases: await parseChangelog(opts.changelog), gitRef: opts.gitRef, - threads: Math.max(parseInt(opts.threads, 10), 2), - chunkSize: parseInt(opts.chunkSize, 10), - index, - typeMap, + threads: Math.max(parseInt(opts.threads, 10), 1), + chunkSize: Math.max(parseInt(opts.chunkSize, 10), 1), + index: await parseIndex(opts.index), + typeMap: await parseTypeMap(opts.typeMap), }); }, }; diff --git a/bin/commands/types.d.ts b/bin/commands/types.d.ts new file mode 100644 index 00000000..e7ce0d7c --- /dev/null +++ b/bin/commands/types.d.ts @@ -0,0 +1,25 @@ +/** + * Represents a command-line option for the CLI. + */ +export interface Option { + flags: string[]; + desc: string; + prompt?: { + type: 'text' | 'confirm' | 'select' | 'multiselect'; + message: string; + variadic?: boolean; + required?: boolean; + initialValue?: boolean; + options?: { label: string; value: string }[]; + }; +} + +/** + * Represents a command-line subcommand + */ +export interface Command { + options: { [key: string]: Option }; + name: string; + description: string; + action: Function; +} diff --git a/src/generators/ast-js/index.mjs b/src/generators/ast-js/index.mjs index 95b4f460..6900adee 100644 --- a/src/generators/ast-js/index.mjs +++ b/src/generators/ast-js/index.mjs @@ -18,8 +18,9 @@ const { parseJsSource } = createJsParser(); * so we're only parsing the Javascript sources when we need to. 
* * @typedef {unknown} Input + * @typedef {Array} Output * - * @type {GeneratorMetadata>} + * @type {GeneratorMetadata} */ export default { name: 'ast-js', @@ -34,14 +35,24 @@ export default { * * @param {string[]} inputSlice - Sliced input paths for this chunk * @param {number[]} itemIndices - Indices into the sliced array - * @returns {Promise} Parsed JS AST objects for each file + * @returns {Promise} Parsed JS AST objects for each file */ async processChunk(inputSlice, itemIndices) { const filePaths = itemIndices.map(idx => inputSlice[idx]); - const vfiles = await Promise.all(loadFiles(filePaths)); + const vfilesPromises = loadFiles(filePaths); - return Promise.all(vfiles.map(parseJsSource)); + const results = []; + + for (const vfilePromise of vfilesPromises) { + const vfile = await vfilePromise; + + const parsed = await parseJsSource(vfile); + + results.push(parsed); + } + + return results; }, /** @@ -49,7 +60,7 @@ export default { * * @param {Input} _ - Unused (files loaded from input paths) * @param {Partial} options - * @returns {AsyncGenerator>} + * @returns {AsyncGenerator} */ async *generate(_, { input = [], worker }) { const source = globSync(input).filter(path => extname(path) === '.js'); diff --git a/src/generators/ast/index.mjs b/src/generators/ast/index.mjs index f429cf41..ad917575 100644 --- a/src/generators/ast/index.mjs +++ b/src/generators/ast/index.mjs @@ -16,8 +16,9 @@ const remarkProcessor = getRemark(); * It parallelizes the parsing across worker threads for better performance. * * @typedef {undefined} Input + * @typedef {Array>} Output * - * @type {GeneratorMetadata>>} + * @type {GeneratorMetadata} */ export default { name: 'ast', @@ -32,20 +33,25 @@ export default { * * @param {string[]} inputSlice - Sliced input paths for this chunk * @param {number[]} itemIndices - Indices into the sliced array - * @returns {Promise>>} + * @returns {Promise} */ async processChunk(inputSlice, itemIndices) { const filePaths = itemIndices.map(idx => inputSlice[idx]); - const vfiles = await Promise.all(loadFiles(filePaths)); + const vfilesPromises = loadFiles(filePaths); - return vfiles.map(vfile => { - const tree = remarkProcessor.parse(vfile); + const results = []; - const minimalVfile = { stem: vfile.stem, basename: vfile.basename }; + for (const vfilePromise of vfilesPromises) { + const vfile = await vfilePromise; - return { file: minimalVfile, tree }; - }); + results.push({ + tree: remarkProcessor.parse(vfile), + file: { stem: vfile.stem, basename: vfile.basename }, + }); + } + + return results; }, /** @@ -53,7 +59,7 @@ export default { * * @param {Input} _ - Unused (top-level generator) * @param {Partial} options - * @returns {AsyncGenerator>>} + * @returns {AsyncGenerator} */ async *generate(_, { input = [], worker }) { const files = globSync(input).filter(path => extname(path) === '.md'); diff --git a/src/generators/jsx-ast/index.mjs b/src/generators/jsx-ast/index.mjs index 8e0e42bb..2402b571 100644 --- a/src/generators/jsx-ast/index.mjs +++ b/src/generators/jsx-ast/index.mjs @@ -10,7 +10,9 @@ const remarkRecma = getRemarkRecma(); * Generator for converting MDAST to JSX AST. 
* * @typedef {Array} Input - * @type {GeneratorMetadata} + * @typedef {Array} Output + * + * @type {GeneratorMetadata} */ export default { name: 'jsx-ast', @@ -30,11 +32,8 @@ export default { * * @param {Array<{head: ApiDocMetadataEntry, entries: Array}>} slicedInput - Pre-sliced module data * @param {number[]} itemIndices - Indices of items to process - * @param {object} options - Serializable options - * @param {Array<[string, string]>} options.docPages - Pre-computed doc pages for sidebar - * @param {Array} options.releases - Release information - * @param {import('semver').SemVer} options.version - Target Node.js version - * @returns {Promise>} JSX AST programs for each module + * @param {{ docPages: Array<[string, string]>, releases: Array, version: import('semver').SemVer }} options - Serializable options + * @returns {Promise} JSX AST programs for each module */ async processChunk( slicedInput, @@ -66,7 +65,6 @@ export default { * * @param {Input} input * @param {Partial} options - * @returns {AsyncGenerator>} */ async *generate(input, { index, releases, version, worker }) { const groupedModules = groupNodesByModule(input); diff --git a/src/generators/legacy-html-all/index.mjs b/src/generators/legacy-html-all/index.mjs index 334e6555..8bcb5ca0 100644 --- a/src/generators/legacy-html-all/index.mjs +++ b/src/generators/legacy-html-all/index.mjs @@ -10,23 +10,13 @@ import { replaceTemplateValues } from '../legacy-html/utils/replaceTemplateValue import tableOfContents from '../legacy-html/utils/tableOfContents.mjs'; /** - * @typedef {{ - * api: string; - * added: string; - * section: string; - * version: string; - * toc: string; - * nav: string; - * content: string; - * }} TemplateValues - * * This generator generates the legacy HTML pages of the legacy API docs * for retro-compatibility and while we are implementing the new 'react' and 'html' generators. * * This generator is a top-level generator, and it takes the raw AST tree of the API doc files * and generates the HTML files to the specified output directory from the configuration settings * - * @typedef {Array} Input + * @typedef {Array} Input * * @type {GeneratorMetadata} */ diff --git a/src/generators/legacy-html/index.mjs b/src/generators/legacy-html/index.mjs index d87b5921..08b9674a 100644 --- a/src/generators/legacy-html/index.mjs +++ b/src/generators/legacy-html/index.mjs @@ -22,15 +22,6 @@ const getHeading = name => ({ data: { depth: 1, name } }); const remarkRehypeProcessor = getRemarkRehypeWithShiki(); /** - * @typedef {{ - * api: string; - * added: string; - * section: string; - * version: string; - * toc: string; - * nav: string; - * content: string; - * }} TemplateValues * * This generator generates the legacy HTML pages of the legacy API docs * for retro-compatibility and while we are implementing the new 'react' and 'html' generators. @@ -39,8 +30,9 @@ const remarkRehypeProcessor = getRemarkRehypeWithShiki(); * and generates the HTML files to the specified output directory from the configuration settings * * @typedef {Array} Input + * @typedef {Array} Output * - * @type {GeneratorMetadata>} + * @type {GeneratorMetadata} */ export default { name: 'legacy-html', @@ -59,10 +51,10 @@ export default { * Each item is pre-grouped {head, nodes, headNodes} - no need to * recompute groupNodesByModule for every chunk. 
* - * @param {Array<{head: ApiDocMetadataEntry, nodes: ApiDocMetadataEntry[], headNodes: ApiDocMetadataEntry[]}>} slicedInput - Pre-sliced module data + * @param {Array<{ head: ApiDocMetadataEntry, nodes: Array, headNodes: Array }> } slicedInput - Pre-sliced module data * @param {number[]} itemIndices - Indices into the sliced array - * @param {{version: SemVer, parsedSideNav: string}} deps - Dependencies passed from generate() - * @returns {Promise} Template objects for each processed module + * @param {{ version: SemVer, parsedSideNav: string }} deps - Dependencies passed from generate() + * @returns {Promise} Template objects for each processed module */ async processChunk(slicedInput, itemIndices, { version, parsedSideNav }) { const results = []; @@ -110,7 +102,7 @@ export default { * Generates the legacy version of the API docs in HTML * @param {Input} input * @param {Partial} options - * @returns {AsyncGenerator>} + * @returns {AsyncGenerator} */ async *generate(input, { index, releases, version, output, worker }) { const baseDir = import.meta.dirname; @@ -134,19 +126,6 @@ export default { }) ); - // Create sliced input: each item contains head + its module's entries + headNodes reference - // This avoids sending all ~4900 entries to every worker and recomputing groupings - const slicedInput = headNodes.map(head => ({ - head, - nodes: groupedModules.get(head.api), - headNodes, - })); - - const deps = { - version, - parsedSideNav: String(parsedSideNav), - }; - if (output) { // Define the source folder for API docs assets const srcAssets = join(baseDir, 'assets'); @@ -161,12 +140,18 @@ export default { await safeCopy(srcAssets, assetsFolder); } + // Create sliced input: each item contains head + its module's entries + headNodes reference + // This avoids sending all ~4900 entries to every worker and recomputing groupings + const entries = headNodes.map(head => ({ + head, + nodes: groupedModules.get(head.api), + headNodes, + })); + + const deps = { version, parsedSideNav: String(parsedSideNav) }; + // Stream chunks as they complete - HTML files are written immediately - for await (const chunkResult of worker.stream( - slicedInput, - slicedInput, - deps - )) { + for await (const chunkResult of worker.stream(entries, entries, deps)) { // Write files for this chunk in the generate method (main thread) if (output) { for (const template of chunkResult) { diff --git a/src/generators/legacy-html/types.d.ts b/src/generators/legacy-html/types.d.ts new file mode 100644 index 00000000..741a0e63 --- /dev/null +++ b/src/generators/legacy-html/types.d.ts @@ -0,0 +1,9 @@ +export interface TemplateValues { + api: string; + added: string; + section: string; + version: string; + toc: string; + nav: string; + content: string; +} diff --git a/src/generators/legacy-html/utils/buildDropdowns.mjs b/src/generators/legacy-html/utils/buildDropdowns.mjs index c612b7d6..a33b7a72 100644 --- a/src/generators/legacy-html/utils/buildDropdowns.mjs +++ b/src/generators/legacy-html/utils/buildDropdowns.mjs @@ -15,7 +15,7 @@ import { * * @param {string} tableOfContents The stringified ToC */ -const buildToC = tableOfContents => { +export const buildToC = tableOfContents => { if (tableOfContents.length) { return ( `
  • ` + @@ -36,7 +36,7 @@ const buildToC = tableOfContents => { * * @param {string} navigationContents The stringified Navigation */ -const buildNavigation = navigationContents => +export const buildNavigation = navigationContents => `
  • ` + `Index` + `
    • Index` + @@ -52,7 +52,7 @@ const buildNavigation = navigationContents => * @param {string} added The version the API was added * @param {Array} versions All available Node.js releases */ -const buildVersions = (api, added, versions) => { +export const buildVersions = (api, added, versions) => { const compatibleVersions = getCompatibleVersions(added, versions); // Parses the SemVer version into something we use for URLs and to display the Node.js version @@ -80,14 +80,7 @@ const buildVersions = (api, added, versions) => { * * @param {string} api The current API node name */ -const buildGitHub = api => +export const buildGitHub = api => `
    • ` + `` + `Edit on GitHub
    • `; - -export default { - buildToC, - buildNavigation, - buildVersions, - buildGitHub, -}; diff --git a/src/generators/legacy-html/utils/replaceTemplateValues.mjs b/src/generators/legacy-html/utils/replaceTemplateValues.mjs index 3eabbe73..ae907246 100644 --- a/src/generators/legacy-html/utils/replaceTemplateValues.mjs +++ b/src/generators/legacy-html/utils/replaceTemplateValues.mjs @@ -1,37 +1,27 @@ 'use strict'; -import dropdowns from './buildDropdowns.mjs'; +import { + buildToC, + buildNavigation, + buildVersions, + buildGitHub, +} from './buildDropdowns.mjs'; import tableOfContents from './tableOfContents.mjs'; -/** - * @typedef {{ - * api: string; - * added: string; - * section: string; - * version: string; - * toc: string; - * nav: string; - * content: string; - * }} TemplateValues - */ - /** * Replaces the template values in the API template with the given values. * @param {string} apiTemplate - The HTML template string - * @param {TemplateValues} values - The values to replace the template values with + * @param {import('../types').TemplateValues} values - The values to replace the template values with * @param {Array} releases - The releases array for version dropdown * @param {{ skipGitHub?: boolean; skipGtocPicker?: boolean }} [options] - Optional settings * @returns {string} The replaced template values */ export const replaceTemplateValues = ( apiTemplate, - values, + { api, added, section, version, toc, nav, content }, releases, - options = {} + { skipGitHub = false, skipGtocPicker = false } = {} ) => { - const { api, added, section, version, toc, nav, content } = values; - const { skipGitHub = false, skipGtocPicker = false } = options; - return apiTemplate .replace('__ID__', api) .replace(/__FILENAME__/g, api) @@ -40,14 +30,8 @@ export const replaceTemplateValues = ( .replace(/__TOC__/g, tableOfContents.wrapToC(toc)) .replace(/__GTOC__/g, nav) .replace('__CONTENT__', content) - .replace(/__TOC_PICKER__/g, dropdowns.buildToC(toc)) - .replace( - /__GTOC_PICKER__/g, - skipGtocPicker ? '' : dropdowns.buildNavigation(nav) - ) - .replace('__ALTDOCS__', dropdowns.buildVersions(api, added, releases)) - .replace( - '__EDIT_ON_GITHUB__', - skipGitHub ? '' : dropdowns.buildGitHub(api) - ); + .replace(/__TOC_PICKER__/g, buildToC(toc)) + .replace(/__GTOC_PICKER__/g, skipGtocPicker ? '' : buildNavigation(nav)) + .replace('__ALTDOCS__', buildVersions(api, added, releases)) + .replace('__EDIT_ON_GITHUB__', skipGitHub ? '' : buildGitHub(api)); }; diff --git a/src/generators/legacy-json-all/index.mjs b/src/generators/legacy-json-all/index.mjs index 1e9c51cf..c10544aa 100644 --- a/src/generators/legacy-json-all/index.mjs +++ b/src/generators/legacy-json-all/index.mjs @@ -8,8 +8,9 @@ import { join } from 'node:path'; * JSON file (`all.json`). * * @typedef {Array} Input + * @typedef {import('./types.d.ts').Output} Output * - * @type {GeneratorMetadata} + * @type {GeneratorMetadata} */ export default { name: 'legacy-json-all', @@ -26,7 +27,7 @@ export default { * * @param {Input} input * @param {Partial} options - * @returns {Promise} + * @returns {Promise} */ async generate(input, { output }) { /** diff --git a/src/generators/legacy-json/index.mjs b/src/generators/legacy-json/index.mjs index 80e9680f..b0ae17ea 100644 --- a/src/generators/legacy-json/index.mjs +++ b/src/generators/legacy-json/index.mjs @@ -18,8 +18,9 @@ const buildSection = createSectionBuilder(); * config. 
* * @typedef {Array} Input + * @typedef {Array} Output * - * @type {GeneratorMetadata} + * @type {GeneratorMetadata} */ export default { name: 'legacy-json', @@ -37,9 +38,9 @@ export default { * Each item is pre-grouped {head, nodes} - no need to * recompute groupNodesByModule for every chunk. * - * @param {Array<{head: ApiDocMetadataEntry, nodes: ApiDocMetadataEntry[]}>} slicedInput - Pre-sliced module data + * @param {Array<{ head: ApiDocMetadataEntry, nodes: Array }>} slicedInput - Pre-sliced module data * @param {number[]} itemIndices - Indices into the sliced array - * @returns {Promise} JSON sections for each processed module + * @returns {Promise} JSON sections for each processed module */ async processChunk(slicedInput, itemIndices) { const results = []; @@ -58,7 +59,7 @@ export default { * * @param {Input} input * @param {Partial} options - * @returns {AsyncGenerator>} + * @returns {AsyncGenerator} */ async *generate(input, { output, worker }) { const groupedModules = groupNodesByModule(input); @@ -67,12 +68,12 @@ export default { // Create sliced input: each item contains head + its module's entries // This avoids sending all 4900+ entries to every worker - const slicedInput = headNodes.map(head => ({ + const entries = headNodes.map(head => ({ head, nodes: groupedModules.get(head.api), })); - for await (const chunkResult of worker.stream(slicedInput, slicedInput)) { + for await (const chunkResult of worker.stream(entries, entries)) { if (output) { for (const section of chunkResult) { const out = join(output, `${section.api}.json`); diff --git a/src/generators/man-page/index.mjs b/src/generators/man-page/index.mjs index e54484c4..30a13fda 100644 --- a/src/generators/man-page/index.mjs +++ b/src/generators/man-page/index.mjs @@ -32,6 +32,7 @@ export default { /** * Generates the Node.js man-page + * * @param {Input} input * @param {Partial} options */ diff --git a/src/generators/metadata/index.mjs b/src/generators/metadata/index.mjs index c026df70..a09b10c3 100644 --- a/src/generators/metadata/index.mjs +++ b/src/generators/metadata/index.mjs @@ -6,8 +6,9 @@ import { parseApiDoc } from './utils/parse.mjs'; * This generator generates a flattened list of metadata entries from a API doc * * @typedef {Array>} Input + * @typedef {Array} Output * - * @type {GeneratorMetadata>} + * @type {GeneratorMetadata} */ export default { name: 'metadata', @@ -24,8 +25,8 @@ export default { * * @param {Input} fullInput - Full input array (parsed API doc files) * @param {number[]} itemIndices - Indices of files to process - * @param {{typeMap: Record}} deps - Dependencies passed from generate() - * @returns {Promise} Metadata entries for processed files + * @param {Partial} deps - Dependencies passed from generate() + * @returns {Promise} Metadata entries for processed files */ async processChunk(fullInput, itemIndices, { typeMap }) { const results = []; @@ -39,8 +40,8 @@ export default { /** * @param {Input} inputs - * @param {GeneratorOptions} options - * @returns {AsyncGenerator>} + * @param {Partial} options + * @returns {AsyncGenerator} */ async *generate(inputs, { typeMap, worker }) { const deps = { typeMap }; diff --git a/src/generators/types.d.ts b/src/generators/types.d.ts index f994f452..3ef3ee93 100644 --- a/src/generators/types.d.ts +++ b/src/generators/types.d.ts @@ -1,3 +1,4 @@ +import type { SemVer } from 'semver'; import type { ApiDocReleaseEntry } from '../types'; import type { publicGenerators, allGenerators } from './index.mjs'; @@ -76,6 +77,17 @@ declare global { worker: 
ParallelWorker; } + export type ParallelGeneratorOptions = Partial< + Omit + >; + + export interface ParallelTaskOptions { + generatorName: keyof AllGenerators; + input: unknown[]; + itemIndices: number[]; + options: ParallelGeneratorOptions & Record; + } + export interface GeneratorMetadata { // The name of the Generator. Must match the Key in AllGenerators name: keyof AllGenerators; @@ -126,25 +138,19 @@ declare global { * Generators that implement this method can have their work distributed * across multiple worker threads for true parallel processing. * - * @param fullInput - Full input data (for rebuilding context in workers) - * @param itemIndices - Array of indices of items to process + * Input is automatically sliced to only include items at the specified indices, + * reducing serialization overhead. The itemIndices are remapped to 0-based + * indices into the sliced array. + * + * @param slicedInput - Sliced input containing only items for this chunk + * @param itemIndices - Array of 0-based indices into slicedInput * @param options - Generator options (without worker, which isn't serializable) * @returns Array of results for the processed items */ - processChunk?: (( - fullInput: I, + processChunk?: ( + slicedInput: I, itemIndices: number[], options: Partial> - ) => Promise) & { - /** - * When true, only the items at the specified indices are sent to workers - * instead of the full input array. This reduces serialization overhead - * for generators that don't need full context to process individual items. - * - * Set this to true when processChunk only accesses `fullInput[idx]` for - * each index in itemIndices, and doesn't need the full array for context. - */ - sliceInput?: boolean; - }; + ) => Promise; } } diff --git a/src/generators/web/index.mjs b/src/generators/web/index.mjs index 98eceb0a..eb3c654a 100644 --- a/src/generators/web/index.mjs +++ b/src/generators/web/index.mjs @@ -16,22 +16,26 @@ import { processJSXEntries } from './utils/processing.mjs'; * Note: This generator does NOT support streaming/chunked processing because * processJSXEntries needs all entries together to generate code-split bundles. * - * @type {GeneratorMetadata} + * @typedef {Array} Input + * @typedef {Array<{ html: string, css: string }>} Output + * + * @type {GeneratorMetadata} */ export default { name: 'web', + version: '1.0.0', + description: 'Generates HTML/CSS/JS bundles from JSX AST entries', + dependsOn: 'jsx-ast', /** * Main generation function that processes JSX AST entries into web bundles. * - * @param {import('../jsx-ast/utils/buildContent.mjs').JSXContent[]} input - JSX AST entries to process. + * @param {Input} input - JSX AST entries to process. * @param {Partial} options - Generator options. - * @param {string} [options.output] - Output directory for generated files. - * @param {string} options.version - Documentation version string. - * @returns {Promise} + * @returns {Promise} Processed HTML/CSS/JS content. 
*/ async generate(input, { output, version }) { const template = await readFile( @@ -70,6 +74,6 @@ export default { await writeFile(join(output, 'styles.css'), css, 'utf-8'); } - return results.map(({ html }) => ({ html, css })); + return results.map(({ html }) => ({ html: html.toString(), css })); }, }; diff --git a/src/logger/index.mjs b/src/logger/index.mjs index da54ded6..4b2ac5b4 100644 --- a/src/logger/index.mjs +++ b/src/logger/index.mjs @@ -1,6 +1,5 @@ 'use strict'; -import { LogLevel } from './constants.mjs'; import { createLogger } from './logger.mjs'; import { transports } from './transports/index.mjs'; @@ -26,5 +25,3 @@ export const Logger = (transportName = 'console') => { // Default logger instance using console transport export default Logger(); - -export { LogLevel }; diff --git a/src/parsers/json.mjs b/src/parsers/json.mjs new file mode 100644 index 00000000..7f4e9de7 --- /dev/null +++ b/src/parsers/json.mjs @@ -0,0 +1,19 @@ +'use strict'; + +import { loadFromURL } from '../utils/parser.mjs'; + +/** + * Retrieves the type map from the provided JSON file. + * + * @param {string|URL} path Path to type map JSON file + * @returns {Promise>} + */ +export const parseTypeMap = async path => { + if (!path || !path.length) { + return {}; + } + + const typeMapContent = await loadFromURL(path); + + return JSON.parse(typeMapContent); +}; diff --git a/src/parsers/markdown.mjs b/src/parsers/markdown.mjs index 318df30f..da7319cc 100644 --- a/src/parsers/markdown.mjs +++ b/src/parsers/markdown.mjs @@ -37,6 +37,10 @@ export const parseChangelog = async path => { * @returns {Promise>} */ export const parseIndex = async path => { + if (!path || !path.length) { + return []; + } + const index = await loadFromURL(path); const items = Array.from(index.matchAll(LIST_ITEM_REGEX)); diff --git a/src/streaming.mjs b/src/streaming.mjs index 3d8fc7bc..a275a037 100644 --- a/src/streaming.mjs +++ b/src/streaming.mjs @@ -78,24 +78,5 @@ export const createStreamingCache = () => { return cache.get(key); }, - - /** - * Checks if a key exists in the cache. - * - * @param {string} key - Cache key to check - * @returns {boolean} True if the key exists - */ - has(key) { - return cache.has(key); - }, - - /** - * Clears all cached results. - */ - clear() { - streamingLogger.debug(`Clearing streaming cache`); - - cache.clear(); - }, }; }; diff --git a/src/threading/chunk-worker.mjs b/src/threading/chunk-worker.mjs index f46bc005..80558790 100644 --- a/src/threading/chunk-worker.mjs +++ b/src/threading/chunk-worker.mjs @@ -4,16 +4,11 @@ import { allGenerators } from '../generators/index.mjs'; * Processes a chunk of items using the specified generator's processChunk method. * This is the worker entry point for Piscina. 
* - * @param {{ - * generatorName: string, - * fullInput: unknown[], - * itemIndices: number[], - * options: object - * }} opts - Task options from Piscina + * @param {ParallelTaskOptions} opts - Task options from Piscina * @returns {Promise} The processed result */ -export default async ({ generatorName, fullInput, itemIndices, options }) => { +export default async ({ generatorName, input, itemIndices, options }) => { const generator = allGenerators[generatorName]; - return generator.processChunk(fullInput, itemIndices, options); + return generator.processChunk(input, itemIndices, options); }; diff --git a/src/threading/index.mjs b/src/threading/index.mjs index 81744c02..bd7f2dfa 100644 --- a/src/threading/index.mjs +++ b/src/threading/index.mjs @@ -4,7 +4,7 @@ import logger from '../logger/index.mjs'; const poolLogger = logger.child('WorkerPool'); -const workerScript = new URL('./chunk-worker.mjs', import.meta.url).href; +const workerScript = import.meta.resolve('./chunk-worker.mjs'); /** * Creates a Piscina worker pool for parallel processing. @@ -15,7 +15,7 @@ const workerScript = new URL('./chunk-worker.mjs', import.meta.url).href; export default function createWorkerPool(threads) { poolLogger.debug(`WorkerPool initialized`, { threads, - workerScript: './chunk-worker.mjs', + workerScript, }); return new Piscina({ diff --git a/src/threading/parallel.mjs b/src/threading/parallel.mjs index 71491671..c0172813 100644 --- a/src/threading/parallel.mjs +++ b/src/threading/parallel.mjs @@ -1,5 +1,6 @@ 'use strict'; +import { allGenerators } from '../generators/index.mjs'; import logger from '../logger/index.mjs'; const parallelLogger = logger.child('parallel'); @@ -30,12 +31,12 @@ const createChunks = (count, size) => { * @param {number[]} indices - Indices to process * @param {object} options - Serialized options * @param {string} generatorName - Name of the generator - * @returns {object} Task data for Piscina + * @returns {ParallelTaskOptions} Task data for Piscina */ const createTask = (fullInput, indices, options, generatorName) => ({ generatorName, // Only send the items needed for this chunk (reduces serialization overhead) - fullInput: indices.map(i => fullInput[i]), + input: indices.map(i => fullInput[i]), // Remap indices to 0-based for the sliced array itemIndices: indices.map((_, i) => i), options, @@ -61,6 +62,8 @@ export default function createParallelWorker(generatorName, pool, options) { return opts; }; + const generator = allGenerators[generatorName]; + return { /** * Processes items in parallel, yielding results as chunks complete. 
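// Between these two hunks, for orientation: stream() submits every chunk to
// the pool up front and then drains the pending set in completion order. A
// minimal standalone sketch of that as-completed pattern (the
// { promise, result } resolution shape follows the "resolves to itself for
// removal" comment in the hunk below; illustrative, not the patch's exact code):
async function* yieldAsCompleted(pending) {
  while (pending.size > 0) {
    // Promise.race accepts any iterable, including a Set of promises
    const { promise, result } = await Promise.race(pending);

    // Remove the settled promise so the next race only sees unfinished work
    pending.delete(promise);

    yield result;
  }
}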
@@ -76,18 +79,28 @@ export default function createParallelWorker(generatorName, pool, options) { return; } - const chunks = createChunks(items.length, chunkSize); - const opts = serializeOptions(extra); + const chunks = createChunks(items.length, chunkSize); + parallelLogger.debug( `Distributing ${items.length} items across ${chunks.length} chunks`, { generator: generatorName, chunks: chunks.length, chunkSize, threads } ); + const runInOneGo = threads <= 1 || items.length <= 2; + // Submit all tasks to Piscina - each promise resolves to itself for removal const pending = new Set( chunks.map(indices => { + if (runInOneGo) { + const promise = generator + .processChunk(fullInput, indices, opts) + .then(result => ({ promise, result })); + + return promise; + } + const promise = pool .run(createTask(fullInput, indices, opts, generatorName)) .then(result => ({ promise, result })); diff --git a/src/types.d.ts b/src/types.d.ts index 3e311761..065174c8 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -10,10 +10,7 @@ type NodeWithData = T & { declare global { export interface ParserOutput { - file: { - stem?: string; - basename?: string; - }; + file: { stem?: string; basename?: string }; tree: T; } From f0de55dc4a73dba6b291c0488c76ce6fcbf50fe7 Mon Sep 17 00:00:00 2001 From: Claudio Wunder Date: Wed, 10 Dec 2025 16:24:27 +0100 Subject: [PATCH 23/25] chore: removed unused tests --- src/__tests__/streaming.test.mjs | 60 -------------------------------- 1 file changed, 60 deletions(-) diff --git a/src/__tests__/streaming.test.mjs b/src/__tests__/streaming.test.mjs index 2ba63e1f..de9a123d 100644 --- a/src/__tests__/streaming.test.mjs +++ b/src/__tests__/streaming.test.mjs @@ -168,65 +168,5 @@ describe('streaming utilities', () => { deepStrictEqual(result1, [1, 2]); deepStrictEqual(result2, [3, 4]); }); - - it('should report has() correctly', async () => { - const cache = createStreamingCache(); - - strictEqual(cache.has('test'), false); - - async function* gen() { - yield [1]; - } - - cache.getOrCollect('test', gen()); - - strictEqual(cache.has('test'), true); - strictEqual(cache.has('other'), false); - }); - - it('should clear all entries', async () => { - const cache = createStreamingCache(); - - async function* gen1() { - yield [1]; - } - - async function* gen2() { - yield [2]; - } - - cache.getOrCollect('key1', gen1()); - cache.getOrCollect('key2', gen2()); - - strictEqual(cache.has('key1'), true); - strictEqual(cache.has('key2'), true); - - cache.clear(); - - strictEqual(cache.has('key1'), false); - strictEqual(cache.has('key2'), false); - }); - - it('should allow re-adding after clear', async () => { - const cache = createStreamingCache(); - - async function* gen1() { - yield [1, 2]; - } - - const result1 = await cache.getOrCollect('test', gen1()); - - deepStrictEqual(result1, [1, 2]); - - cache.clear(); - - async function* gen2() { - yield [3, 4]; - } - - const result2 = await cache.getOrCollect('test', gen2()); - - deepStrictEqual(result2, [3, 4]); - }); }); }); From 3f6552f1d85eed1e37e689b1daa9d8af26324333 Mon Sep 17 00:00:00 2001 From: Claudio Wunder Date: Wed, 10 Dec 2025 16:30:45 +0100 Subject: [PATCH 24/25] fix: test --- src/__tests__/streaming.test.mjs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/__tests__/streaming.test.mjs b/src/__tests__/streaming.test.mjs index de9a123d..6b952f54 100644 --- a/src/__tests__/streaming.test.mjs +++ b/src/__tests__/streaming.test.mjs @@ -123,8 +123,6 @@ describe('streaming utilities', () => { ok(cache); strictEqual(typeof cache.getOrCollect, 
'function');
-      strictEqual(typeof cache.has, 'function');
-      strictEqual(typeof cache.clear, 'function');
     });
 
     it('should return same promise for same key', async () => {

From 13406cbd7a34aad768faafb545311ce4158d3092 Mon Sep 17 00:00:00 2001
From: Claudio Wunder
Date: Wed, 10 Dec 2025 16:34:28 +0100
Subject: [PATCH 25/25] chore: minify result

---
 src/generators/legacy-html-all/index.mjs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/generators/legacy-html-all/index.mjs b/src/generators/legacy-html-all/index.mjs
index 8bcb5ca0..3166080b 100644
--- a/src/generators/legacy-html-all/index.mjs
+++ b/src/generators/legacy-html-all/index.mjs
@@ -88,13 +88,13 @@ export default {
       { skipGitHub: true, skipGtocPicker: true }
     );
 
-    if (output) {
-      // We minify the html result to reduce the file size and keep it "clean"
-      const minified = HTMLMinifier.minify(Buffer.from(result), {});
+    // We minify the html result to reduce the file size and keep it "clean"
+    const minified = HTMLMinifier.minify(Buffer.from(result), {});
 
+    if (output) {
       await writeFile(join(output, 'all.html'), minified);
     }
 
-    return result;
+    return minified;
   },
 };
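The effect of this final patch is that callers always receive the minified document, whether or not anything is written to disk. A condensed sketch of the new control flow (the helper name is hypothetical, and `minify` stands in for the HTMLMinifier binding shown in the diff):

import { writeFile } from 'node:fs/promises';
import { join } from 'node:path';

// Hypothetical helper mirroring the patched generate(): minify first, write
// all.html only when an output directory is configured, and always return
// the minified result so in-memory consumers see the same bytes as the file.
const finalizeAllHtml = async (result, output, minify) => {
  const minified = minify(Buffer.from(result), {});

  if (output) {
    await writeFile(join(output, 'all.html'), minified);
  }

  return minified;
};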