diff --git a/.github/workflows/generate.yml b/.github/workflows/generate.yml
index 44af8650..30765597 100644
--- a/.github/workflows/generate.yml
+++ b/.github/workflows/generate.yml
@@ -59,6 +59,7 @@ jobs:
           sparse-checkout: |
             doc/api
             lib
+            .
           path: node

       - name: Setup Node.js
@@ -79,7 +80,9 @@ jobs:
             -t ${{ matrix.target }} \
             -i "${{ matrix.input }}" \
             -o "out/${{ matrix.target }}" \
-            --index ./node/doc/api/index.md
+            -c ./node/CHANGELOG.md \
+            --index ./node/doc/api/index.md \
+            --log-level debug

       - name: Upload ${{ matrix.target }} artifacts
         uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
diff --git a/bin/cli.mjs b/bin/cli.mjs
index 05533cde..8100125f 100755
--- a/bin/cli.mjs
+++ b/bin/cli.mjs
@@ -5,12 +5,19 @@ import process from 'node:process';
 import { Command, Option } from 'commander';

 import commands from './commands/index.mjs';
-import interactive from './commands/interactive.mjs';
 import { errorWrap } from './utils.mjs';
+import { LogLevel } from '../src/logger/constants.mjs';
+import logger from '../src/logger/index.mjs';
+
+const logLevelOption = new Option('--log-level <level>', 'Log level')
+  .choices(Object.keys(LogLevel))
+  .default('info');

 const program = new Command()
   .name('@nodejs/doc-kit')
-  .description('CLI tool to generate the Node.js API documentation');
+  .description('CLI tool to generate the Node.js API documentation')
+  .addOption(logLevelOption)
+  .hook('preAction', cmd => logger.setLogLevel(cmd.opts().logLevel));

 // Registering commands
 commands.forEach(({ name, description, options, action }) => {
@@ -37,11 +44,5 @@ commands.forEach(({ name, description, options, action }) => {
   cmd.action(errorWrap(action));
 });

-// Register the interactive command
-program
-  .command('interactive')
-  .description('Launch guided CLI wizard')
-  .action(errorWrap(interactive));
-
 // Parse and execute command-line arguments
 program.parse(process.argv);
diff --git a/bin/commands/generate.mjs b/bin/commands/generate.mjs
index 5b9dbfd6..2ba3e8cd 100644
--- a/bin/commands/generate.mjs
+++ b/bin/commands/generate.mjs
@@ -3,38 +3,18 @@ import { resolve } from 'node:path';

 import { coerce } from 'semver';

-import {
-  DOC_NODE_CHANGELOG_URL,
-  DOC_NODE_VERSION,
-} from '../../src/constants.mjs';
+import { NODE_CHANGELOG_URL, NODE_VERSION } from '../../src/constants.mjs';
 import { publicGenerators } from '../../src/generators/index.mjs';
 import createGenerator from '../../src/generators.mjs';
+import logger from '../../src/logger/index.mjs';
+import { parseTypeMap } from '../../src/parsers/json.mjs';
 import { parseChangelog, parseIndex } from '../../src/parsers/markdown.mjs';
 import { DEFAULT_TYPE_MAP } from '../../src/utils/parser/constants.mjs';
-import { loadFromURL } from '../../src/utils/parser.mjs';
-import { loadAndParse } from '../utils.mjs';

 const availableGenerators = Object.keys(publicGenerators);

-// Half of available logical CPUs guarantees in general all physical CPUs are being used
-// which in most scenarios is the best way to maximize performance
-const optimalThreads = Math.floor(cpus().length / 2) + 1;
-
-/**
- * @typedef {Object} Options
- * @property {Array<string>|string} input - Specifies the glob/path for input files.
- * @property {Array<string>|string} [ignore] - Specifies the glob/path for ignoring files.
- * @property {Array<string>} target - Specifies the generator target mode.
- * @property {string} version - Specifies the target Node.js version.
- * @property {string} changelog - Specifies the path to the Node.js CHANGELOG.md file.
- * @property {string} typeMap - Specifies the path to the Node.js Type Map.
- * @property {string} [gitRef] - Git ref/commit URL.
- * @property {number} [threads] - Number of threads to allow.
- * @property {number} [chunkSize] - Number of items to process per worker thread.
- */
-
 /**
- * @type {import('../utils.mjs').Command}
+ * @type {import('./types').Command}
  */
 export default {
   description: 'Generate API docs',
@@ -66,11 +46,11 @@ export default {
     },
     threads: {
       flags: ['-p', '--threads <number>'],
-      desc: 'Number of worker threads to use',
+      desc: 'Number of threads to use (minimum: 1)',
       prompt: {
         type: 'text',
         message: 'How many threads to allow',
-        initialValue: String(Math.max(optimalThreads, 1)),
+        initialValue: String(cpus().length),
       },
     },
     chunkSize: {
@@ -88,7 +68,7 @@ export default {
       prompt: {
         type: 'text',
         message: 'Enter Node.js version',
-        initialValue: DOC_NODE_VERSION,
+        initialValue: NODE_VERSION,
       },
     },
     changelog: {
@@ -97,7 +77,7 @@ export default {
       prompt: {
         type: 'text',
         message: 'Enter changelog URL',
-        initialValue: DOC_NODE_CHANGELOG_URL,
+        initialValue: NODE_CHANGELOG_URL,
       },
     },
     gitRef: {
@@ -140,33 +120,42 @@ export default {
       },
     },
   },
+  /**
+   * @typedef {Object} Options
+   * @property {Array<string>|string} input - Specifies the glob/path for input files.
+   * @property {Array<string>|string} [ignore] - Specifies the glob/path for ignoring files.
+   * @property {Array<string>} target - Specifies the generator target mode.
+   * @property {string} version - Specifies the target Node.js version.
+   * @property {string} changelog - Specifies the path to the Node.js CHANGELOG.md file.
+   * @property {string} typeMap - Specifies the path to the Node.js Type Map.
+   * @property {string} index - Specifies the path to the index document.
+   * @property {string} [gitRef] - Git ref/commit URL.
+   * @property {number} [threads] - Number of threads to allow.
+   * @property {number} [chunkSize] - Number of items to process per worker thread.
+   *
+   * Handles the action for generating API docs
+   * @param {Options} opts - The options to generate API docs.
   * @returns {Promise<void>}
   */
  async action(opts) {
-    const docs = await loadAndParse(opts.input, opts.ignore);
-    const releases = await parseChangelog(opts.changelog);
-
-    const rawTypeMap = await loadFromURL(opts.typeMap);
-    const typeMap = JSON.parse(rawTypeMap);
+    logger.debug('Starting doc-kit', opts);

-    const index = opts.index && (await parseIndex(opts.index));
+    const { runGenerators } = createGenerator();

-    const { runGenerators } = createGenerator(docs);
+    logger.debug('Starting generation', { targets: opts.target });

     await runGenerators({
       generators: opts.target,
       input: opts.input,
       output: opts.output && resolve(opts.output),
       version: coerce(opts.version),
-      releases,
+      releases: await parseChangelog(opts.changelog),
       gitRef: opts.gitRef,
-      threads: parseInt(opts.threads, 10),
-      chunkSize: parseInt(opts.chunkSize, 10),
-      index,
-      typeMap,
+      threads: Math.max(parseInt(opts.threads, 10), 1),
+      chunkSize: Math.max(parseInt(opts.chunkSize, 10), 1),
+      index: await parseIndex(opts.index),
+      typeMap: await parseTypeMap(opts.typeMap),
     });
   },
 };
diff --git a/bin/commands/index.mjs b/bin/commands/index.mjs
index 3e6d9d97..ece48157 100644
--- a/bin/commands/index.mjs
+++ b/bin/commands/index.mjs
@@ -1,3 +1,4 @@
 import generate from './generate.mjs';
+import interactive from './interactive.mjs';

-export default [generate];
+export default [generate, interactive];
diff --git a/bin/commands/interactive.mjs b/bin/commands/interactive.mjs
index 618c9593..fef401cb 100644
--- a/bin/commands/interactive.mjs
+++ b/bin/commands/interactive.mjs
@@ -12,7 +12,6 @@ import {
   cancel,
 } from '@clack/prompts';

-import commands from './index.mjs';
 import logger from '../../src/logger/index.mjs';

 /**
@@ -53,127 +52,143 @@ function escapeShellArg(arg) {
 }

 /**
- * Main interactive function for the API Docs Tooling command line interface.
- * Guides the user through a series of prompts, validates inputs, and generates a command to run.
- * @returns {Promise<void>} Resolves once the command is generated and executed.
+ * @type {import('./types').Command}
  */
-export default async function interactive() {
-  // Step 1: Introduction to the tool
-  intro('Welcome to API Docs Tooling');
-
-  // Step 2: Choose the action based on available command definitions
-  const actionOptions = commands.map(({ description }, i) => ({
-    label: description,
-    value: i,
-  }));
-
-  const selectedAction = await select({
-    message: 'What would you like to do?',
-    options: actionOptions,
-  });
-
-  if (isCancel(selectedAction)) {
-    cancel('Cancelled.');
-    process.exit(0);
-  }
-
-  // Retrieve the options for the selected action
-  const { options, name } = commands[selectedAction];
-  const answers = {}; // Store answers from user prompts
-
-  // Step 3: Collect input for each option
-  for (const [key, { prompt }] of Object.entries(options)) {
-    let response;
-    const promptMessage = getMessage(prompt);
-
-    switch (prompt.type) {
-      case 'text':
-        response = await text({
-          message: promptMessage,
-          initialValue: prompt.initialValue || '',
-          validate: prompt.required ? requireValue : undefined,
-        });
-        if (response) {
-          // Store response; split into an array if variadic
-          answers[key] = prompt.variadic
-            ? response.split(',').map(s => s.trim())
-            : response;
-        }
-        break;
-
-      case 'confirm':
-        response = await confirm({
-          message: promptMessage,
-          initialValue: prompt.initialValue,
-        });
-        answers[key] = response;
-        break;
-
-      case 'multiselect':
-        response = await multiselect({
-          message: promptMessage,
-          options: prompt.options,
-          required: !!prompt.required,
-        });
-        answers[key] = response;
-        break;
-
-      case 'select':
-        response = await select({
-          message: promptMessage,
-          options: prompt.options,
-        });
-        answers[key] = response;
-        break;
-    }
+export default {
+  name: 'interactive',
+  description: 'Launch guided CLI wizard',
+  options: {},
+  /**
+   * Main interactive function for the API Docs Tooling command line interface.
+   * Guides the user through a series of prompts, validates inputs, and generates a command to run.
+   * @returns {Promise<void>} Resolves once the command is generated and executed.
+   */
+  async action() {
+    // Import commands dynamically to avoid circular dependency
+    const { default: commands } = await import('./index.mjs');
+
+    // Filter out the interactive command itself
+    const availableCommands = commands.filter(
+      cmd => cmd.name !== 'interactive'
+    );
+
+    // Step 1: Introduction to the tool
+    intro('Welcome to API Docs Tooling');
+
+    // Step 2: Choose the action based on available command definitions
+    const actionOptions = availableCommands.map((cmd, i) => ({
+      label: cmd.description,
+      value: i,
+    }));
+
+    const selectedAction = await select({
+      message: 'What would you like to do?',
+      options: actionOptions,
+    });

-    // Handle cancellation
-    if (isCancel(response)) {
+    if (isCancel(selectedAction)) {
       cancel('Cancelled.');
       process.exit(0);
     }
-  }

-  // Step 4: Build the final command by escaping values
-  const cmdParts = ['npx', 'doc-kit', name];
-  const executionArgs = [name];
+    // Retrieve the options for the selected action
+    const { options, name } = availableCommands[selectedAction];
+    const answers = {}; // Store answers from user prompts
+
+    // Step 3: Collect input for each option
+    for (const [key, { prompt }] of Object.entries(options)) {
+      let response;
+      const promptMessage = getMessage(prompt);
+
+      switch (prompt.type) {
+        case 'text':
+          response = await text({
+            message: promptMessage,
+            initialValue: prompt.initialValue || '',
+            validate: prompt.required ? requireValue : undefined,
+          });
+          if (response) {
+            // Store response; split into an array if variadic
+            answers[key] = prompt.variadic
+              ? 
response.split(',').map(s => s.trim()) + : response; + } + break; + + case 'confirm': + response = await confirm({ + message: promptMessage, + initialValue: prompt.initialValue, + }); + answers[key] = response; + break; + + case 'multiselect': + response = await multiselect({ + message: promptMessage, + options: prompt.options, + required: !!prompt.required, + }); + answers[key] = response; + break; + + case 'select': + response = await select({ + message: promptMessage, + options: prompt.options, + }); + answers[key] = response; + break; + } - for (const [key, { flags }] of Object.entries(options)) { - const value = answers[key]; - // Skip empty values - if (value == null || (Array.isArray(value) && value.length === 0)) { - continue; + // Handle cancellation + if (isCancel(response)) { + cancel('Cancelled.'); + process.exit(0); + } } - const flag = flags[0].split(/[\s,]+/)[0]; // Use the first flag + // Step 4: Build the final command by escaping values + const cmdParts = ['npx', 'doc-kit', name]; + const executionArgs = [name]; - // Handle different value types (boolean, array, string) - if (typeof value === 'boolean') { - if (value) { - cmdParts.push(flag); - executionArgs.push(flag); + for (const [key, { flags }] of Object.entries(options)) { + const value = answers[key]; + // Skip empty values + if (value == null || (Array.isArray(value) && value.length === 0)) { + continue; } - } else if (Array.isArray(value)) { - for (const item of value) { - cmdParts.push(flag, escapeShellArg(item)); - executionArgs.push(flag, item); + + const flag = flags[0].split(/[\s,]+/)[0]; // Use the first flag + + // Handle different value types (boolean, array, string) + if (typeof value === 'boolean') { + if (value) { + cmdParts.push(flag); + executionArgs.push(flag); + } + } else if (Array.isArray(value)) { + for (const item of value) { + cmdParts.push(flag, escapeShellArg(item)); + executionArgs.push(flag, item); + } + } else { + cmdParts.push(flag, escapeShellArg(value)); + executionArgs.push(flag, value); } - } else { - cmdParts.push(flag, escapeShellArg(value)); - executionArgs.push(flag, value); } - } - const finalCommand = cmdParts.join(' '); + const finalCommand = cmdParts.join(' '); - logger.info(`\nGenerated command:\n${finalCommand}\n`); + logger.info(`\nGenerated command:\n${finalCommand}\n`); - // Step 5: Confirm and execute the generated command - if (await confirm({ message: 'Run now?', initialValue: true })) { - spawnSync(process.execPath, [process.argv[1], ...executionArgs], { - stdio: 'inherit', - }); - } + // Step 5: Confirm and execute the generated command + if (await confirm({ message: 'Run now?', initialValue: true })) { + spawnSync(process.execPath, [process.argv[1], ...executionArgs], { + stdio: 'inherit', + }); + } - outro('Done!'); -} + outro('Done!'); + }, +}; diff --git a/bin/commands/types.d.ts b/bin/commands/types.d.ts new file mode 100644 index 00000000..e7ce0d7c --- /dev/null +++ b/bin/commands/types.d.ts @@ -0,0 +1,25 @@ +/** + * Represents a command-line option for the CLI. 
+ */ +export interface Option { + flags: string[]; + desc: string; + prompt?: { + type: 'text' | 'confirm' | 'select' | 'multiselect'; + message: string; + variadic?: boolean; + required?: boolean; + initialValue?: boolean; + options?: { label: string; value: string }[]; + }; +} + +/** + * Represents a command-line subcommand + */ +export interface Command { + options: { [key: string]: Option }; + name: string; + description: string; + action: Function; +} diff --git a/bin/utils.mjs b/bin/utils.mjs index 8d0df2eb..561d9098 100644 --- a/bin/utils.mjs +++ b/bin/utils.mjs @@ -1,35 +1,4 @@ -import createMarkdownLoader from '../src/loaders/markdown.mjs'; import logger from '../src/logger/index.mjs'; -import createMarkdownParser from '../src/parsers/markdown.mjs'; - -/** - * Generic lazy initializer. - * @template T - * @param {() => T} factory - Function to create the instance. - * @returns {() => T} - A function that returns the singleton instance. - */ -export const lazy = factory => { - let instance; - return args => (instance ??= factory(args)); -}; - -// Instantiate loader and parser once to reuse, -// but only if/when we actually need them. No need -// to create these objects just to load a different -// utility. -const loader = lazy(createMarkdownLoader); -const parser = lazy(createMarkdownParser); - -/** - * Load and parse markdown API docs. - * @param {string[]} input - Glob patterns for input files. - * @param {string[]} [ignore] - Glob patterns to ignore. - * @returns {Promise>>} - */ -export async function loadAndParse(input, ignore) { - const files = await loader().loadFiles(input, ignore); - return parser().parseApiDocs(files); -} /** * Wraps a function to catch both synchronous and asynchronous errors. @@ -47,26 +16,3 @@ export const errorWrap = process.exit(1); } }; - -/** - * Represents a command-line option for the CLI. - * @typedef {Object} Option - * @property {string[]} flags - Command-line flags, e.g., ['-i, --input ']. - * @property {string} desc - Description of the option. - * @property {Object} [prompt] - Optional prompt configuration. - * @property {'text'|'confirm'|'select'|'multiselect'} prompt.type - Type of the prompt. - * @property {string} prompt.message - Message displayed in the prompt. - * @property {boolean} [prompt.variadic] - Indicates if the prompt accepts multiple values. - * @property {boolean} [prompt.required] - Whether the prompt is required. - * @property {boolean} [prompt.initialValue] - Default value for confirm prompts. - * @property {{label: string, value: string}[]} [prompt.options] - Options for select/multiselect prompts. 
- */ - -/** - * Represents a command-line subcommand - * @typedef {Object} Command - * @property {{ [key: string]: Option }} options - * @property {string} name - * @property {string} description - * @property {Function} action - */ diff --git a/npm-shrinkwrap.json b/npm-shrinkwrap.json index 30894767..03faf578 100644 --- a/npm-shrinkwrap.json +++ b/npm-shrinkwrap.json @@ -28,6 +28,7 @@ "hastscript": "^9.0.1", "lightningcss": "^1.30.2", "mdast-util-slice-markdown": "^2.0.1", + "piscina": "^5.1.4", "preact": "^10.28.0", "preact-render-to-string": "^6.6.3", "reading-time": "^1.5.0", @@ -768,6 +769,311 @@ "win32" ] }, + "node_modules/@napi-rs/nice": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice/-/nice-1.1.1.tgz", + "integrity": "sha512-xJIPs+bYuc9ASBl+cvGsKbGrJmS6fAKaSZCnT0lhahT5rhA2VVy9/EcIgd2JhtEuFOJNx7UHNn/qiTPTY4nrQw==", + "license": "MIT", + "optional": true, + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + }, + "optionalDependencies": { + "@napi-rs/nice-android-arm-eabi": "1.1.1", + "@napi-rs/nice-android-arm64": "1.1.1", + "@napi-rs/nice-darwin-arm64": "1.1.1", + "@napi-rs/nice-darwin-x64": "1.1.1", + "@napi-rs/nice-freebsd-x64": "1.1.1", + "@napi-rs/nice-linux-arm-gnueabihf": "1.1.1", + "@napi-rs/nice-linux-arm64-gnu": "1.1.1", + "@napi-rs/nice-linux-arm64-musl": "1.1.1", + "@napi-rs/nice-linux-ppc64-gnu": "1.1.1", + "@napi-rs/nice-linux-riscv64-gnu": "1.1.1", + "@napi-rs/nice-linux-s390x-gnu": "1.1.1", + "@napi-rs/nice-linux-x64-gnu": "1.1.1", + "@napi-rs/nice-linux-x64-musl": "1.1.1", + "@napi-rs/nice-openharmony-arm64": "1.1.1", + "@napi-rs/nice-win32-arm64-msvc": "1.1.1", + "@napi-rs/nice-win32-ia32-msvc": "1.1.1", + "@napi-rs/nice-win32-x64-msvc": "1.1.1" + } + }, + "node_modules/@napi-rs/nice-android-arm-eabi": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-android-arm-eabi/-/nice-android-arm-eabi-1.1.1.tgz", + "integrity": "sha512-kjirL3N6TnRPv5iuHw36wnucNqXAO46dzK9oPb0wj076R5Xm8PfUVA9nAFB5ZNMmfJQJVKACAPd/Z2KYMppthw==", + "cpu": [ + "arm" + ], + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-android-arm64": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-android-arm64/-/nice-android-arm64-1.1.1.tgz", + "integrity": "sha512-blG0i7dXgbInN5urONoUCNf+DUEAavRffrO7fZSeoRMJc5qD+BJeNcpr54msPF6qfDD6kzs9AQJogZvT2KD5nw==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-darwin-arm64": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-darwin-arm64/-/nice-darwin-arm64-1.1.1.tgz", + "integrity": "sha512-s/E7w45NaLqTGuOjC2p96pct4jRfo61xb9bU1unM/MJ/RFkKlJyJDx7OJI/O0ll/hrfpqKopuAFDV8yo0hfT7A==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-darwin-x64": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-darwin-x64/-/nice-darwin-x64-1.1.1.tgz", + "integrity": "sha512-dGoEBnVpsdcC+oHHmW1LRK5eiyzLwdgNQq3BmZIav+9/5WTZwBYX7r5ZkQC07Nxd3KHOCkgbHSh4wPkH1N1LiQ==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-freebsd-x64": { + "version": 
"1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-freebsd-x64/-/nice-freebsd-x64-1.1.1.tgz", + "integrity": "sha512-kHv4kEHAylMYmlNwcQcDtXjklYp4FCf0b05E+0h6nDHsZ+F0bDe04U/tXNOqrx5CmIAth4vwfkjjUmp4c4JktQ==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-linux-arm-gnueabihf": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-linux-arm-gnueabihf/-/nice-linux-arm-gnueabihf-1.1.1.tgz", + "integrity": "sha512-E1t7K0efyKXZDoZg1LzCOLxgolxV58HCkaEkEvIYQx12ht2pa8hoBo+4OB3qh7e+QiBlp1SRf+voWUZFxyhyqg==", + "cpu": [ + "arm" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-linux-arm64-gnu": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-linux-arm64-gnu/-/nice-linux-arm64-gnu-1.1.1.tgz", + "integrity": "sha512-CIKLA12DTIZlmTaaKhQP88R3Xao+gyJxNWEn04wZwC2wmRapNnxCUZkVwggInMJvtVElA+D4ZzOU5sX4jV+SmQ==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-linux-arm64-musl": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-linux-arm64-musl/-/nice-linux-arm64-musl-1.1.1.tgz", + "integrity": "sha512-+2Rzdb3nTIYZ0YJF43qf2twhqOCkiSrHx2Pg6DJaCPYhhaxbLcdlV8hCRMHghQ+EtZQWGNcS2xF4KxBhSGeutg==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-linux-ppc64-gnu": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-linux-ppc64-gnu/-/nice-linux-ppc64-gnu-1.1.1.tgz", + "integrity": "sha512-4FS8oc0GeHpwvv4tKciKkw3Y4jKsL7FRhaOeiPei0X9T4Jd619wHNe4xCLmN2EMgZoeGg+Q7GY7BsvwKpL22Tg==", + "cpu": [ + "ppc64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-linux-riscv64-gnu": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-linux-riscv64-gnu/-/nice-linux-riscv64-gnu-1.1.1.tgz", + "integrity": "sha512-HU0nw9uD4FO/oGCCk409tCi5IzIZpH2agE6nN4fqpwVlCn5BOq0MS1dXGjXaG17JaAvrlpV5ZeyZwSon10XOXw==", + "cpu": [ + "riscv64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-linux-s390x-gnu": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-linux-s390x-gnu/-/nice-linux-s390x-gnu-1.1.1.tgz", + "integrity": "sha512-2YqKJWWl24EwrX0DzCQgPLKQBxYDdBxOHot1KWEq7aY2uYeX+Uvtv4I8xFVVygJDgf6/92h9N3Y43WPx8+PAgQ==", + "cpu": [ + "s390x" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-linux-x64-gnu": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-linux-x64-gnu/-/nice-linux-x64-gnu-1.1.1.tgz", + "integrity": "sha512-/gaNz3R92t+dcrfCw/96pDopcmec7oCcAQ3l/M+Zxr82KT4DljD37CpgrnXV+pJC263JkW572pdbP3hP+KjcIg==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-linux-x64-musl": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-linux-x64-musl/-/nice-linux-x64-musl-1.1.1.tgz", + "integrity": 
"sha512-xScCGnyj/oppsNPMnevsBe3pvNaoK7FGvMjT35riz9YdhB2WtTG47ZlbxtOLpjeO9SqqQ2J2igCmz6IJOD5JYw==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-openharmony-arm64": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-openharmony-arm64/-/nice-openharmony-arm64-1.1.1.tgz", + "integrity": "sha512-6uJPRVwVCLDeoOaNyeiW0gp2kFIM4r7PL2MczdZQHkFi9gVlgm+Vn+V6nTWRcu856mJ2WjYJiumEajfSm7arPQ==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "openharmony" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-win32-arm64-msvc": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-win32-arm64-msvc/-/nice-win32-arm64-msvc-1.1.1.tgz", + "integrity": "sha512-uoTb4eAvM5B2aj/z8j+Nv8OttPf2m+HVx3UjA5jcFxASvNhQriyCQF1OB1lHL43ZhW+VwZlgvjmP5qF3+59atA==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-win32-ia32-msvc": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-win32-ia32-msvc/-/nice-win32-ia32-msvc-1.1.1.tgz", + "integrity": "sha512-CNQqlQT9MwuCsg1Vd/oKXiuH+TcsSPJmlAFc5frFyX/KkOh0UpBLEj7aoY656d5UKZQMQFP7vJNa1DNUNORvug==", + "cpu": [ + "ia32" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/nice-win32-x64-msvc": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/nice-win32-x64-msvc/-/nice-win32-x64-msvc-1.1.1.tgz", + "integrity": "sha512-vB+4G/jBQCAh0jelMTY3+kgFy00Hlx2f2/1zjMoH821IbplbWZOkLiTYXQkygNTzQJTq5cvwBDgn2ppHD+bglQ==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, "node_modules/@napi-rs/wasm-runtime": { "version": "0.2.12", "resolved": "https://registry.npmjs.org/@napi-rs/wasm-runtime/-/wasm-runtime-0.2.12.tgz", @@ -937,6 +1243,7 @@ "resolved": "https://registry.npmjs.org/@orama/core/-/core-0.0.10.tgz", "integrity": "sha512-rZ4AHeHoFTxOXMhM0An2coO3OfR+FpL0ejXc1PPrNsGB4p6VNlky7FAGeuqOvS5gUYB5ywJsmDzCxeflPtgk4w==", "license": "AGPL-3.0", + "peer": true, "dependencies": { "@orama/cuid2": "2.2.3", "dedent": "1.5.3" @@ -947,6 +1254,7 @@ "resolved": "https://registry.npmjs.org/dedent/-/dedent-1.5.3.tgz", "integrity": "sha512-NHQtfOOW68WD8lgypbLA5oT+Bt0xXJhiYvoR6SmmNXZfpzOGXwdKWmcwG8N7PwVVWV3eF/68nmD9BaJSsTBhyQ==", "license": "MIT", + "peer": true, "peerDependencies": { "babel-plugin-macros": "^3.1.0" }, @@ -976,7 +1284,6 @@ "resolved": "https://registry.npmjs.org/@orama/orama/-/orama-3.1.16.tgz", "integrity": "sha512-scSmQBD8eANlMUOglxHrN1JdSW8tDghsPuS83otqealBiIeMukCQMOf/wc0JJjDXomqwNdEQFLXLGHrU6PGxuA==", "license": "Apache-2.0", - "peer": true, "engines": { "node": ">= 20.0.0" } @@ -985,7 +1292,8 @@ "version": "0.0.5", "resolved": "https://registry.npmjs.org/@orama/oramacore-events-parser/-/oramacore-events-parser-0.0.5.tgz", "integrity": "sha512-yAuSwog+HQBAXgZ60TNKEwu04y81/09mpbYBCmz1RCxnr4ObNY2JnPZI7HmALbjAhLJ8t5p+wc2JHRK93ubO4w==", - "license": "AGPL-3.0" + "license": "AGPL-3.0", + "peer": true }, "node_modules/@orama/react-components": { "version": "0.8.1", @@ -1207,7 +1515,6 @@ "resolved": "https://registry.npmjs.org/@oramacloud/client/-/client-2.1.4.tgz", "integrity": 
"sha512-uNPFs4wq/iOPbggCwTkVNbIr64Vfd7ZS/h+cricXVnzXWocjDTfJ3wLL4lr0qiSu41g8z+eCAGBqJ30RO2O4AA==", "license": "ISC", - "peer": true, "dependencies": { "@orama/cuid2": "^2.2.3", "@orama/orama": "^3.0.0", @@ -3642,7 +3949,6 @@ "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "license": "MIT", - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -4183,7 +4489,8 @@ "version": "3.1.3", "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.3.tgz", "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==", - "license": "MIT" + "license": "MIT", + "peer": true }, "node_modules/debug": { "version": "4.4.3", @@ -4465,7 +4772,6 @@ "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.1.tgz", "integrity": "sha512-BhHmn2yNOFA9H9JmmIVKJmd288g9hrVRDkdoIgRCRuSySRUHH7r/DI6aAXW9T1WwUuY3DFgrcaqB+deURBLR5g==", "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", @@ -6311,7 +6617,6 @@ "resolved": "https://registry.npmjs.org/marked/-/marked-13.0.3.tgz", "integrity": "sha512-rqRix3/TWzE9rIoFGIn8JmsVfhiuC8VIQ8IdX5TfzmeBucdY05/0UlzKaw0eVtpcN/OdVFpBk7CjKGo9iHJ/zA==", "license": "MIT", - "peer": true, "bin": { "marked": "bin/marked.js" }, @@ -7562,6 +7867,18 @@ "node": ">=0.10" } }, + "node_modules/piscina": { + "version": "5.1.4", + "resolved": "https://registry.npmjs.org/piscina/-/piscina-5.1.4.tgz", + "integrity": "sha512-7uU4ZnKeQq22t9AsmHGD2w4OYQGonwFnTypDypaWi7Qr2EvQIFVtG8J5D/3bE7W123Wdc9+v4CZDu5hJXVCtBg==", + "license": "MIT", + "engines": { + "node": ">=20.x" + }, + "optionalDependencies": { + "@napi-rs/nice": "^1.0.4" + } + }, "node_modules/postcss": { "version": "8.5.6", "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.6.tgz", @@ -7581,7 +7898,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "nanoid": "^3.3.11", "picocolors": "^1.1.1", @@ -7631,7 +7947,6 @@ "resolved": "https://registry.npmjs.org/preact/-/preact-10.28.0.tgz", "integrity": "sha512-rytDAoiXr3+t6OIP3WGlDd0ouCUG1iCWzkcY3++Nreuoi17y6T5i/zRhe6uYfoVcxq6YU+sBtJouuRDsq8vvqA==", "license": "MIT", - "peer": true, "funding": { "type": "opencollective", "url": "https://opencollective.com/preact" @@ -8199,7 +8514,8 @@ "version": "0.26.0", "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.26.0.tgz", "integrity": "sha512-NlHwttCI/l5gCPR3D1nNXtWABUmBwvZpEQiD4IXSbIDq8BzLIK/7Ir5gTFSGZDUu37K5cMNp0hFtzO38sC7gWA==", - "license": "MIT" + "license": "MIT", + "peer": true }, "node_modules/semver": { "version": "7.7.3", @@ -8800,7 +9116,6 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -9168,7 +9483,6 @@ "integrity": "sha512-VUyWiTNQD7itdiMuJy+EuLEErLj3uwX/EpHQF8EOf33Dq3Ju6VW1GXm+swk6+1h7a49uv9fKZ+dft9jU7esdLA==", "dev": true, "hasInstallScript": true, - "peer": true, "dependencies": { "napi-postinstall": "^0.2.4" }, @@ -9585,7 +9899,6 @@ "resolved": "https://registry.npmjs.org/zod/-/zod-3.24.3.tgz", "integrity": "sha512-HhY1oqzWCQWuUqvBFnsyrtZRhyPeR7SUGv+C4+MsisMuVfSPx8HpwWqH8tRahSlt6M3PiFAcoeFhZAqIXTxoSg==", "license": "MIT", - "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } diff --git a/package.json b/package.json 
index 7d9c2970..b92a40f4 100644 --- a/package.json +++ b/package.json @@ -60,6 +60,7 @@ "hastscript": "^9.0.1", "lightningcss": "^1.30.2", "mdast-util-slice-markdown": "^2.0.1", + "piscina": "^5.1.4", "preact": "^10.28.0", "preact-render-to-string": "^6.6.3", "reading-time": "^1.5.0", diff --git a/scripts/vercel-build.sh b/scripts/vercel-build.sh index 862d5340..6ca6cb2c 100755 --- a/scripts/vercel-build.sh +++ b/scripts/vercel-build.sh @@ -5,6 +5,8 @@ node bin/cli.mjs generate \ -t web \ -i "./node/doc/api/*.md" \ -o "./out" \ - --index "./node/doc/api/index.md" + -c "./node/CHANGELOG.md" \ + --index "./node/doc/api/index.md" \ + --log-level debug rm -rf node/ diff --git a/scripts/vercel-prepare.sh b/scripts/vercel-prepare.sh index a4ae5b15..31b29afe 100755 --- a/scripts/vercel-prepare.sh +++ b/scripts/vercel-prepare.sh @@ -5,7 +5,7 @@ git clone --depth 1 --filter=blob:none --sparse https://github.com/nodejs/node.g cd node # Enable sparse checkout and specify the folder -git sparse-checkout set doc/ +git sparse-checkout set lib doc/api . # Move back out cd .. diff --git a/src/__tests__/generators.test.mjs b/src/__tests__/generators.test.mjs new file mode 100644 index 00000000..7464784e --- /dev/null +++ b/src/__tests__/generators.test.mjs @@ -0,0 +1,142 @@ +import { ok, strictEqual } from 'node:assert'; +import { describe, it } from 'node:test'; + +import createGenerator from '../generators.mjs'; + +describe('createGenerator', () => { + // Mock options with minimal required fields + const mockOptions = { + input: '/tmp/test', + output: '/tmp/output', + generators: ['metadata'], + version: { major: 22, minor: 0, patch: 0 }, + releases: [], + index: [], + gitRef: 'https://github.com/nodejs/node/tree/HEAD', + threads: 1, + chunkSize: 20, + typeMap: {}, + }; + + it('should create a generator orchestrator with runGenerators method', () => { + const { runGenerators } = createGenerator(); + + ok(runGenerators); + strictEqual(typeof runGenerators, 'function'); + }); + + it('should return the ast input directly when generators list is empty', async () => { + const { runGenerators } = createGenerator(); + + const results = await runGenerators({ + ...mockOptions, + generators: ['ast'], + }); + + // Returns array of results, first element is the 'ast' result + ok(Array.isArray(results)); + strictEqual(results.length, 1); + ok(results[0]); + }); + + it('should run metadata generator', async () => { + const { runGenerators } = createGenerator(); + + const results = await runGenerators({ + ...mockOptions, + generators: ['metadata'], + }); + + // Returns array with one element - the collected metadata array + ok(Array.isArray(results)); + strictEqual(results.length, 1); + ok(Array.isArray(results[0])); + }); + + it('should handle generator with dependency', async () => { + const { runGenerators } = createGenerator(); + + // legacy-html depends on metadata + const results = await runGenerators({ + ...mockOptions, + generators: ['legacy-html'], + }); + + // Should complete without error - returns array of results + ok(Array.isArray(results)); + strictEqual(results.length, 1); + }); + + it('should skip already scheduled generators', async () => { + const { runGenerators } = createGenerator(); + + // Running with ['metadata', 'metadata'] should skip the second + const results = await runGenerators({ + ...mockOptions, + generators: ['metadata', 'metadata'], + }); + + // Returns array with two elements (same result cached for both) + ok(Array.isArray(results)); + strictEqual(results.length, 2); + }); + + 
it('should handle multiple generators in sequence', async () => { + const { runGenerators } = createGenerator(); + + // Run metadata - just one generator + const results = await runGenerators({ + ...mockOptions, + generators: ['metadata'], + }); + + // Returns array of results + ok(Array.isArray(results)); + strictEqual(results.length, 1); + }); + + it('should collect async generator results for dependents', async () => { + const { runGenerators } = createGenerator(); + + // legacy-json depends on metadata (async generator) + const results = await runGenerators({ + ...mockOptions, + generators: ['legacy-json'], + }); + + ok(Array.isArray(results)); + strictEqual(results.length, 1); + }); + + it('should use multiple threads when specified', async () => { + const { runGenerators } = createGenerator(); + + const results = await runGenerators({ + ...mockOptions, + threads: 4, + generators: ['metadata'], + }); + + // Returns array of results + ok(Array.isArray(results)); + strictEqual(results.length, 1); + ok(Array.isArray(results[0])); + }); + + it('should pass options to generators', async () => { + const { runGenerators } = createGenerator(); + + const customTypeMap = { TestType: 'https://example.com/TestType' }; + + const results = await runGenerators({ + ...mockOptions, + typeMap: customTypeMap, + generators: ['metadata'], + }); + + // Returns array of results + ok(Array.isArray(results)); + strictEqual(results.length, 1); + ok(Array.isArray(results[0])); + }); +}); diff --git a/src/__tests__/streaming.test.mjs b/src/__tests__/streaming.test.mjs new file mode 100644 index 00000000..6b952f54 --- /dev/null +++ b/src/__tests__/streaming.test.mjs @@ -0,0 +1,170 @@ +import { deepStrictEqual, ok, strictEqual } from 'node:assert'; +import { describe, it } from 'node:test'; + +import { + isAsyncGenerator, + collectAsyncGenerator, + createStreamingCache, +} from '../streaming.mjs'; + +describe('streaming utilities', () => { + describe('isAsyncGenerator', () => { + it('should return true for async generators', () => { + async function* asyncGen() { + yield 1; + } + + const gen = asyncGen(); + + strictEqual(isAsyncGenerator(gen), true); + }); + + it('should return false for regular generators', () => { + function* syncGen() { + yield 1; + } + + const gen = syncGen(); + + strictEqual(isAsyncGenerator(gen), false); + }); + + it('should return false for plain objects', () => { + strictEqual(isAsyncGenerator({}), false); + strictEqual(isAsyncGenerator([]), false); + strictEqual(isAsyncGenerator({ async: true }), false); + }); + + it('should return false for null and undefined', () => { + strictEqual(isAsyncGenerator(null), false); + strictEqual(isAsyncGenerator(undefined), false); + }); + + it('should return false for primitives', () => { + strictEqual(isAsyncGenerator(42), false); + strictEqual(isAsyncGenerator('string'), false); + strictEqual(isAsyncGenerator(true), false); + }); + + it('should return true for objects with Symbol.asyncIterator', () => { + const asyncIterable = { + [Symbol.asyncIterator]() { + return { + next: async () => ({ done: true, value: undefined }), + }; + }, + }; + + strictEqual(isAsyncGenerator(asyncIterable), true); + }); + }); + + describe('collectAsyncGenerator', () => { + it('should collect all chunks into a flat array', async () => { + async function* gen() { + yield [1, 2]; + yield [3, 4]; + yield [5]; + } + + const result = await collectAsyncGenerator(gen()); + + deepStrictEqual(result, [1, 2, 3, 4, 5]); + }); + + it('should return empty array for empty generator', 
async () => { + async function* gen() { + // empty generator + } + + const result = await collectAsyncGenerator(gen()); + + deepStrictEqual(result, []); + }); + + it('should handle single chunk', async () => { + async function* gen() { + yield [1, 2, 3]; + } + + const result = await collectAsyncGenerator(gen()); + + deepStrictEqual(result, [1, 2, 3]); + }); + + it('should handle empty chunks', async () => { + async function* gen() { + yield []; + yield [1]; + yield []; + yield [2, 3]; + } + + const result = await collectAsyncGenerator(gen()); + + deepStrictEqual(result, [1, 2, 3]); + }); + + it('should handle objects in chunks', async () => { + async function* gen() { + yield [{ a: 1 }, { b: 2 }]; + yield [{ c: 3 }]; + } + + const result = await collectAsyncGenerator(gen()); + + deepStrictEqual(result, [{ a: 1 }, { b: 2 }, { c: 3 }]); + }); + }); + + describe('createStreamingCache', () => { + it('should create a cache with required methods', () => { + const cache = createStreamingCache(); + + ok(cache); + strictEqual(typeof cache.getOrCollect, 'function'); + }); + + it('should return same promise for same key', async () => { + const cache = createStreamingCache(); + + async function* gen() { + yield [1, 2, 3]; + } + + const promise1 = cache.getOrCollect('test', gen()); + + // Create a new generator (which shouldn't be used due to caching) + async function* gen2() { + yield [4, 5, 6]; + } + + const promise2 = cache.getOrCollect('test', gen2()); + + // Both should resolve to the same result (from first generator) + const result1 = await promise1; + const result2 = await promise2; + + deepStrictEqual(result1, [1, 2, 3]); + strictEqual(result1, result2); + }); + + it('should return different results for different keys', async () => { + const cache = createStreamingCache(); + + async function* gen1() { + yield [1, 2]; + } + + async function* gen2() { + yield [3, 4]; + } + + const result1 = await cache.getOrCollect('key1', gen1()); + const result2 = await cache.getOrCollect('key2', gen2()); + + deepStrictEqual(result1, [1, 2]); + deepStrictEqual(result2, [3, 4]); + }); + }); +}); diff --git a/src/constants.mjs b/src/constants.mjs index 30edc199..37b3dc16 100644 --- a/src/constants.mjs +++ b/src/constants.mjs @@ -1,10 +1,10 @@ 'use strict'; // The current running version of Node.js (Environment) -export const DOC_NODE_VERSION = process.version; +export const NODE_VERSION = process.version; // This is the Node.js CHANGELOG to be consumed to generate a list of all major Node.js versions -export const DOC_NODE_CHANGELOG_URL = +export const NODE_CHANGELOG_URL = 'https://raw.githubusercontent.com/nodejs/node/HEAD/CHANGELOG.md'; // The base URL for the Node.js website diff --git a/src/generators.mjs b/src/generators.mjs index 3d985b8f..0a76a29e 100644 --- a/src/generators.mjs +++ b/src/generators.mjs @@ -1,83 +1,133 @@ 'use strict'; import { allGenerators } from './generators/index.mjs'; -import WorkerPool from './threading/index.mjs'; +import logger from './logger/index.mjs'; +import { isAsyncGenerator, createStreamingCache } from './streaming.mjs'; +import createWorkerPool from './threading/index.mjs'; import createParallelWorker from './threading/parallel.mjs'; +const generatorsLogger = logger.child('generators'); + /** - * This method creates a system that allows you to register generators - * and then execute them in a specific order, keeping track of the - * generation process, and handling errors that may occur from the - * execution of generating content. 
- *
- * When the final generator is reached, the system will return the
- * final generated content.
- *
- * Generators can output content that can be consumed by other generators;
- * Generators can also write to files. These would usually be considered
- * the final generators in the chain.
+ * Creates a generator orchestration system that manages the execution of
+ * documentation generators in dependency order, with support for parallel
+ * processing and streaming results.
  *
- * @typedef {{ ast: GeneratorMetadata }} AstGenerator The AST "generator" is a facade for the AST tree and it isn't really a generator
- * @typedef {AvailableGenerators & AstGenerator} AllGenerators A complete set of the available generators, including the AST one
- *
- * @param {ParserOutput<import('mdast').Root>} input The API doc AST tree
+ * @returns {{ runGenerators: (options: GeneratorOptions) => Promise<unknown[]> }}
  */
-const createGenerator = input => {
+const createGenerator = () => {
+  /** @type {{ [key: string]: Promise | AsyncGenerator }} */
+  const cachedGenerators = {};
+
+  const streamingCache = createStreamingCache();
+
+  /** @type {import('piscina').Piscina} */
+  let pool;
+
   /**
-   * We store all the registered generators to be processed
-   * within a Record, so we can access their results at any time whenever needed
-   * (we store the Promises of the generator outputs)
+   * Gets the collected input from a dependency generator.
    *
-   * @type {{ [K in keyof AllGenerators]: ReturnType<AllGenerators[K]['generate']> }}
+   * @param {string | undefined} dependsOn - Dependency generator name
+   * @returns {Promise<unknown>}
    */
-  const cachedGenerators = { ast: Promise.resolve(input) };
+  const getDependencyInput = async dependsOn => {
+    if (!dependsOn) {
+      return undefined;
+    }
+
+    const result = await cachedGenerators[dependsOn];
+
+    if (isAsyncGenerator(result)) {
+      return streamingCache.getOrCollect(dependsOn, result);
+    }
+
+    return result;
+  };

   /**
-   * Runs the Generator engine with the provided top-level input and the given generator options
+   * Schedules a generator and its dependencies for execution.
    *
-   * @param {GeneratorOptions} options The options for the generator runtime
+   * @param {string} generatorName - Generator to schedule
+   * @param {GeneratorOptions} options - Runtime options
    */
-  const runGenerators = async options => {
-    const { generators, threads } = options;
+  const scheduleGenerator = (generatorName, options) => {
+    if (generatorName in cachedGenerators) {
+      return;
+    }

-    // WorkerPool for chunk-level parallelization within generators
-    const chunkPool = new WorkerPool('./chunk-worker.mjs', threads);
-
-    // Schedule all generators, allowing independent ones to run in parallel.
-    // Each generator awaits its own dependency internally, so generators
-    // with the same dependency (e.g. legacy-html and legacy-json both depend
-    // on metadata) will run concurrently once metadata resolves.
-    for (const generatorName of generators) {
-      // Skip if already scheduled
-      if (generatorName in cachedGenerators) {
-        continue;
-      }
+    const { dependsOn, generate, processChunk } = allGenerators[generatorName];
+
+    // Schedule dependency first
+    if (dependsOn && !(dependsOn in cachedGenerators)) {
+      scheduleGenerator(dependsOn, options);
+    }
+
+    generatorsLogger.debug(`Scheduling "${generatorName}"`, {
+      dependsOn: dependsOn || 'none',
+      streaming: Boolean(processChunk),
+    });
+
+    // Schedule the generator
+    cachedGenerators[generatorName] = (async () => {
+      const dependencyInput = await getDependencyInput(dependsOn);
+
+      generatorsLogger.debug(`Starting "${generatorName}"`);
+
+      // Create parallel worker for streaming generators
+      const worker = processChunk
+        ? createParallelWorker(generatorName, pool, options)
+        : null;

-      const { dependsOn, generate } = allGenerators[generatorName];
+      const result = await generate(dependencyInput, { ...options, worker });

-      // Ensure dependency is scheduled (but don't await its result yet)
-      if (dependsOn && !(dependsOn in cachedGenerators)) {
-        await runGenerators({ ...options, generators: [dependsOn] });
+      // For streaming generators, "Completed" is logged when collection finishes
+      // (in streamingCache.getOrCollect), not here when the generator returns
+      if (!isAsyncGenerator(result)) {
+        generatorsLogger.debug(`Completed "${generatorName}"`);
       }

-      // Create a ParallelWorker for this generator
-      const worker = createParallelWorker(generatorName, chunkPool, options);
+      return result;
+    })();
+  };
+
+  /**
+   * Runs all requested generators with their dependencies.
+   *
+   * @param {GeneratorOptions} options - Runtime options
+   * @returns {Promise<unknown[]>} Results of all requested generators
+   */
+  const runGenerators = async options => {
+    const { generators, threads } = options;

-      /**
-       * Schedule the generator - it awaits its dependency internally
-       * This allows multiple generators with the same dependency to run in parallel
-       */
-      const scheduledGenerator = async () => {
-        const input = await cachedGenerators[dependsOn];
+    generatorsLogger.debug(`Starting pipeline`, {
+      generators: generators.join(', '),
+      threads,
+    });

-        return generate(input, { ...options, worker });
-      };
+    // Create worker pool
+    pool = createWorkerPool(threads);

-      cachedGenerators[generatorName] = scheduledGenerator();
+    // Schedule all generators
+    for (const name of generators) {
+      scheduleGenerator(name, options);
     }

-    // Returns the value of the last generator of the current pipeline
-    return cachedGenerators[generators[generators.length - 1]];
+    // Start all collections in parallel (don't await sequentially)
+    const resultPromises = generators.map(async name => {
+      let result = await cachedGenerators[name];
+
+      if (isAsyncGenerator(result)) {
+        result = await streamingCache.getOrCollect(name, result);
+      }
+
+      return result;
+    });
+
+    const results = await Promise.all(resultPromises);
+
+    await pool.destroy();
+
+    return results;
   };

   return { runGenerators };
diff --git a/src/generators/__tests__/index.test.mjs b/src/generators/__tests__/index.test.mjs
index abcb5851..1e456b0a 100644
--- a/src/generators/__tests__/index.test.mjs
+++ b/src/generators/__tests__/index.test.mjs
@@ -5,7 +5,7 @@ import semver from 'semver';

 import { allGenerators } from '../index.mjs';

-const validDependencies = [...Object.keys(allGenerators), 'ast'];
+const validDependencies = Object.keys(allGenerators);
 const generatorEntries = Object.entries(allGenerators);

 describe('All Generators', () => {
@@ -34,9
+34,18 @@ describe('All Generators', () => { if (generator.dependsOn) { assert.ok( validDependencies.includes(generator.dependsOn), - `Generator "${key}" depends on "${generator.dependsOn}" which is not a valid generator or 'ast'` + `Generator "${key}" depends on "${generator.dependsOn}" which is not a valid generator` ); } }); }); + + it('should have ast generator as a top-level generator with no dependencies', () => { + assert.ok(allGenerators.ast, 'ast generator should exist'); + assert.equal( + allGenerators.ast.dependsOn, + undefined, + 'ast generator should have no dependencies' + ); + }); }); diff --git a/src/generators/api-links/__tests__/fixtures.test.mjs b/src/generators/api-links/__tests__/fixtures.test.mjs index fc6b204f..d20fdfd5 100644 --- a/src/generators/api-links/__tests__/fixtures.test.mjs +++ b/src/generators/api-links/__tests__/fixtures.test.mjs @@ -1,9 +1,9 @@ import { readdir } from 'node:fs/promises'; import { cpus } from 'node:os'; import { basename, extname, join } from 'node:path'; -import { describe, it } from 'node:test'; +import { after, before, describe, it } from 'node:test'; -import WorkerPool from '../../../threading/index.mjs'; +import createWorkerPool from '../../../threading/index.mjs'; import createParallelWorker from '../../../threading/parallel.mjs'; import astJs from '../../ast-js/index.mjs'; import apiLinks from '../index.mjs'; @@ -16,22 +16,36 @@ const sourceFiles = fixtures .map(fixture => join(FIXTURES_DIRECTORY, fixture)); describe('api links', () => { + const threads = cpus().length; + let pool; + + before(() => { + pool = createWorkerPool(threads); + }); + + after(async () => { + await pool.destroy(); + }); + describe('should work correctly for all fixtures', () => { sourceFiles.forEach(sourceFile => { it(`${basename(sourceFile)}`, async t => { - const pool = new WorkerPool('../chunk-worker.mjs', cpus().length); - const worker = createParallelWorker('ast-js', pool, { - threads: 1, + threads, chunkSize: 10, }); - const astJsResult = await astJs.generate(undefined, { + // Collect results from the async generator + const astJsResults = []; + + for await (const chunk of astJs.generate(undefined, { input: [sourceFile], worker, - }); + })) { + astJsResults.push(...chunk); + } - const actualOutput = await apiLinks.generate(astJsResult, { + const actualOutput = await apiLinks.generate(astJsResults, { gitRef: 'https://github.com/nodejs/node/tree/HEAD', }); diff --git a/src/generators/api-links/index.mjs b/src/generators/api-links/index.mjs index 95689be5..e15784bf 100644 --- a/src/generators/api-links/index.mjs +++ b/src/generators/api-links/index.mjs @@ -72,10 +72,9 @@ export default { }); if (output) { - await writeFile( - join(output, 'apilinks.json'), - JSON.stringify(definitions) - ); + const out = join(output, 'apilinks.json'); + + await writeFile(out, JSON.stringify(definitions)); } return definitions; diff --git a/src/generators/ast-js/index.mjs b/src/generators/ast-js/index.mjs index 7967a5b3..6900adee 100644 --- a/src/generators/ast-js/index.mjs +++ b/src/generators/ast-js/index.mjs @@ -5,6 +5,10 @@ import { globSync } from 'glob'; import createJsLoader from '../../loaders/javascript.mjs'; import createJsParser from '../../parsers/javascript.mjs'; +const { loadFiles } = createJsLoader(); + +const { parseJsSource } = createJsParser(); + /** * This generator parses Javascript sources passed into the generator's input * field. 
This is separate from the Markdown parsing step since it's not as
@@ -14,8 +18,9 @@ import createJsParser from '../../parsers/javascript.mjs';
  * so we're only parsing the Javascript sources when we need to.
  *
  * @typedef {unknown} Input
+ * @typedef {Array} Output
  *
- * @type {GeneratorMetadata<Input, Array<unknown>>}
+ * @type {GeneratorMetadata<Input, Output>}
  */
 export default {
   name: 'ast-js',
@@ -24,41 +29,46 @@ export default {

   description: 'Parses Javascript source files passed into the input.',

-  dependsOn: 'metadata',
-
   /**
    * Process a chunk of JavaScript files in a worker thread.
-   * @param {unknown} _
-   * @param {number[]} itemIndices
-   * @param {Partial<GeneratorOptions>} options
+   * Parses JS source files into AST representations.
+   *
+   * @param {string[]} inputSlice - Sliced input paths for this chunk
+   * @param {number[]} itemIndices - Indices into the sliced array
+   * @returns {Promise<Output>} Parsed JS AST objects for each file
    */
-  async processChunk(_, itemIndices, { input }) {
-    const { loadFiles } = createJsLoader();
-    const { parseJsSource } = createJsParser();
+  async processChunk(inputSlice, itemIndices) {
+    const filePaths = itemIndices.map(idx => inputSlice[idx]);
+
+    const vfilesPromises = loadFiles(filePaths);

     const results = [];

-    for (const idx of itemIndices) {
-      const [file] = loadFiles(input[idx]);
+    for (const vfilePromise of vfilesPromises) {
+      const vfile = await vfilePromise;

-      const parsedFile = await parseJsSource(file);
+      const parsed = await parseJsSource(vfile);

-      results.push(parsedFile);
+      results.push(parsed);
     }

     return results;
   },

   /**
-   * @param {Input} _
+   * Generates a JavaScript AST from the input files.
+   *
+   * @param {Input} _ - Unused (files loaded from input paths)
    * @param {Partial<GeneratorOptions>} options
+   * @returns {AsyncGenerator<Output>}
    */
-  async generate(_, { input = [], worker }) {
-    const sourceFiles = globSync(input).filter(
-      filePath => extname(filePath) === '.js'
-    );
+  async *generate(_, { input = [], worker }) {
+    const source = globSync(input).filter(path => extname(path) === '.js');

     // Parse the Javascript sources into ASTs in parallel using worker threads
-    return worker.map(sourceFiles, _, { input: sourceFiles });
+    // source is both the items list and the fullInput since we use sliceInput
+    for await (const chunkResult of worker.stream(source, source)) {
+      yield chunkResult;
+    }
   },
 };
diff --git a/src/generators/ast/index.mjs b/src/generators/ast/index.mjs
new file mode 100644
index 00000000..ad917575
--- /dev/null
+++ b/src/generators/ast/index.mjs
@@ -0,0 +1,72 @@
+'use strict';
+
+import { extname } from 'node:path';
+
+import { globSync } from 'glob';
+
+import createLoader from '../../loaders/markdown.mjs';
+import { getRemark } from '../../utils/remark.mjs';
+
+const { loadFiles } = createLoader();
+
+const remarkProcessor = getRemark();
+
+/**
+ * This generator parses Markdown API doc files into AST trees.
+ * It parallelizes the parsing across worker threads for better performance.
+ *
+ * @typedef {undefined} Input
+ * @typedef {Array<ParserOutput<import('mdast').Root>>} Output
+ *
+ * @type {GeneratorMetadata<Input, Output>}
+ */
+export default {
+  name: 'ast',
+
+  version: '1.0.0',
+
+  description: 'Parses Markdown API doc files into AST trees',
+
+  /**
+   * Process a chunk of markdown files in a worker thread.
+   * Loads and parses markdown files into AST representations.
+   *
+   * @param {string[]} inputSlice - Sliced input paths for this chunk
+   * @param {number[]} itemIndices - Indices into the sliced array
+   * @returns {Promise<Output>}
+   */
+  async processChunk(inputSlice, itemIndices) {
+    const filePaths = itemIndices.map(idx => inputSlice[idx]);
+
+    const vfilesPromises = loadFiles(filePaths);
+
+    const results = [];
+
+    for (const vfilePromise of vfilesPromises) {
+      const vfile = await vfilePromise;
+
+      results.push({
+        tree: remarkProcessor.parse(vfile),
+        file: { stem: vfile.stem, basename: vfile.basename },
+      });
+    }
+
+    return results;
+  },
+
+  /**
+   * Generates AST trees from markdown input files.
+   *
+   * @param {Input} _ - Unused (top-level generator)
+   * @param {Partial<GeneratorOptions>} options
+   * @returns {AsyncGenerator<Output>}
+   */
+  async *generate(_, { input = [], worker }) {
+    const files = globSync(input).filter(path => extname(path) === '.md');
+
+    // Parse markdown files in parallel using worker threads
+    for await (const chunkResult of worker.stream(files, files)) {
+      yield chunkResult;
+    }
+  },
+};
diff --git a/src/generators/index.mjs b/src/generators/index.mjs
index cca7767e..09a4c1a0 100644
--- a/src/generators/index.mjs
+++ b/src/generators/index.mjs
@@ -2,6 +2,7 @@

 import addonVerify from './addon-verify/index.mjs';
 import apiLinks from './api-links/index.mjs';
+import ast from './ast/index.mjs';
 import astJs from './ast-js/index.mjs';
 import jsonSimple from './json-simple/index.mjs';
 import jsxAst from './jsx-ast/index.mjs';
@@ -32,6 +33,7 @@ export const publicGenerators = {
 // These ones are special since they don't produce standard output,
 // and hence, we don't expose them to the CLI.
 const internalGenerators = {
+  ast,
   metadata,
   'jsx-ast': jsxAst,
   'ast-js': astJs,
diff --git a/src/generators/jsx-ast/index.mjs b/src/generators/jsx-ast/index.mjs
index 629646ea..2402b571 100644
--- a/src/generators/jsx-ast/index.mjs
+++ b/src/generators/jsx-ast/index.mjs
@@ -1,47 +1,18 @@
-import { OVERRIDDEN_POSITIONS } from './constants.mjs';
 import { buildSideBarProps } from './utils/buildBarProps.mjs';
 import buildContent from './utils/buildContent.mjs';
+import { getSortedHeadNodes } from './utils/getSortedHeadNodes.mjs';
 import { groupNodesByModule } from '../../utils/generators.mjs';
 import { getRemarkRecma } from '../../utils/remark.mjs';

-/**
- * Sorts entries by OVERRIDDEN_POSITIONS and then heading name.
- * @param {Array<ApiDocMetadataEntry>} entries
- */
-const getSortedHeadNodes = entries => {
-  /**
-   * Sorts entries by OVERRIDDEN_POSITIONS and then heading name.
-   * @param {ApiDocMetadataEntry} a
-   * @param {ApiDocMetadataEntry} b
-   * @returns {number}
-   */
-  const headingSortFn = (a, b) => {
-    const ai = OVERRIDDEN_POSITIONS.indexOf(a.api);
-    const bi = OVERRIDDEN_POSITIONS.indexOf(b.api);
-
-    if (ai !== -1 && bi !== -1) {
-      return ai - bi;
-    }
-
-    if (ai !== -1) {
-      return -1;
-    }
-
-    if (bi !== -1) {
-      return 1;
-    }
-
-    return a.heading.data.name.localeCompare(b.heading.data.name);
-  };
-
-  return entries.filter(node => node.heading.depth === 1).sort(headingSortFn);
-};
+const remarkRecma = getRemarkRecma();

 /**
  * Generator for converting MDAST to JSX AST.
  *
  * @typedef {Array<ApiDocMetadataEntry>} Input
- * @type {GeneratorMetadata}
+ * @typedef {Array} Output
+ *
+ * @type {GeneratorMetadata<Input, Output>}
  */
 export default {
   name: 'jsx-ast',
@@ -54,34 +25,31 @@ export default {

   /**
    * Process a chunk of items in a worker thread.
-   * @param {Input} fullInput
-   * @param {number[]} itemIndices
-   * @param {Partial<GeneratorOptions>} options
+   * Transforms metadata entries into JSX AST nodes.
+ * + * Each item is a SlicedModuleInput containing the head node + * and all entries for that module - no need to recompute grouping. + * + * @param {Array<{head: ApiDocMetadataEntry, entries: Array}>} slicedInput - Pre-sliced module data + * @param {number[]} itemIndices - Indices of items to process + * @param {{ docPages: Array<[string, string]>, releases: Array, version: import('semver').SemVer }} options - Serializable options + * @returns {Promise} JSX AST programs for each module */ - async processChunk(fullInput, itemIndices, { index, releases, version }) { - const remarkRecma = getRemarkRecma(); - const groupedModules = groupNodesByModule(fullInput); - const headNodes = getSortedHeadNodes(fullInput); - - const docPages = index - ? index.map(({ section, api }) => [section, `${api}.html`]) - : headNodes.map(node => [node.heading.data.name, `${node.api}.html`]); - + async processChunk( + slicedInput, + itemIndices, + { docPages, releases, version } + ) { const results = []; for (const idx of itemIndices) { - const entry = headNodes[idx]; + const { head, entries } = slicedInput[idx]; - const sideBarProps = buildSideBarProps( - entry, - releases, - version, - docPages - ); + const sideBarProps = buildSideBarProps(head, releases, version, docPages); const content = await buildContent( - groupedModules.get(entry.api), - entry, + entries, + head, sideBarProps, remarkRecma ); @@ -95,13 +63,29 @@ export default { /** * Generates a JSX AST * - * @param {Input} entries + * @param {Input} input * @param {Partial} options - * @returns {Promise>} Array of generated content */ - async generate(entries, { index, releases, version, worker }) { - const headNodes = entries.filter(node => node.heading.depth === 1); + async *generate(input, { index, releases, version, worker }) { + const groupedModules = groupNodesByModule(input); + const headNodes = getSortedHeadNodes(input); - return worker.map(headNodes, entries, { index, releases, version }); + // Pre-compute docPages once in main thread + const docPages = index + ? 
index.map(({ section, api }) => [section, `${api}.html`]) + : headNodes.map(node => [node.heading.data.name, `${node.api}.html`]); + + // Create sliced input: each item contains head + its module's entries + // This avoids sending all 4700+ entries to every worker + const entries = headNodes.map(head => ({ + head, + entries: groupedModules.get(head.api), + })); + + const deps = { docPages, releases, version }; + + for await (const chunkResult of worker.stream(entries, entries, deps)) { + yield chunkResult; + } }, }; diff --git a/src/generators/jsx-ast/utils/buildContent.mjs b/src/generators/jsx-ast/utils/buildContent.mjs index 17b48bf9..5d92801c 100644 --- a/src/generators/jsx-ast/utils/buildContent.mjs +++ b/src/generators/jsx-ast/utils/buildContent.mjs @@ -295,10 +295,7 @@ const buildContent = async (metadataEntries, head, sideBarProps, remark) => { const ast = await remark.run(root); // The final MDX content is the expression in the Program's first body node - return { - ...ast.body[0].expression, - data: head, - }; + return { ...ast.body[0].expression, data: head }; }; export default buildContent; diff --git a/src/generators/jsx-ast/utils/getSortedHeadNodes.mjs b/src/generators/jsx-ast/utils/getSortedHeadNodes.mjs new file mode 100644 index 00000000..d8c015b0 --- /dev/null +++ b/src/generators/jsx-ast/utils/getSortedHeadNodes.mjs @@ -0,0 +1,36 @@ +'use strict'; + +import { OVERRIDDEN_POSITIONS } from '../constants.mjs'; + +/** + * Sorts entries by OVERRIDDEN_POSITIONS and then heading name. + * @param {ApiDocMetadataEntry} a + * @param {ApiDocMetadataEntry} b + * @returns {number} + */ +const headingSortFn = (a, b) => { + const ai = OVERRIDDEN_POSITIONS.indexOf(a.api); + const bi = OVERRIDDEN_POSITIONS.indexOf(b.api); + + if (ai !== -1 && bi !== -1) { + return ai - bi; + } + + if (ai !== -1) { + return -1; + } + + if (bi !== -1) { + return 1; + } + + return a.heading.data.name.localeCompare(b.heading.data.name); +}; + +/** + * Filters and sorts entries by OVERRIDDEN_POSITIONS and then heading name. + * @param {Array} entries + * @returns {Array} + */ +export const getSortedHeadNodes = entries => + entries.filter(node => node.heading.depth === 1).sort(headingSortFn); diff --git a/src/generators/legacy-html-all/index.mjs b/src/generators/legacy-html-all/index.mjs index 15d448a1..3166080b 100644 --- a/src/generators/legacy-html-all/index.mjs +++ b/src/generators/legacy-html-all/index.mjs @@ -6,27 +6,17 @@ import { join, resolve } from 'node:path'; import HTMLMinifier from '@minify-html/node'; import { getRemarkRehype } from '../../utils/remark.mjs'; -import dropdowns from '../legacy-html/utils/buildDropdowns.mjs'; +import { replaceTemplateValues } from '../legacy-html/utils/replaceTemplateValues.mjs'; import tableOfContents from '../legacy-html/utils/tableOfContents.mjs'; /** - * @typedef {{ - * api: string; - * added: string; - * section: string; - * version: string; - * toc: string; - * nav: string; - * content: string; - * }} TemplateValues - * * This generator generates the legacy HTML pages of the legacy API docs * for retro-compatibility and while we are implementing the new 'react' and 'html' generators. 
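To make the `getSortedHeadNodes` ordering concrete, a small worked example (the `OVERRIDDEN_POSITIONS` values and entries here are stand-ins, not the real constants from `src/generators/jsx-ast/constants.mjs`):

```js
// Overridden APIs sort first, in list order; all remaining head nodes
// follow alphabetically by heading name.
const OVERRIDDEN_POSITIONS = ['synopsis', 'cli']; // stand-in values

const heads = [
  { api: 'zlib', heading: { depth: 1, data: { name: 'Zlib' } } },
  { api: 'cli', heading: { depth: 1, data: { name: 'Command-line API' } } },
  { api: 'buffer', heading: { depth: 1, data: { name: 'Buffer' } } },
  { api: 'synopsis', heading: { depth: 1, data: { name: 'Usage' } } },
];

// Expected result order: synopsis, cli, Buffer, Zlib
```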
* * This generator is a top-level generator, and it takes the raw AST tree of the API doc files * and generates the HTML files to the specified output directory from the configuration settings * - * @typedef {Array} Input + * @typedef {Array} Input * * @type {GeneratorMetadata} */ @@ -44,10 +34,9 @@ export default { * Generates the `all.html` file from the `legacy-html` generator * @param {Input} input * @param {Partial} options + * @returns {Promise} */ async generate(input, { version, releases, output }) { - const inputWithoutIndex = input.filter(entry => entry.api !== 'index'); - // Gets a Remark Processor that parses Markdown to minified HTML const remarkWithRehype = getRemarkRehype(); @@ -58,17 +47,18 @@ export default { // Reads the API template.html file to be used as a base for the HTML files const apiTemplate = await readFile(join(baseDir, 'template.html'), 'utf-8'); + // Filter out index entries and extract needed properties + const entries = input.filter(entry => entry.api !== 'index'); + // Aggregates all individual Table of Contents into one giant string - const aggregatedToC = inputWithoutIndex.map(entry => entry.toc).join('\n'); + const aggregatedToC = entries.map(entry => entry.toc).join('\n'); // Aggregates all individual content into one giant string - const aggregatedContent = inputWithoutIndex - .map(entry => entry.content) - .join('\n'); + const aggregatedContent = entries.map(entry => entry.content).join('\n'); // Creates a "mimic" of an `ApiDocMetadataEntry` which fulfils the requirements // for generating the `tableOfContents` with the `tableOfContents.parseNavigationNode` parser - const sideNavigationFromValues = inputWithoutIndex.map(entry => ({ + const sideNavigationFromValues = entries.map(entry => ({ api: entry.api, heading: { data: { depth: 1, name: entry.section } }, })); @@ -81,21 +71,25 @@ export default { }) ); - const generatedAllTemplate = apiTemplate - .replace('__ID__', 'all') - .replace(/__FILENAME__/g, 'all') - .replace('__SECTION__', 'All') - .replace(/__VERSION__/g, `v${version.version}`) - .replace(/__TOC__/g, tableOfContents.wrapToC(aggregatedToC)) - .replace(/__GTOC__/g, parsedSideNav) - .replace('__CONTENT__', aggregatedContent) - .replace(/__TOC_PICKER__/g, dropdowns.buildToC(aggregatedToC)) - .replace(/__GTOC_PICKER__/g, '') - .replace('__ALTDOCS__', dropdowns.buildVersions('all', '', releases)) - .replace('__EDIT_ON_GITHUB__', ''); + const templateValues = { + api: 'all', + added: '', + section: 'All', + version: `v${version.version}`, + toc: aggregatedToC, + nav: String(parsedSideNav), + content: aggregatedContent, + }; + + const result = replaceTemplateValues( + apiTemplate, + templateValues, + releases, + { skipGitHub: true, skipGtocPicker: true } + ); // We minify the html result to reduce the file size and keep it "clean" - const minified = HTMLMinifier.minify(Buffer.from(generatedAllTemplate), {}); + const minified = HTMLMinifier.minify(Buffer.from(result), {}); if (output) { await writeFile(join(output, 'all.html'), minified); diff --git a/src/generators/legacy-html/index.mjs b/src/generators/legacy-html/index.mjs index 525c7978..08b9674a 100644 --- a/src/generators/legacy-html/index.mjs +++ b/src/generators/legacy-html/index.mjs @@ -6,7 +6,7 @@ import { join } from 'node:path'; import HTMLMinifier from '@minify-html/node'; import buildContent from './utils/buildContent.mjs'; -import dropdowns from './utils/buildDropdowns.mjs'; +import { replaceTemplateValues } from './utils/replaceTemplateValues.mjs'; import { safeCopy } from 
'./utils/safeCopy.mjs'; import tableOfContents from './utils/tableOfContents.mjs'; import { groupNodesByModule } from '../../utils/generators.mjs'; @@ -19,16 +19,9 @@ import { getRemarkRehypeWithShiki } from '../../utils/remark.mjs'; */ const getHeading = name => ({ data: { depth: 1, name } }); +const remarkRehypeProcessor = getRemarkRehypeWithShiki(); + /** - * @typedef {{ - * api: string; - * added: string; - * section: string; - * version: string; - * toc: string; - * nav: string; - * content: string; - * }} TemplateValues * * This generator generates the legacy HTML pages of the legacy API docs * for retro-compatibility and while we are implementing the new 'react' and 'html' generators. @@ -37,8 +30,9 @@ const getHeading = name => ({ data: { depth: 1, name } }); * and generates the HTML files to the specified output directory from the configuration settings * * @typedef {Array} Input + * @typedef {Array} Output * - * @type {GeneratorMetadata>} + * @type {GeneratorMetadata} */ export default { name: 'legacy-html', @@ -52,49 +46,21 @@ export default { /** * Process a chunk of items in a worker thread. - * @param {Input} fullInput - * @param {number[]} itemIndices - * @param {Partial} options + * Builds HTML template objects - FS operations happen in generate(). + * + * Each item is pre-grouped {head, nodes, headNodes} - no need to + * recompute groupNodesByModule for every chunk. + * + * @param {Array<{ head: ApiDocMetadataEntry, nodes: Array, headNodes: Array }> } slicedInput - Pre-sliced module data + * @param {number[]} itemIndices - Indices into the sliced array + * @param {{ version: SemVer, parsedSideNav: string }} deps - Dependencies passed from generate() + * @returns {Promise} Template objects for each processed module */ - async processChunk( - fullInput, - itemIndices, - { releases, version, output, apiTemplate, parsedSideNav } - ) { - const remarkRehypeProcessor = getRemarkRehypeWithShiki(); - const groupedModules = groupNodesByModule(fullInput); - - const headNodes = fullInput - .filter(node => node.heading.depth === 1) - .sort((a, b) => a.heading.data.name.localeCompare(b.heading.data.name)); - - /** - * Replaces the template values in the API template with the given values. 
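For reference, a hedged usage sketch of the extracted `replaceTemplateValues` helper that this refactor introduces (the template string and all field values are invented placeholders; the signature and the skip options match the new util added later in this diff):

```js
import { replaceTemplateValues } from './utils/replaceTemplateValues.mjs';

// Illustrative stand-in; the real template is template.html on disk and
// also carries __ID__, __FILENAME__, __TOC_PICKER__, etc.
const apiTemplate = '<h1>__SECTION__ (__VERSION__)</h1>__TOC__ __GTOC__ __CONTENT__';

const html = replaceTemplateValues(
  apiTemplate,
  {
    api: 'fs',
    added: 'v0.10.0',
    section: 'File system',
    version: 'v22.0.0',
    toc: '<ul>(toc)</ul>',
    nav: '<ul>(nav)</ul>',
    content: '<p>(content)</p>',
  },
  [], // releases; empty keeps the version dropdown trivial for the sketch
  { skipGitHub: true, skipGtocPicker: true } // as all.html passes them
);
```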
- * @param {TemplateValues} values - The values to replace the template values with - * @returns {string} The replaced template values - */ - const replaceTemplateValues = values => { - const { api, added, section, version, toc, nav, content } = values; - - return apiTemplate - .replace('__ID__', api) - .replace(/__FILENAME__/g, api) - .replace('__SECTION__', section) - .replace(/__VERSION__/g, version) - .replace(/__TOC__/g, tableOfContents.wrapToC(toc)) - .replace(/__GTOC__/g, nav) - .replace('__CONTENT__', content) - .replace(/__TOC_PICKER__/g, dropdowns.buildToC(toc)) - .replace(/__GTOC_PICKER__/g, dropdowns.buildNavigation(nav)) - .replace('__ALTDOCS__', dropdowns.buildVersions(api, added, releases)) - .replace('__EDIT_ON_GITHUB__', dropdowns.buildGitHub(api)); - }; - + async processChunk(slicedInput, itemIndices, { version, parsedSideNav }) { const results = []; for (const idx of itemIndices) { - const head = headNodes[idx]; - const nodes = groupedModules.get(head.api); + const { head, nodes, headNodes } = slicedInput[idx]; const activeSideNav = String(parsedSideNav).replace( `class="nav-${head.api}`, @@ -116,7 +82,7 @@ export default { const apiAsHeading = head.api.charAt(0).toUpperCase() + head.api.slice(1); - const generatedTemplate = { + const template = { api: head.api, added: head.introduced_in ?? '', section: head.heading.data.name || apiAsHeading, @@ -126,15 +92,7 @@ export default { content: parsedContent, }; - if (output) { - // We minify the html result to reduce the file size and keep it "clean" - const result = replaceTemplateValues(generatedTemplate); - const minified = HTMLMinifier.minify(Buffer.from(result), {}); - - await writeFile(join(output, `${head.api}.html`), minified); - } - - results.push(generatedTemplate); + results.push(template); } return results; @@ -144,14 +102,15 @@ export default { * Generates the legacy version of the API docs in HTML * @param {Input} input * @param {Partial} options + * @returns {AsyncGenerator} */ - async generate(input, { index, releases, version, output, worker }) { - const remarkRehypeProcessor = getRemarkRehypeWithShiki(); - + async *generate(input, { index, releases, version, output, worker }) { const baseDir = import.meta.dirname; const apiTemplate = await readFile(join(baseDir, 'template.html'), 'utf-8'); + const groupedModules = groupNodesByModule(input); + const headNodes = input .filter(node => node.heading.depth === 1) .sort((a, b) => a.heading.data.name.localeCompare(b.heading.data.name)); @@ -167,15 +126,6 @@ export default { }) ); - const generatedValues = await worker.map(headNodes, input, { - index, - releases, - version, - output, - apiTemplate, - parsedSideNav: String(parsedSideNav), - }); - if (output) { // Define the source folder for API docs assets const srcAssets = join(baseDir, 'assets'); @@ -190,6 +140,30 @@ export default { await safeCopy(srcAssets, assetsFolder); } - return generatedValues; + // Create sliced input: each item contains head + its module's entries + headNodes reference + // This avoids sending all ~4900 entries to every worker and recomputing groupings + const entries = headNodes.map(head => ({ + head, + nodes: groupedModules.get(head.api), + headNodes, + })); + + const deps = { version, parsedSideNav: String(parsedSideNav) }; + + // Stream chunks as they complete - HTML files are written immediately + for await (const chunkResult of worker.stream(entries, entries, deps)) { + // Write files for this chunk in the generate method (main thread) + if (output) { + for (const template of 
chunkResult) { + const result = replaceTemplateValues(apiTemplate, template, releases); + + const minified = HTMLMinifier.minify(Buffer.from(result), {}); + + await writeFile(join(output, `${template.api}.html`), minified); + } + } + + yield chunkResult; + } }, }; diff --git a/src/generators/legacy-html/types.d.ts b/src/generators/legacy-html/types.d.ts new file mode 100644 index 00000000..741a0e63 --- /dev/null +++ b/src/generators/legacy-html/types.d.ts @@ -0,0 +1,9 @@ +export interface TemplateValues { + api: string; + added: string; + section: string; + version: string; + toc: string; + nav: string; + content: string; +} diff --git a/src/generators/legacy-html/utils/buildDropdowns.mjs b/src/generators/legacy-html/utils/buildDropdowns.mjs index c612b7d6..a33b7a72 100644 --- a/src/generators/legacy-html/utils/buildDropdowns.mjs +++ b/src/generators/legacy-html/utils/buildDropdowns.mjs @@ -15,7 +15,7 @@ import { * * @param {string} tableOfContents The stringified ToC */ -const buildToC = tableOfContents => { +export const buildToC = tableOfContents => { if (tableOfContents.length) { return ( `
  • ` + @@ -36,7 +36,7 @@ const buildToC = tableOfContents => { * * @param {string} navigationContents The stringified Navigation */ -const buildNavigation = navigationContents => +export const buildNavigation = navigationContents => `
  • ` + `Index` + `
    • Index` + @@ -52,7 +52,7 @@ const buildNavigation = navigationContents => * @param {string} added The version the API was added * @param {Array} versions All available Node.js releases */ -const buildVersions = (api, added, versions) => { +export const buildVersions = (api, added, versions) => { const compatibleVersions = getCompatibleVersions(added, versions); // Parses the SemVer version into something we use for URLs and to display the Node.js version @@ -80,14 +80,7 @@ const buildVersions = (api, added, versions) => { * * @param {string} api The current API node name */ -const buildGitHub = api => +export const buildGitHub = api => `
    • ` + `` + `Edit on GitHub
    • `; - -export default { - buildToC, - buildNavigation, - buildVersions, - buildGitHub, -}; diff --git a/src/generators/legacy-html/utils/replaceTemplateValues.mjs b/src/generators/legacy-html/utils/replaceTemplateValues.mjs new file mode 100644 index 00000000..ae907246 --- /dev/null +++ b/src/generators/legacy-html/utils/replaceTemplateValues.mjs @@ -0,0 +1,37 @@ +'use strict'; + +import { + buildToC, + buildNavigation, + buildVersions, + buildGitHub, +} from './buildDropdowns.mjs'; +import tableOfContents from './tableOfContents.mjs'; + +/** + * Replaces the template values in the API template with the given values. + * @param {string} apiTemplate - The HTML template string + * @param {import('../types').TemplateValues} values - The values to replace the template values with + * @param {Array} releases - The releases array for version dropdown + * @param {{ skipGitHub?: boolean; skipGtocPicker?: boolean }} [options] - Optional settings + * @returns {string} The replaced template values + */ +export const replaceTemplateValues = ( + apiTemplate, + { api, added, section, version, toc, nav, content }, + releases, + { skipGitHub = false, skipGtocPicker = false } = {} +) => { + return apiTemplate + .replace('__ID__', api) + .replace(/__FILENAME__/g, api) + .replace('__SECTION__', section) + .replace(/__VERSION__/g, version) + .replace(/__TOC__/g, tableOfContents.wrapToC(toc)) + .replace(/__GTOC__/g, nav) + .replace('__CONTENT__', content) + .replace(/__TOC_PICKER__/g, buildToC(toc)) + .replace(/__GTOC_PICKER__/g, skipGtocPicker ? '' : buildNavigation(nav)) + .replace('__ALTDOCS__', buildVersions(api, added, releases)) + .replace('__EDIT_ON_GITHUB__', skipGitHub ? '' : buildGitHub(api)); +}; diff --git a/src/generators/legacy-json-all/index.mjs b/src/generators/legacy-json-all/index.mjs index 5fb8c061..c10544aa 100644 --- a/src/generators/legacy-json-all/index.mjs +++ b/src/generators/legacy-json-all/index.mjs @@ -8,8 +8,9 @@ import { join } from 'node:path'; * JSON file (`all.json`). 
* * @typedef {Array} Input + * @typedef {import('./types.d.ts').Output} Output * - * @type {GeneratorMetadata} + * @type {GeneratorMetadata} */ export default { name: 'legacy-json-all', @@ -26,6 +27,7 @@ export default { * * @param {Input} input * @param {Partial} options + * @returns {Promise} */ async generate(input, { output }) { /** @@ -42,17 +44,14 @@ export default { methods: [], }; - const propertiesToCopy = [ - 'miscs', - 'modules', - 'classes', - 'globals', - 'methods', - ]; + /** + * The properties to copy from each section in the input + */ + const propertiesToCopy = Object.keys(generatedValue); - input.forEach(section => { - // Copy the relevant properties from each section into our output - propertiesToCopy.forEach(property => { + // Aggregate all sections into the output + for (const section of input) { + for (const property of propertiesToCopy) { const items = section[property]; if (Array.isArray(items)) { @@ -62,8 +61,8 @@ export default { generatedValue[property].push(...enrichedItems); } - }); - }); + } + } if (output) { await writeFile(join(output, 'all.json'), JSON.stringify(generatedValue)); diff --git a/src/generators/legacy-json/index.mjs b/src/generators/legacy-json/index.mjs index 9d468760..b0ae17ea 100644 --- a/src/generators/legacy-json/index.mjs +++ b/src/generators/legacy-json/index.mjs @@ -6,6 +6,8 @@ import { join } from 'node:path'; import { createSectionBuilder } from './utils/buildSection.mjs'; import { groupNodesByModule } from '../../utils/generators.mjs'; +const buildSection = createSectionBuilder(); + /** * This generator is responsible for generating the legacy JSON files for the * legacy API docs for retro-compatibility. It is to be replaced while we work @@ -16,8 +18,9 @@ import { groupNodesByModule } from '../../utils/generators.mjs'; * config. * * @typedef {Array} Input + * @typedef {Array} Output * - * @type {GeneratorMetadata} + * @type {GeneratorMetadata} */ export default { name: 'legacy-json', @@ -30,31 +33,22 @@ export default { /** * Process a chunk of items in a worker thread. - * @param {Input} fullInput - * @param {number[]} itemIndices - * @param {Partial} options + * Builds JSON sections - FS operations happen in generate(). + * + * Each item is pre-grouped {head, nodes} - no need to + * recompute groupNodesByModule for every chunk. 
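The `all.json` aggregation above condenses to the following sketch (`aggregateSections` is a hypothetical name, and the per-item enrichment the real generator applies is elided here):

```js
// The output object's own keys double as the list of properties to copy
// from every section, so the key list can never drift out of sync.
const aggregateSections = input => {
  const generatedValue = {
    miscs: [],
    modules: [],
    classes: [],
    globals: [],
    methods: [],
  };

  for (const section of input) {
    for (const property of Object.keys(generatedValue)) {
      const items = section[property];

      if (Array.isArray(items)) {
        generatedValue[property].push(...items);
      }
    }
  }

  return generatedValue;
};
```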
+ * + * @param {Array<{ head: ApiDocMetadataEntry, nodes: Array }>} slicedInput - Pre-sliced module data + * @param {number[]} itemIndices - Indices into the sliced array + * @returns {Promise} JSON sections for each processed module */ - async processChunk(fullInput, itemIndices, { output }) { - const buildSection = createSectionBuilder(); - const groupedModules = groupNodesByModule(fullInput); - - const headNodes = fullInput.filter(node => node.heading.depth === 1); - + async processChunk(slicedInput, itemIndices) { const results = []; for (const idx of itemIndices) { - const head = headNodes[idx]; - const nodes = groupedModules.get(head.api); - const section = buildSection(head, nodes); - - if (output) { - await writeFile( - join(output, `${head.api}.json`), - JSON.stringify(section) - ); - } + const { head, nodes } = slicedInput[idx]; - results.push(section); + results.push(buildSection(head, nodes)); } return results; @@ -65,10 +59,30 @@ export default { * * @param {Input} input * @param {Partial} options + * @returns {AsyncGenerator} */ - async generate(input, { output, worker }) { + async *generate(input, { output, worker }) { + const groupedModules = groupNodesByModule(input); + const headNodes = input.filter(node => node.heading.depth === 1); - return worker.map(headNodes, input, { output }); + // Create sliced input: each item contains head + its module's entries + // This avoids sending all 4900+ entries to every worker + const entries = headNodes.map(head => ({ + head, + nodes: groupedModules.get(head.api), + })); + + for await (const chunkResult of worker.stream(entries, entries)) { + if (output) { + for (const section of chunkResult) { + const out = join(output, `${section.api}.json`); + + await writeFile(out, JSON.stringify(section)); + } + } + + yield chunkResult; + } }, }; diff --git a/src/generators/legacy-json/types.d.ts b/src/generators/legacy-json/types.d.ts index 3174fc7a..9b6f1d47 100644 --- a/src/generators/legacy-json/types.d.ts +++ b/src/generators/legacy-json/types.d.ts @@ -45,6 +45,11 @@ export interface Meta { * Base interface for sections in the API documentation, representing common properties. */ export interface SectionBase { + /** + * The inferred API file or module name (e.g., 'fs', 'http', 'path'). + */ + api: string; + /** * The type of section (e.g., 'module', 'method', 'property'). */ diff --git a/src/generators/legacy-json/utils/buildSection.mjs b/src/generators/legacy-json/utils/buildSection.mjs index 247a7a5f..185bfba0 100644 --- a/src/generators/legacy-json/utils/buildSection.mjs +++ b/src/generators/legacy-json/utils/buildSection.mjs @@ -187,7 +187,11 @@ export const createSectionBuilder = () => { * @returns {import('../types.d.ts').ModuleSection} The constructed module section. 
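Stepping back to the streaming `generate()` above: a hypothetical consumer that drains it looks like this (the helper name is invented; the chunk shape is from this diff):

```js
// Each yielded chunk is an array of completed module sections, arriving in
// completion order rather than input order, so a consumer that needs the
// full result simply flattens as it drains.
const collectSections = async (generator, input, options) => {
  const sections = [];

  for await (const chunk of generator.generate(input, options)) {
    sections.push(...chunk);
  }

  return sections;
};
```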
*/ return (head, entries) => { - const rootModule = { type: 'module', source: head.api_doc_source }; + const rootModule = { + type: 'module', + api: head.api, + source: head.api_doc_source, + }; buildHierarchy(entries).forEach(entry => handleEntry(entry, rootModule)); diff --git a/src/generators/llms-txt/index.mjs b/src/generators/llms-txt/index.mjs index b80c2348..5806ca24 100644 --- a/src/generators/llms-txt/index.mjs +++ b/src/generators/llms-txt/index.mjs @@ -24,18 +24,17 @@ export default { /** * Generates a llms.txt file * - * @param {Input} entries + * @param {Input} input * @param {Partial} options * @returns {Promise} */ - async generate(entries, { output }) { + async generate(input, { output }) { const template = await readFile( join(import.meta.dirname, 'template.txt'), 'utf-8' ); - const apiDocsLinks = entries - // Filter non top-level headings + const apiDocsLinks = input .filter(entry => entry.heading.depth === 1) .map(entry => `- ${buildApiDocLink(entry)}`) .join('\n'); diff --git a/src/generators/man-page/index.mjs b/src/generators/man-page/index.mjs index 229e1126..30a13fda 100644 --- a/src/generators/man-page/index.mjs +++ b/src/generators/man-page/index.mjs @@ -32,12 +32,14 @@ export default { /** * Generates the Node.js man-page + * * @param {Input} input * @param {Partial} options */ async generate(input, options) { // Filter to only 'cli'. const components = input.filter(({ api }) => api === 'cli'); + if (!components.length) { throw new Error('Could not find any `cli` documentation.'); } @@ -46,9 +48,11 @@ export default { const optionsStart = components.findIndex( ({ slug }) => slug === DOC_SLUG_OPTIONS ); + const environmentStart = components.findIndex( ({ slug }) => slug === DOC_SLUG_ENVIRONMENT ); + // The first header that is <3 in depth after environmentStart const environmentEnd = components.findIndex( ({ heading }, index) => heading.depth < 3 && index > environmentStart diff --git a/src/generators/metadata/index.mjs b/src/generators/metadata/index.mjs index 750dd82c..a09b10c3 100644 --- a/src/generators/metadata/index.mjs +++ b/src/generators/metadata/index.mjs @@ -6,8 +6,9 @@ import { parseApiDoc } from './utils/parse.mjs'; * This generator generates a flattened list of metadata entries from a API doc * * @typedef {Array>} Input + * @typedef {Array} Output * - * @type {GeneratorMetadata>} + * @type {GeneratorMetadata} */ export default { name: 'metadata', @@ -22,9 +23,10 @@ export default { * Process a chunk of API doc files in a worker thread. * Called by chunk-worker.mjs for parallel processing. 
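A shape note for the metadata worker, with illustrative data: each file in a chunk parses to its own array of entries, so a chunk result is an array of arrays, which is why the streaming `generate()` below flattens before yielding.

```js
// Illustrative chunk result: one inner array of entries per processed file.
const chunkResult = [
  [{ api: 'fs' }], // entries parsed from fs.md
  [{ api: 'path' }, { api: 'path' }], // entries parsed from path.md
];

const flattened = chunkResult.flat(); // three flat ApiDocMetadataEntry-like items
```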
* - * @param {Input} fullInput - Full input array - * @param {number[]} itemIndices - Indices of items to process - * @param {Partial} options + * @param {Input} fullInput - Full input array (parsed API doc files) + * @param {number[]} itemIndices - Indices of files to process + * @param {Partial} deps - Dependencies passed from generate() + * @returns {Promise} Metadata entries for processed files */ async processChunk(fullInput, itemIndices, { typeMap }) { const results = []; @@ -38,12 +40,16 @@ export default { /** * @param {Input} inputs - * @param {GeneratorOptions} options - * @returns {Promise>} + * @param {Partial} options + * @returns {AsyncGenerator} */ - async generate(inputs, { typeMap, worker }) { - const results = await worker.map(inputs, inputs, { typeMap }); + async *generate(inputs, { typeMap, worker }) { + const deps = { typeMap }; - return results.flat(); + // Stream chunks as they complete - allows dependent generators + // to start collecting/preparing while we're still processing + for await (const chunkResult of worker.stream(inputs, inputs, deps)) { + yield chunkResult.flat(); + } }, }; diff --git a/src/generators/metadata/utils/parse.mjs b/src/generators/metadata/utils/parse.mjs index 68436303..bb3efeb4 100644 --- a/src/generators/metadata/utils/parse.mjs +++ b/src/generators/metadata/utils/parse.mjs @@ -12,6 +12,10 @@ import createQueries from '../../../utils/queries/index.mjs'; import { getRemark } from '../../../utils/remark.mjs'; import { IGNORE_STABILITY_STEMS } from '../constants.mjs'; +// Creates an instance of the Remark processor with GFM support +// which is used for stringifying the AST tree back to Markdown +const remarkProcessor = getRemark(); + /** * This generator generates a flattened list of metadata entries from a API doc * @@ -42,10 +46,6 @@ export const parseApiDoc = ({ file, tree }, typeMap) => { addStabilityMetadata, } = createQueries(typeMap); - // Creates an instance of the Remark processor with GFM support - // which is used for stringifying the AST tree back to Markdown - const remarkProcessor = getRemark(); - // Creates a new Slugger instance for the current API doc file const nodeSlugger = createNodeSlugger(); diff --git a/src/generators/orama-db/index.mjs b/src/generators/orama-db/index.mjs index 6d6b047a..7d59f678 100644 --- a/src/generators/orama-db/index.mjs +++ b/src/generators/orama-db/index.mjs @@ -44,7 +44,9 @@ export function buildHierarchicalTitle(headings, currentIndex) { */ export default { name: 'orama-db', + version: '1.0.0', + description: 'Generates the Orama database for the API docs.', dependsOn: 'metadata', diff --git a/src/generators/types.d.ts b/src/generators/types.d.ts index 201d8e39..3ef3ee93 100644 --- a/src/generators/types.d.ts +++ b/src/generators/types.d.ts @@ -1,27 +1,34 @@ +import type { SemVer } from 'semver'; import type { ApiDocReleaseEntry } from '../types'; -import type { publicGenerators } from './index.mjs'; +import type { publicGenerators, allGenerators } from './index.mjs'; declare global { - // All available generators as an inferable type, to allow Generator interfaces - // to be type complete and runtime friendly within `runGenerators` + // Public generators exposed to the CLI export type AvailableGenerators = typeof publicGenerators; - // ParallelWorker interface for item-level parallelization using real worker threads + // All generators including internal ones (metadata, jsx-ast, ast-js) + export type AllGenerators = typeof allGenerators; + + /** + * ParallelWorker interface for distributing 
work across Node.js worker threads. + * Streams results as chunks complete, enabling pipeline parallelism. + */ export interface ParallelWorker { /** - * Process items in parallel using real worker threads. - * Items are split into chunks, each chunk processed by a separate worker. + * Processes items in parallel across worker threads and yields results + * as each chunk completes. Enables downstream processing to begin + * while upstream chunks are still being processed. * - * @param items - Items to process (used to determine indices) + * @param items - Items to process (determines chunk distribution) * @param fullInput - Full input data for context rebuilding in workers * @param opts - Additional options to pass to workers - * @returns Results in same order as input items + * @yields Each chunk's results as they complete */ - map( + stream( items: T[], - fullInput: unknown, + fullInput: T[], opts?: Record - ): Promise; + ): AsyncGenerator; } // This is the runtime config passed to the API doc generators @@ -70,9 +77,20 @@ declare global { worker: ParallelWorker; } + export type ParallelGeneratorOptions = Partial< + Omit + >; + + export interface ParallelTaskOptions { + generatorName: keyof AllGenerators; + input: unknown[]; + itemIndices: number[]; + options: ParallelGeneratorOptions & Record; + } + export interface GeneratorMetadata { - // The name of the Generator. Must match the Key in the AvailableGenerators - name: keyof AvailableGenerators; + // The name of the Generator. Must match the Key in AllGenerators + name: keyof AllGenerators; version: string; @@ -95,14 +113,12 @@ declare global { * If you pass `createGenerator` with ['react', 'html'], the 'react' generator will be executed first, * as it is a top level generator and then the 'html' generator would be executed after the 'react' generator. * - * The 'ast' generator is the top-level parser, and if 'ast' is passed to `dependsOn`, then the generator - * will be marked as a top-level generator. + * The 'ast' generator is the top-level parser for markdown files. It has no dependencies. * * The `ast-js` generator is the top-level parser for JavaScript files. It - * passes the ASTs for any JavaScript files given in the input. Like `ast`, - * any generator depending on it is marked as a top-level generator. + * passes the ASTs for any JavaScript files given in the input. */ - dependsOn: keyof AvailableGenerators | 'ast' | 'ast-js'; + dependsOn: keyof AllGenerators | undefined; /** * Generators are abstract and the different generators have different sort of inputs and outputs. @@ -122,13 +138,17 @@ declare global { * Generators that implement this method can have their work distributed * across multiple worker threads for true parallel processing. * - * @param fullInput - Full input data (for rebuilding context in workers) - * @param itemIndices - Array of indices of items to process + * Input is automatically sliced to only include items at the specified indices, + * reducing serialization overhead. The itemIndices are remapped to 0-based + * indices into the sliced array. 
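The slicing contract described here, as a minimal sketch (a hypothetical harness, not the actual chunk-worker implementation):

```js
// Workers receive only their chunk's items; indices are rebased against
// the slice so processChunk never needs the full input array.
const sliceChunk = (fullInput, itemIndices) => ({
  slicedInput: itemIndices.map(idx => fullInput[idx]),
  remappedIndices: itemIndices.map((_, position) => position),
});

const { slicedInput, remappedIndices } = sliceChunk(['a', 'b', 'c', 'd'], [2, 3]);
// slicedInput -> ['c', 'd']; remappedIndices -> [0, 1]
```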
+ * + * @param slicedInput - Sliced input containing only items for this chunk + * @param itemIndices - Array of 0-based indices into slicedInput * @param options - Generator options (without worker, which isn't serializable) * @returns Array of results for the processed items */ processChunk?: ( - fullInput: I, + slicedInput: I, itemIndices: number[], options: Partial> ) => Promise; diff --git a/src/generators/web/index.mjs b/src/generators/web/index.mjs index 71f2f015..eb3c654a 100644 --- a/src/generators/web/index.mjs +++ b/src/generators/web/index.mjs @@ -13,25 +13,31 @@ import { processJSXEntries } from './utils/processing.mjs'; * - Client-side JavaScript with code splitting * - Bundled CSS styles * - * @type {GeneratorMetadata} + * Note: This generator does NOT support streaming/chunked processing because + * processJSXEntries needs all entries together to generate code-split bundles. + * + * @typedef {Array} Input + * @typedef {Array<{ html: string, css: string }>} Output + * + * @type {GeneratorMetadata} */ export default { name: 'web', + version: '1.0.0', + description: 'Generates HTML/CSS/JS bundles from JSX AST entries', + dependsOn: 'jsx-ast', /** * Main generation function that processes JSX AST entries into web bundles. * - * @param {import('../jsx-ast/utils/buildContent.mjs').JSXContent[]} entries - JSX AST entries to process. + * @param {Input} input - JSX AST entries to process. * @param {Partial} options - Generator options. - * @param {string} [options.output] - Output directory for generated files. - * @param {string} options.version - Documentation version string. - * @returns {Promise>} Generated HTML and CSS. + * @returns {Promise} Processed HTML/CSS/JS content. */ - async generate(entries, { output, version }) { - // Load the HTML template with placeholders + async generate(input, { output, version }) { const template = await readFile( new URL('template.html', import.meta.url), 'utf-8' @@ -45,14 +51,14 @@ export default { // Process all entries: convert JSX to HTML/CSS/JS const { results, css, chunks } = await processJSXEntries( - entries, + input, template, astBuilders, requireFn, { version } ); - // Write files to disk if output directory is specified + // Process all entries together (required for code-split bundles) if (output) { // Write HTML files for (const { html, api } of results) { @@ -68,7 +74,6 @@ export default { await writeFile(join(output, 'styles.css'), css, 'utf-8'); } - // Return HTML and CSS for each entry - return results.map(({ html }) => ({ html, css })); + return results.map(({ html }) => ({ html: html.toString(), css })); }, }; diff --git a/src/generators/web/utils/processing.mjs b/src/generators/web/utils/processing.mjs index 2c61211f..a2839f6f 100644 --- a/src/generators/web/utils/processing.mjs +++ b/src/generators/web/utils/processing.mjs @@ -7,6 +7,38 @@ import { SPECULATION_RULES } from '../constants.mjs'; import bundleCode from './bundle.mjs'; import { createChunkedRequire } from './chunks.mjs'; +/** + * Converts JSX AST entries to server and client JavaScript code. + * This is the CPU-intensive step that can be parallelized. 
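Since the comment above flags this step as parallelizable, one possible future shape, not implemented in this diff: each worker converts a slice of entries and returns its own code maps, which merge cheaply because file names are unique per entry (`mergeCodeMaps` and `perWorkerResults` are hypothetical):

```js
// Merging per-worker conversion results back into the two code maps that
// the bundling steps expect.
const mergeCodeMaps = perWorkerResults => {
  const serverCodeMap = new Map();
  const clientCodeMap = new Map();

  for (const result of perWorkerResults) {
    for (const [name, code] of result.serverCodeMap) {
      serverCodeMap.set(name, code);
    }

    for (const [name, code] of result.clientCodeMap) {
      clientCodeMap.set(name, code);
    }
  }

  return { serverCodeMap, clientCodeMap };
};
```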
+ * + * @param {Array} entries - JSX AST entries + * @param {function} buildServerProgram - Wraps code for server execution + * @param {function} buildClientProgram - Wraps code for client hydration + * @returns {{serverCodeMap: Map, clientCodeMap: Map}} + */ +export function convertJSXToCode( + entries, + { buildServerProgram, buildClientProgram } +) { + const serverCodeMap = new Map(); + const clientCodeMap = new Map(); + + for (const entry of entries) { + const fileName = `${entry.data.api}.jsx`; + + // Convert AST to JavaScript string with JSX syntax + const { value: code } = toJs(entry, { handlers: jsx }); + + // Prepare code for server-side execution (wrapped for SSR) + serverCodeMap.set(fileName, buildServerProgram(code)); + + // Prepare code for client-side execution (wrapped for hydration) + clientCodeMap.set(fileName, buildClientProgram(code)); + } + + return { serverCodeMap, clientCodeMap }; +} + /** * Executes server-side JavaScript code in an isolated context with virtual module support. * @@ -56,38 +88,27 @@ export async function executeServerCode(serverCodeMap, requireFn) { export async function processJSXEntries( entries, template, - { buildServerProgram, buildClientProgram }, + astBuilders, requireFn, { version } ) { - const serverCodeMap = new Map(); - const clientCodeMap = new Map(); - - // Convert JSX AST to JavaScript for both server and client - for (const entry of entries) { - const fileName = `${entry.data.api}.jsx`; - - // Convert AST to JavaScript string with JSX syntax - const { value: code } = toJs(entry, { handlers: jsx }); - - // Prepare code for server-side execution (wrapped for SSR) - serverCodeMap.set(fileName, buildServerProgram(code)); - - // Prepare code for client-side execution (wrapped for hydration) - clientCodeMap.set(fileName, buildClientProgram(code)); - } - - // Execute all server code at once to get dehydrated HTML - const serverBundle = await executeServerCode(serverCodeMap, requireFn); - - // Bundle all client code at once (with code splitting for shared chunks) - const clientBundle = await bundleCode(clientCodeMap); + // Step 1: Convert JSX AST to JavaScript (CPU-intensive, could be parallelized) + const { serverCodeMap, clientCodeMap } = convertJSXToCode( + entries, + astBuilders + ); + + // Step 2: Bundle server and client code IN PARALLEL + // Both need all entries for code-splitting, but are independent of each other + const [serverBundle, clientBundle] = await Promise.all([ + executeServerCode(serverCodeMap, requireFn), + bundleCode(clientCodeMap), + ]); const titleSuffix = `Node.js v${version.version} Documentation`; - const speculationRulesString = JSON.stringify(SPECULATION_RULES, null, 2); - // Process each entry to create final HTML + // Step 3: Create final HTML (could be parallelized in workers) const results = entries.map(({ data: { api, heading } }) => { const fileName = `${api}.js`; diff --git a/src/loaders/markdown.mjs b/src/loaders/markdown.mjs index 5e92904c..26dae7c6 100644 --- a/src/loaders/markdown.mjs +++ b/src/loaders/markdown.mjs @@ -6,39 +6,36 @@ import { extname } from 'node:path'; import { globSync } from 'glob'; import { VFile } from 'vfile'; +import createQueries from '../utils/queries/index.mjs'; + +const { updateStabilityPrefixToLink } = createQueries(); + /** - * This method creates a simple abstract "Loader", which technically - * could be used for different things, but here we want to use it to load - * Markdown files and transform them into VFiles + * This creates a "loader" for loading Markdown API doc 
files into VFiles. */ const createLoader = () => { /** - * Loads API Doc files and transforms it into VFiles + * Loads Markdown source files and transforms them into VFiles. + * Applies stability index normalization during load. * - * @param {Array} searchPath A glob/path for API docs to be loaded - * @param {Array | undefined} [ignorePath] A glob/path of files to ignore - * The input string can be a simple path (relative or absolute) - * The input string can also be any allowed glob string - * - * @see https://code.visualstudio.com/docs/editor/glob-patterns + * @param {string | string[]} searchPath - Glob pattern(s) or file paths + * @returns {Promise[]} Array of promises resolving to VFiles */ - const loadFiles = async (searchPath, ignorePath) => { - const ignoredFiles = ignorePath - ? globSync(ignorePath).filter(filePath => extname(filePath) === '.md') - : []; - + const loadFiles = searchPath => { const resolvedFiles = globSync(searchPath).filter( - filePath => - extname(filePath) === '.md' && !ignoredFiles.includes(filePath) + filePath => extname(filePath) === '.md' ); - return Promise.all( - resolvedFiles.map(async filePath => { - const fileContents = await readFile(filePath, 'utf-8'); + return resolvedFiles.map(async filePath => { + const fileContents = await readFile(filePath, 'utf-8'); - return new VFile({ path: filePath, value: fileContents }); - }) - ); + const vfile = new VFile({ path: filePath, value: fileContents }); + + // Normalizes all the Stability Index prefixes with Markdown links + updateStabilityPrefixToLink(vfile); + + return vfile; + }); }; return { loadFiles }; diff --git a/src/logger/__tests__/logger.test.mjs b/src/logger/__tests__/logger.test.mjs index 006d2495..1673b453 100644 --- a/src/logger/__tests__/logger.test.mjs +++ b/src/logger/__tests__/logger.test.mjs @@ -221,4 +221,163 @@ describe('createLogger', () => { }, ]); }); + + describe('setLogLevel', () => { + it('should change log level at runtime using number', t => { + const transport = t.mock.fn(); + + const logger = createLogger(transport, LogLevel.info); + + // Should log at info level + logger.info('Info message'); + strictEqual(transport.mock.callCount(), 1); + + // Change to error level + logger.setLogLevel(LogLevel.error); + + // Should not log info anymore + logger.info('Another info message'); + strictEqual(transport.mock.callCount(), 1); + + // Should log error + logger.error('Error message'); + strictEqual(transport.mock.callCount(), 2); + }); + + it('should change log level at runtime using string', t => { + const transport = t.mock.fn(); + + const logger = createLogger(transport, LogLevel.error); + + // Should not log at info level initially + logger.info('Info message'); + strictEqual(transport.mock.callCount(), 0); + + // Change to debug level using string + logger.setLogLevel('debug'); + + // Should now log info + logger.info('Another info message'); + strictEqual(transport.mock.callCount(), 1); + }); + + it('should handle case-insensitive level names', t => { + const transport = t.mock.fn(); + + const logger = createLogger(transport, LogLevel.fatal); + + logger.setLogLevel('DEBUG'); + logger.debug('Debug message'); + strictEqual(transport.mock.callCount(), 1); + + logger.setLogLevel('Info'); + logger.debug('Debug message 2'); + strictEqual(transport.mock.callCount(), 1); // Should not log debug at info level + }); + + it('should propagate to child loggers', t => { + const transport = t.mock.fn(); + + const logger = createLogger(transport, LogLevel.info); + const child = 
logger.child('child-module'); + + // Child should initially respect parent's info level + child.debug('Debug message'); + strictEqual(transport.mock.callCount(), 0); + + child.info('Info message'); + strictEqual(transport.mock.callCount(), 1); + + // Change parent to debug level + logger.setLogLevel(LogLevel.debug); + + // Child should now log debug messages + child.debug('Debug message after level change'); + strictEqual(transport.mock.callCount(), 2); + + // Change parent to error level + logger.setLogLevel(LogLevel.error); + + // Child should not log info anymore + child.info('Info message after error level'); + strictEqual(transport.mock.callCount(), 2); + + // Child should log error + child.error('Error message'); + strictEqual(transport.mock.callCount(), 3); + }); + + it('should propagate to nested child loggers', t => { + const transport = t.mock.fn(); + + const logger = createLogger(transport, LogLevel.error); + const child1 = logger.child('child1'); + const child2 = child1.child('child2'); + const child3 = child2.child('child3'); + + // None should log debug initially + logger.debug('root debug'); + child1.debug('child1 debug'); + child2.debug('child2 debug'); + child3.debug('child3 debug'); + strictEqual(transport.mock.callCount(), 0); + + // Change root to debug level + logger.setLogLevel(LogLevel.debug); + + // All should now log debug + child1.debug('child1 debug after'); + strictEqual(transport.mock.callCount(), 1); + + child2.debug('child2 debug after'); + strictEqual(transport.mock.callCount(), 2); + + child3.debug('child3 debug after'); + strictEqual(transport.mock.callCount(), 3); + }); + + it('should propagate to multiple children at same level', t => { + const transport = t.mock.fn(); + + const logger = createLogger(transport, LogLevel.error); + const childA = logger.child('childA'); + const childB = logger.child('childB'); + const childC = logger.child('childC'); + + // None should log info + childA.info('A info'); + childB.info('B info'); + childC.info('C info'); + strictEqual(transport.mock.callCount(), 0); + + // Change root to info + logger.setLogLevel(LogLevel.info); + + // All children should now log info + childA.info('A info after'); + strictEqual(transport.mock.callCount(), 1); + + childB.info('B info after'); + strictEqual(transport.mock.callCount(), 2); + + childC.info('C info after'); + strictEqual(transport.mock.callCount(), 3); + }); + + it('should ignore invalid string level names', t => { + const transport = t.mock.fn(); + + const logger = createLogger(transport, LogLevel.info); + + // Try to set invalid level + logger.setLogLevel('invalid'); + + // Should still log at info level + logger.info('Info message'); + strictEqual(transport.mock.callCount(), 1); + + logger.debug('Debug message'); + strictEqual(transport.mock.callCount(), 1); // Debug should be filtered + }); + }); }); diff --git a/src/logger/__tests__/transports/console.test.mjs b/src/logger/__tests__/transports/console.test.mjs index cf346707..0cf3bf14 100644 --- a/src/logger/__tests__/transports/console.test.mjs +++ b/src/logger/__tests__/transports/console.test.mjs @@ -219,4 +219,117 @@ describe('console', () => { '\n', ]); }); + + it('should print inline metadata in magenta', t => { + process.env.FORCE_COLOR = '1'; + + t.mock.timers.enable({ apis: ['Date'] }); + + const fn = t.mock.method(process.stdout, 'write'); + + fn.mock.mockImplementation(() => {}); + + console({ + level: LogLevel.info, + message: 'Test message', + metadata: { + threads: 4, + generator: 'metadata', + }, + timestamp: 
Date.now(), + }); + + const callsArgs = process.stdout.write.mock.calls.map( + call => call.arguments[0] + ); + + strictEqual(process.stdout.write.mock.callCount(), 5); + deepStrictEqual(callsArgs, [ + '[00:00:00.000]', + ' \x1B[32mINFO\x1B[39m', + ': Test message', + ' \x1B[35m{"threads":4,"generator":"metadata"}\x1B[39m', + '\n', + ]); + }); + + it('should not print metadata block if only file/stack present', t => { + process.env.FORCE_COLOR = '1'; + + t.mock.timers.enable({ apis: ['Date'] }); + + const fn = t.mock.method(process.stdout, 'write'); + + fn.mock.mockImplementation(() => {}); + + console({ + level: LogLevel.info, + message: 'Test message', + metadata: { + file: { + path: 'test.md', + }, + stack: 'Error: test\n at test.mjs:1:1', + }, + timestamp: Date.now(), + }); + + const callsArgs = process.stdout.write.mock.calls.map( + call => call.arguments[0] + ); + + // Should have: timestamp, level, message, file path, newline, stack + // But NOT a metadata JSON block (since only file/stack are present) + strictEqual(process.stdout.write.mock.callCount(), 6); + deepStrictEqual(callsArgs, [ + '[00:00:00.000]', + ' \x1B[32mINFO\x1B[39m', + ': Test message', + ' at test.md', + '\n', + 'Error: test\n at test.mjs:1:1', + ]); + }); + + it('should print both file info and extra metadata', t => { + process.env.FORCE_COLOR = '1'; + + t.mock.timers.enable({ apis: ['Date'] }); + + const fn = t.mock.method(process.stdout, 'write'); + + fn.mock.mockImplementation(() => {}); + + console({ + level: LogLevel.debug, + message: 'Processing chunk', + metadata: { + file: { + path: 'api.md', + position: { + start: { line: 10 }, + end: { line: 20 }, + }, + }, + chunkId: 3, + itemCount: 15, + }, + timestamp: Date.now(), + }); + + const callsArgs = process.stdout.write.mock.calls.map( + call => call.arguments[0] + ); + + strictEqual(process.stdout.write.mock.callCount(), 7); + deepStrictEqual(callsArgs, [ + '[00:00:00.000]', + ' \x1B[34mDEBUG\x1B[39m', + ': Processing chunk', + ' at api.md', + '(10:20)', + ' \x1B[35m{"chunkId":3,"itemCount":15}\x1B[39m', + '\n', + ]); + }); }); diff --git a/src/logger/logger.mjs b/src/logger/logger.mjs index 0723a2c7..2a0a9a47 100644 --- a/src/logger/logger.mjs +++ b/src/logger/logger.mjs @@ -9,7 +9,7 @@ import { LogLevel } from './constants.mjs'; /** * Creates a logger instance with the specified transport, log level and an - * optional module name. + * optional module name. Child loggers share the parent's log level. * * @param {import('./types').Transport} transport - Function to handle log output. * @param {number} [loggerLevel] - Minimum log level to output. @@ -20,6 +20,21 @@ export const createLogger = ( loggerLevel = LogLevel.info, module ) => { + /** @type {number} */ + let currentLevel = loggerLevel; + + /** @type {Set>} */ + const children = new Set(); + + /** + * Checks if the given log level should be logged based on the current logger + * level. + * + * @param {number} level - Log level to check. + * @returns {boolean} + */ + const shouldLog = level => level >= currentLevel; + /** * Logs a message at the given level with optional metadata. * @@ -41,8 +56,10 @@ export const createLogger = ( // Extract message string from Error object or use message as-is let msg; + if (message instanceof Error) { msg = message.message; + metadata.stack = message.stack; } else { msg = message; @@ -108,22 +125,44 @@ export const createLogger = ( log(LogLevel.debug, message, metadata); /** - * Creates a child logger for a specific module. 
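The behaviour these new tests pin down, as a usage sketch (the transport sink and import paths are illustrative; `createLogger`, `child`, and `setLogLevel` are from this diff):

```js
import { createLogger } from './src/logger/logger.mjs';
import { LogLevel } from './src/logger/constants.mjs';

// A stand-in transport that just prints the message text.
const logger = createLogger(
  log => process.stdout.write(`${log.message}\n`),
  LogLevel.info
);
const child = logger.child('generators');

logger.setLogLevel('DEBUG'); // level names are case-insensitive
child.debug('visible now'); // children follow the parent's level

logger.setLogLevel('nope'); // unknown names are ignored...
child.debug('still visible'); // ...so the level stays at debug
```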
+ * Creates a child logger for a specific module. Child loggers share the + * parent's log level. * - * @param {string} module - Module name for the child logger. + * @param {string} childModule - Module name for the child logger. * @returns {ReturnType} */ - const child = module => createLogger(transport, loggerLevel, module); + const child = childModule => { + const childLogger = createLogger(transport, currentLevel, childModule); + + children.add(childLogger); + + return childLogger; + }; /** - * Checks if the given log level should be logged based on the current logger - * level. + * Sets the log level for this logger instance and all child loggers. * - * @param {number} level - Log level to check. - * @returns {boolean} + * @param {number | string} level - Log level (number) or level name (string) */ - const shouldLog = level => { - return level >= loggerLevel; + const setLogLevel = level => { + let newLogLevel = level; + + if (typeof newLogLevel === 'string') { + newLogLevel = newLogLevel.toLowerCase(); + + if (newLogLevel in LogLevel === false) { + return; + } + + newLogLevel = LogLevel[newLogLevel]; + } + + currentLevel = newLogLevel; + + // Propagate to all child loggers + for (const childLogger of children) { + childLogger.setLogLevel(currentLevel); + } }; return { @@ -133,5 +172,6 @@ export const createLogger = ( fatal, debug, child, + setLogLevel, }; }; diff --git a/src/logger/transports/console.mjs b/src/logger/transports/console.mjs index 3dfb7e19..0edd50f1 100644 --- a/src/logger/transports/console.mjs +++ b/src/logger/transports/console.mjs @@ -1,5 +1,7 @@ 'use strict'; +import { styleText } from 'node:util'; + import { prettifyLevel } from '../utils/colors.mjs'; import { prettifyTimestamp } from '../utils/time.mjs'; @@ -10,7 +12,7 @@ import { prettifyTimestamp } from '../utils/time.mjs'; * @returns {void} */ const console = ({ level, message, timestamp, metadata = {}, module }) => { - const { file, stack } = metadata; + const { file, stack, ...rest } = metadata; const time = prettifyTimestamp(timestamp); @@ -36,6 +38,12 @@ const console = ({ level, message, timestamp, metadata = {}, module }) => { process.stdout.write(position); } + // Print remaining metadata inline in purple + if (Object.keys(rest).length > 0) { + const metaStr = styleText('magenta', JSON.stringify(rest)); + process.stdout.write(` ${metaStr}`); + } + process.stdout.write('\n'); if (stack) { diff --git a/src/parsers/json.mjs b/src/parsers/json.mjs new file mode 100644 index 00000000..7f4e9de7 --- /dev/null +++ b/src/parsers/json.mjs @@ -0,0 +1,19 @@ +'use strict'; + +import { loadFromURL } from '../utils/parser.mjs'; + +/** + * Retrieves the type map from the provided JSON file. 
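A hedged usage of `parseTypeMap` (the file name is illustrative, and passing a local path assumes `loadFromURL` accepts paths as well as URLs, as the CLI wiring suggests):

```js
import { parseTypeMap } from './src/parsers/json.mjs';

// Per the guard in the function body, an empty or missing path resolves to
// an empty map instead of throwing.
const typeMap = await parseTypeMap('./node/doc/api/type-map.json'); // assumed file
const noMap = await parseTypeMap(''); // -> {}
```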
+ * + * @param {string|URL} path Path to type map JSON file + * @returns {Promise>} + */ +export const parseTypeMap = async path => { + if (!path || !path.length) { + return {}; + } + + const typeMapContent = await loadFromURL(path); + + return JSON.parse(typeMapContent); +}; diff --git a/src/parsers/markdown.mjs b/src/parsers/markdown.mjs index cce3618b..da7319cc 100644 --- a/src/parsers/markdown.mjs +++ b/src/parsers/markdown.mjs @@ -3,8 +3,6 @@ import { coerce } from 'semver'; import { loadFromURL } from '../utils/parser.mjs'; -import createQueries from '../utils/queries/index.mjs'; -import { getRemark } from '../utils/remark.mjs'; // A ReGeX for retrieving Node.js version headers from the CHANGELOG.md const NODE_VERSIONS_REGEX = /\* \[Node\.js ([0-9.]+)\]\S+ (.*)\r?\n/g; @@ -15,57 +13,6 @@ const LIST_ITEM_REGEX = /\* \[(.*?)\]\((.*?)\.md\)/g; // A ReGeX for checking if a Node.js version is an LTS release const NODE_LTS_VERSION_REGEX = /Long Term Support/i; -/** - * Creates an API doc parser for a given Markdown API doc file - */ -const createParser = () => { - // Creates an instance of the Remark processor with GFM support - const remarkProcessor = getRemark(); - - const { updateStabilityPrefixToLink } = createQueries(); - - /** - * Parses a given API doc file into a AST tree - * - * @param {import('vfile').VFile | Promise} apiDoc - * @returns {Promise>} - */ - const parseApiDoc = async apiDoc => { - // We allow the API doc VFile to be a Promise of a VFile also, - // hence we want to ensure that it first resolves before we pass it to the parser - const resolvedApiDoc = await Promise.resolve(apiDoc); - - // Normalizes all the Stability Index prefixes with Markdown links - updateStabilityPrefixToLink(resolvedApiDoc); - - // Parses the API doc into an AST tree using `unified` and `remark` - const apiDocTree = remarkProcessor.parse(resolvedApiDoc); - - return { - file: { - stem: resolvedApiDoc.stem, - basename: resolvedApiDoc.basename, - }, - tree: apiDocTree, - }; - }; - - /** - * This method allows to parse multiple API doc files at once - * and it simply wraps parseApiDoc with the given API docs - * - * @param {Array>} apiDocs List of API doc files to be parsed - * @returns {Promise>>} - */ - const parseApiDocs = async apiDocs => { - // We do a Promise.all, to ensure that each API doc is resolved asynchronously - // but all need to be resolved first before we return the result to the caller - return Promise.all(apiDocs.map(parseApiDoc)); - }; - - return { parseApiDocs, parseApiDoc }; -}; - /** * Retrieves all Node.js major versions from the provided CHANGELOG.md file * and returns an array of objects containing the version and LTS status. @@ -90,11 +37,13 @@ export const parseChangelog = async path => { * @returns {Promise>} */ export const parseIndex = async path => { + if (!path || !path.length) { + return []; + } + const index = await loadFromURL(path); const items = Array.from(index.matchAll(LIST_ITEM_REGEX)); return items.map(([, section, api]) => ({ section, api })); }; - -export default createParser; diff --git a/src/streaming.mjs b/src/streaming.mjs new file mode 100644 index 00000000..a275a037 --- /dev/null +++ b/src/streaming.mjs @@ -0,0 +1,82 @@ +'use strict'; + +import logger from './logger/index.mjs'; + +const streamingLogger = logger.child('streaming'); + +/** + * Checks if a value is an async generator/iterable. 
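A hypothetical call site showing how these helpers let a runner treat generator-returning and promise-returning `generate()` implementations uniformly (`runGenerate` is an invented name; the two helpers are from `src/streaming.mjs` in this diff):

```js
import { isAsyncGenerator, collectAsyncGenerator } from './src/streaming.mjs';

const runGenerate = async (generator, input, options) => {
  const result = generator.generate(input, options);

  return isAsyncGenerator(result)
    ? collectAsyncGenerator(result) // drain streamed chunks into a flat array
    : result; // legacy promise-returning generators pass through unchanged
};
```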
+ * + * @param {unknown} obj - Value to check + * @returns {obj is AsyncGenerator} True if the value is an async iterable + */ +export const isAsyncGenerator = obj => + obj !== null && + typeof obj === 'object' && + typeof obj[Symbol.asyncIterator] === 'function'; + +/** + * Collects all values from an async generator into a flat array. + * Each yielded chunk is spread into the results array. + * + * @template T + * @param {AsyncGenerator} generator - Async generator yielding arrays + * @returns {Promise} Flattened array of all yielded items + */ +export const collectAsyncGenerator = async generator => { + const results = []; + + let chunkCount = 0; + + for await (const chunk of generator) { + chunkCount++; + + results.push(...chunk); + + streamingLogger.debug(`Collected chunk ${chunkCount}`, { + itemsInChunk: chunk.length, + }); + } + + streamingLogger.debug(`Collection complete`, { + totalItems: results.length, + chunks: chunkCount, + }); + + return results; +}; + +/** + * Creates a cache for async generator collection results. + * Ensures that when multiple consumers request the same async generator, + * only one collection happens and all consumers share the result. + */ +export const createStreamingCache = () => { + /** @type {Map>} */ + const cache = new Map(); + + return { + /** + * Gets the collected result for a generator, starting collection if needed. + * + * @param {string} key - Cache key (usually generator name) + * @param {AsyncGenerator} generator - The async generator to collect + * @returns {Promise} Promise resolving to collected results + */ + getOrCollect(key, generator) { + const hasKey = cache.has(key); + + if (!hasKey) { + cache.set(key, collectAsyncGenerator(generator)); + } + + streamingLogger.debug( + hasKey + ? `Using cached result for "${key}"` + : `Starting collection for "${key}"` + ); + + return cache.get(key); + }, + }; +}; diff --git a/src/threading/__tests__/WorkerPool.test.mjs b/src/threading/__tests__/WorkerPool.test.mjs deleted file mode 100644 index c878fe48..00000000 --- a/src/threading/__tests__/WorkerPool.test.mjs +++ /dev/null @@ -1,90 +0,0 @@ -import { deepStrictEqual, ok, strictEqual } from 'node:assert'; -import { describe, it } from 'node:test'; - -import WorkerPool from '../index.mjs'; - -describe('WorkerPool', () => { - // Use relative path from WorkerPool's location (src/threading/) - const workerPath = './chunk-worker.mjs'; - - it('should create a worker pool with specified thread count', () => { - const pool = new WorkerPool(workerPath, 4); - - strictEqual(pool.threads, 4); - strictEqual(pool.getActiveThreadCount(), 0); - }); - - it('should initialize with zero active threads', () => { - const pool = new WorkerPool(workerPath, 2); - - strictEqual(pool.getActiveThreadCount(), 0); - }); - - it('should change active thread count atomically', () => { - const pool = new WorkerPool(workerPath, 2); - - pool.changeActiveThreadCount(1); - strictEqual(pool.getActiveThreadCount(), 1); - - pool.changeActiveThreadCount(2); - strictEqual(pool.getActiveThreadCount(), 3); - - pool.changeActiveThreadCount(-1); - strictEqual(pool.getActiveThreadCount(), 2); - }); - - it('should queue tasks when thread limit is reached', async () => { - const pool = new WorkerPool(workerPath, 1); - - const task1 = pool.run({ - generatorName: 'ast-js', - fullInput: [], - itemIndices: [], - options: {}, - }); - - const task2 = pool.run({ - generatorName: 'ast-js', - fullInput: [], - itemIndices: [], - options: {}, - }); - - const results = await Promise.all([task1, task2]); - 
-    ok(Array.isArray(results));
-    strictEqual(results.length, 2);
-  });
-
-  it('should run multiple tasks in parallel with runAll', async () => {
-    const pool = new WorkerPool(workerPath, 2);
-
-    const tasks = [
-      {
-        generatorName: 'ast-js',
-        fullInput: [],
-        itemIndices: [],
-        options: {},
-      },
-      {
-        generatorName: 'ast-js',
-        fullInput: [],
-        itemIndices: [],
-        options: {},
-      },
-    ];
-
-    const results = await pool.runAll(tasks);
-
-    ok(Array.isArray(results));
-    strictEqual(results.length, 2);
-  });
-
-  it('should handle empty task array', async () => {
-    const pool = new WorkerPool(workerPath, 2);
-
-    const results = await pool.runAll([]);
-
-    deepStrictEqual(results, []);
-  });
-});
diff --git a/src/threading/__tests__/parallel.test.mjs b/src/threading/__tests__/parallel.test.mjs
index 3090e234..41b04c6f 100644
--- a/src/threading/__tests__/parallel.test.mjs
+++ b/src/threading/__tests__/parallel.test.mjs
@@ -1,85 +1,182 @@
 import { deepStrictEqual, ok, strictEqual } from 'node:assert';
 import { describe, it } from 'node:test';
 
-import WorkerPool from '../index.mjs';
+import createWorkerPool from '../index.mjs';
 import createParallelWorker from '../parallel.mjs';
 
-describe('createParallelWorker', () => {
-  // Use relative path from WorkerPool's location (src/threading/)
-  const workerPath = './chunk-worker.mjs';
-
-  it('should create a ParallelWorker with map method', () => {
-    const pool = new WorkerPool(workerPath, 2);
+/**
+ * Helper to collect all results from an async generator.
+ *
+ * @template T
+ * @param {AsyncGenerator<T[]>} generator
+ * @returns {Promise<T[]>}
+ */
+async function collectStream(generator) {
+  const results = [];
+
+  for await (const chunk of generator) {
+    results.push(...chunk);
+  }
+
+  return results;
+}
+
+/**
+ * Helper to collect chunks (not flattened).
+ *
+ * @template T
+ * @param {AsyncGenerator<T[]>} generator
+ * @returns {Promise<T[][]>}
+ */
+async function collectChunks(generator) {
+  const chunks = [];
+
+  for await (const chunk of generator) {
+    chunks.push(chunk);
+  }
+
+  return chunks;
+}
+
+describe('createParallelWorker', () => {
+  it('should create a ParallelWorker with stream method', async () => {
+    const pool = createWorkerPool(2);
     const worker = createParallelWorker('metadata', pool, { threads: 2 });
 
     ok(worker);
-    strictEqual(typeof worker.map, 'function');
-  });
-
-  it('should use main thread for single-threaded execution', async () => {
-    const pool = new WorkerPool(workerPath, 1);
-
-    const worker = createParallelWorker('ast-js', pool, { threads: 1 });
-    const items = [];
-    const results = await worker.map(items, items, {});
-
-    ok(Array.isArray(results));
-    strictEqual(results.length, 0);
-  });
-
-  it('should use main thread for small item counts', async () => {
-    const pool = new WorkerPool(workerPath, 4);
-
-    const worker = createParallelWorker('ast-js', pool, { threads: 4 });
-    const items = [];
-    const results = await worker.map(items, items, {});
+    strictEqual(typeof worker.stream, 'function');
 
-    ok(Array.isArray(results));
-    strictEqual(results.length, 0);
+    await pool.destroy();
  });
 
-  it('should chunk items for parallel processing', async () => {
-    const pool = new WorkerPool(workerPath, 2);
+  it('should handle empty items array', async () => {
+    const pool = createWorkerPool(2);
+    const worker = createParallelWorker('ast-js', pool, {
+      threads: 2,
+      chunkSize: 10,
+    });
 
-    const worker = createParallelWorker('ast-js', pool, { threads: 2 });
-    const items = [];
+    const results = await collectStream(worker.stream([], [], {}));
 
-    const results = await worker.map(items, items, {});
+    deepStrictEqual(results, []);
 
-    strictEqual(results.length, 0);
-    ok(Array.isArray(results));
+    await pool.destroy();
   });
 
-  it('should pass extra options to worker', async () => {
-    const pool = new WorkerPool(workerPath, 1);
-
-    const worker = createParallelWorker('ast-js', pool, { threads: 1 });
-    const extra = { gitRef: 'main', customOption: 'value' };
-    const items = [];
-
-    const results = await worker.map(items, items, extra);
-
-    ok(Array.isArray(results));
+  it('should distribute items to multiple worker threads', async () => {
+    const pool = createWorkerPool(4);
+    const worker = createParallelWorker('metadata', pool, {
+      threads: 4,
+      chunkSize: 1,
+    });
+
+    const mockInput = [
+      {
+        file: { stem: 'test1', basename: 'test1.md' },
+        tree: { type: 'root', children: [] },
+      },
+      {
+        file: { stem: 'test2', basename: 'test2.md' },
+        tree: { type: 'root', children: [] },
+      },
+      {
+        file: { stem: 'test3', basename: 'test3.md' },
+        tree: { type: 'root', children: [] },
+      },
+      {
+        file: { stem: 'test4', basename: 'test4.md' },
+        tree: { type: 'root', children: [] },
+      },
+    ];
+
+    const chunks = await collectChunks(
+      worker.stream(mockInput, mockInput, { typeMap: {} })
+    );
+
+    strictEqual(chunks.length, 4);
+
+    for (const chunk of chunks) {
+      ok(Array.isArray(chunk));
+    }
+
+    await pool.destroy();
   });
 
-  it('should serialize and deserialize data correctly', async () => {
-    const pool = new WorkerPool(workerPath, 2);
+  it('should yield results as chunks complete', async () => {
+    const pool = createWorkerPool(2);
+    const worker = createParallelWorker('metadata', pool, {
+      threads: 2,
+      chunkSize: 1,
+    });
+
+    const mockInput = [
+      {
+        file: { stem: 'test1', basename: 'test1.md' },
+        tree: { type: 'root', children: [] },
+      },
+      {
+        file: { stem: 'test2', basename: 'test2.md' },
+        tree: { type: 'root', children: [] },
+      },
+    ];
+
+    const chunks = await collectChunks(
+      worker.stream(mockInput, mockInput, { typeMap: {} })
+    );
+
+    strictEqual(chunks.length, 2);
+
+    await pool.destroy();
+  });
 
-    const worker = createParallelWorker('ast-js', pool, { threads: 2 });
-    const items = [];
+  it('should work with single thread and items', async () => {
+    const pool = createWorkerPool(2);
+    const worker = createParallelWorker('metadata', pool, {
+      threads: 2,
+      chunkSize: 5,
+    });
 
-    const results = await worker.map(items, items, {});
+    const mockInput = [
+      {
+        file: { stem: 'test1', basename: 'test1.md' },
+        tree: { type: 'root', children: [] },
+      },
+    ];
 
-    ok(Array.isArray(results));
-  });
+    const chunks = await collectChunks(
+      worker.stream(mockInput, mockInput, { typeMap: {} })
+    );
 
-  it('should handle empty items array', async () => {
-    const pool = new WorkerPool(workerPath, 2);
+    strictEqual(chunks.length, 1);
+    ok(Array.isArray(chunks[0]));
 
-    const worker = createParallelWorker('ast-js', pool, { threads: 2 });
-    const results = await worker.map([], [], {});
+    await pool.destroy();
+  });
 
-    deepStrictEqual(results, []);
+  it('should use sliceInput for metadata generator', async () => {
+    const pool = createWorkerPool(2);
+    const worker = createParallelWorker('metadata', pool, {
+      threads: 2,
+      chunkSize: 1,
+    });
+
+    const mockInput = [
+      {
+        file: { stem: 'test1', basename: 'test1.md' },
+        tree: { type: 'root', children: [] },
+      },
+      {
+        file: { stem: 'test2', basename: 'test2.md' },
+        tree: { type: 'root', children: [] },
+      },
+    ];
+
+    const chunks = await collectChunks(
+      worker.stream(mockInput, mockInput, { typeMap: {} })
+    );
+
+    strictEqual(chunks.length, 2);
+
+    await pool.destroy();
   });
 });
diff --git a/src/threading/chunk-worker.mjs b/src/threading/chunk-worker.mjs
index 31112109..80558790 100644
--- a/src/threading/chunk-worker.mjs
+++ b/src/threading/chunk-worker.mjs
@@ -1,13 +1,14 @@
-import { parentPort, workerData } from 'node:worker_threads';
-
 import { allGenerators } from '../generators/index.mjs';
 
-const { generatorName, fullInput, itemIndices, options } = workerData;
-
-const generator = allGenerators[generatorName];
-
-// Generators must implement processChunk for item-level parallelization
-generator
-  .processChunk(fullInput, itemIndices, options)
-  .then(result => parentPort.postMessage(result))
-  .catch(error => parentPort.postMessage({ error: error.message }));
+/**
+ * Processes a chunk of items using the specified generator's processChunk method.
+ * This is the worker entry point for Piscina.
+ *
+ * @param {ParallelTaskOptions} opts - Task options from Piscina
+ * @returns {Promise<unknown[]>} The processed result
+ */
+export default async ({ generatorName, input, itemIndices, options }) => {
+  const generator = allGenerators[generatorName];
+
+  return generator.processChunk(input, itemIndices, options);
+};
diff --git a/src/threading/index.mjs b/src/threading/index.mjs
index a6c19fd2..bd7f2dfa 100644
--- a/src/threading/index.mjs
+++ b/src/threading/index.mjs
@@ -1,108 +1,27 @@
-import { Worker } from 'node:worker_threads';
+import Piscina from 'piscina';
 
-/**
- * WorkerPool class to manage a pool of worker threads
- */
-export default class WorkerPool {
-  /** @private {SharedArrayBuffer} - Shared memory for active thread count */
-  sharedBuffer = new SharedArrayBuffer(Int32Array.BYTES_PER_ELEMENT);
-  /** @private {Int32Array} - A typed array to access shared memory */
-  activeThreads = new Int32Array(this.sharedBuffer);
-  /** @private {Array} - Queue of pending tasks */
-  queue = [];
-
-  /**
-   * @param {string | URL} workerScript - Path to the worker script (relative to this file or absolute URL)
-   * @param {number} threads - Maximum number of concurrent worker threads
-   */
-  constructor(workerScript = './generator-worker.mjs', threads = 1) {
-    this.workerScript =
-      workerScript instanceof URL
-        ? workerScript
-        : new URL(workerScript, import.meta.url);
-
-    this.threads = threads;
-  }
-
-  /**
-   * Gets the current active thread count.
-   * @returns {number} The current active thread count.
-   */
-  getActiveThreadCount() {
-    return Atomics.load(this.activeThreads, 0);
-  }
-
-  /**
-   * Changes the active thread count atomically by a given delta.
-   * @param {number} delta - The value to increment or decrement the active thread count by.
-   */
-  changeActiveThreadCount(delta) {
-    Atomics.add(this.activeThreads, 0, delta);
-  }
-
-  /**
-   * Runs a task in a worker thread with the given data.
-   * @param {Object} workerData - Data to pass to the worker thread
-   * @returns {Promise} Resolves with the worker result, or rejects with an error
-   */
-  run(workerData) {
-    return new Promise((resolve, reject) => {
-      /**
-       * Runs the worker thread and handles the result or error.
- * @private - */ - const run = () => { - this.changeActiveThreadCount(1); +import logger from '../logger/index.mjs'; - const worker = new Worker(this.workerScript, { workerData }); +const poolLogger = logger.child('WorkerPool'); - worker.on('message', result => { - this.changeActiveThreadCount(-1); - this.processQueue(); +const workerScript = import.meta.resolve('./chunk-worker.mjs'); - if (result?.error) { - reject(new Error(result.error)); - } else { - resolve(result); - } - }); - - worker.on('error', err => { - this.changeActiveThreadCount(-1); - this.processQueue(); - reject(err); - }); - }; - - if (this.getActiveThreadCount() >= this.threads) { - this.queue.push(run); - } else { - run(); - } - }); - } - - /** - * Run multiple tasks in parallel, distributing across worker threads. - * @template T, R - * @param {T[]} tasks - Array of task data to process - * @returns {Promise} Results in same order as input tasks - */ - async runAll(tasks) { - return Promise.all(tasks.map(task => this.run(task))); - } - - /** - * Process the worker thread queue to start the next available task. - * @private - */ - processQueue() { - if (this.queue.length > 0 && this.getActiveThreadCount() < this.threads) { - const next = this.queue.shift(); - - if (next) { - next(); - } - } - } +/** + * Creates a Piscina worker pool for parallel processing. + * + * @param {number} threads - Maximum number of worker threads + * @returns {import('piscina').Piscina} Configured Piscina instance + */ +export default function createWorkerPool(threads) { + poolLogger.debug(`WorkerPool initialized`, { + threads, + workerScript, + }); + + return new Piscina({ + filename: workerScript, + minThreads: threads, + maxThreads: threads, + idleTimeout: Infinity, // Keep workers alive + }); } diff --git a/src/threading/parallel.mjs b/src/threading/parallel.mjs index 25333cb8..c0172813 100644 --- a/src/threading/parallel.mjs +++ b/src/threading/parallel.mjs @@ -1,110 +1,130 @@ 'use strict'; import { allGenerators } from '../generators/index.mjs'; +import logger from '../logger/index.mjs'; + +const parallelLogger = logger.child('parallel'); /** - * Creates a ParallelWorker that uses real Node.js Worker threads - * for parallel processing of items. + * Splits items into chunks of specified size. * - * @param {string} generatorName - Name of the generator (for chunk processing) - * @param {import('./index.mjs').default} pool - WorkerPool instance for spawning workers - * @param {object} options - Generator options - * @returns {ParallelWorker} + * @param {number} count - Total number of items + * @param {number} size - Maximum items per chunk + * @returns {number[][]} Array of index arrays for each chunk */ -export default function createParallelWorker(generatorName, pool, options) { - const { threads, chunkSize } = options; +const createChunks = (count, size) => { + const chunks = []; - const generator = allGenerators[generatorName]; + for (let i = 0; i < count; i += size) { + chunks.push( + Array.from({ length: Math.min(size, count - i) }, (_, j) => i + j) + ); + } - /** - * Splits items into chunks of specified size. 
-   * @param {number} count - Number of items
-   * @param {number} size - Items per chunk
-   * @returns {number[][]} Array of index arrays
-   */
-  const createIndexChunks = (count, size) => {
-    const chunks = [];
-
-    for (let i = 0; i < count; i += size) {
-      const end = Math.min(i + size, count);
-
-      const chunk = [];
-
-      for (let j = i; j < end; j++) {
-        chunk.push(j);
-      }
+  return chunks;
+};
 
-      chunks.push(chunk);
-    }
+/**
+ * Prepares task data for a chunk, slicing input to only include relevant items.
+ *
+ * @param {unknown[]} fullInput - Full input array
+ * @param {number[]} indices - Indices to process
+ * @param {object} options - Serialized options
+ * @param {string} generatorName - Name of the generator
+ * @returns {ParallelTaskOptions} Task data for Piscina
+ */
+const createTask = (fullInput, indices, options, generatorName) => ({
+  generatorName,
+  // Only send the items needed for this chunk (reduces serialization overhead)
+  input: indices.map(i => fullInput[i]),
+  // Remap indices to 0-based for the sliced array
+  itemIndices: indices.map((_, i) => i),
+  options,
+});
 
-    return chunks;
-  };
+/**
+ * Creates a parallel worker that distributes work across a Piscina thread pool.
+ *
+ * @param {keyof AllGenerators} generatorName - Generator name
+ * @param {import('piscina').Piscina} pool - Piscina instance
+ * @param {Partial} options - Generator options
+ * @returns {ParallelWorker}
+ */
+export default function createParallelWorker(generatorName, pool, options) {
+  const { threads, chunkSize } = options;
 
-  /**
-   * Strips non-serializable properties from options for worker transfer
-   * @param {object} extra - Extra options to merge
-   */
+  /**
+   * Strips non-serializable properties from options before worker transfer.
+   *
+   * @param {object} extra - Extra options to merge
+   */
   const serializeOptions = extra => {
-    const serialized = { ...options, ...extra };
+    const opts = { ...options, ...extra };
 
-    delete serialized.worker;
+    delete opts.worker;
 
-    return serialized;
+    return opts;
   };
 
+  const generator = allGenerators[generatorName];
+
   return {
     /**
-     * Process items in parallel using real worker threads.
-     * Items are split into chunks, each chunk processed by a separate worker.
+     * Processes items in parallel, yielding results as chunks complete.
      *
      * @template T, R
-     * @param {T[]} items - Items to process (must be serializable)
-     * @param {T[]} fullInput - Full input data for context rebuilding in workers
-     * @param {object} extra - Generator-specific context (e.g. apiTemplate, parsedSideNav)
-     * @returns {Promise} - Results in same order as input items
+     * @param {T[]} items - Items to process
+     * @param {T[]} fullInput - Full input for context
+     * @param {object} extra - Extra options
+     * @yields {R[]} Chunk results as they complete
      */
-    async map(items, fullInput, extra) {
-      const itemCount = items.length;
-
-      if (itemCount === 0) {
-        return [];
+    async *stream(items, fullInput, extra) {
+      if (items.length === 0) {
+        return;
       }
 
-      if (!generator.processChunk) {
-        throw new Error(
-          `Generator "${generatorName}" does not support chunk processing`
-        );
-      }
+      const opts = serializeOptions(extra);
 
-      // For single thread or small workloads - run in main thread
-      if (threads <= 1 || itemCount <= 2) {
-        const indices = [];
+      const chunks = createChunks(items.length, chunkSize);
 
-        for (let i = 0; i < itemCount; i++) {
-          indices.push(i);
-        }
+      parallelLogger.debug(
+        `Distributing ${items.length} items across ${chunks.length} chunks`,
+        { generator: generatorName, chunks: chunks.length, chunkSize, threads }
+      );
 
-        return generator.processChunk(fullInput, indices, {
-          ...options,
-          ...extra,
-        });
-      }
-
-      // Divide items into chunks based on chunkSize
-      const indexChunks = createIndexChunks(itemCount, chunkSize);
-
-      // Process chunks in parallel using worker threads
-      const chunkResults = await pool.runAll(
-        indexChunks.map(indices => ({
-          generatorName,
-          fullInput,
-          itemIndices: indices,
-          options: serializeOptions(extra),
-        }))
+      // For a single thread or tiny workloads, run chunks on the main thread
+      const runInOneGo = threads <= 1 || items.length <= 2;
+
+      // Submit every chunk up front; each promise resolves to itself so the
+      // completed entry can be removed from the pending set as it finishes
+      const pending = new Set(
+        chunks.map(indices => {
+          const task = runInOneGo
+            ? generator.processChunk(fullInput, indices, opts)
+            : pool.run(createTask(fullInput, indices, opts, generatorName));
+
+          const promise = task.then(result => ({ promise, result }));
+
+          return promise;
+        })
       );
 
-      // Flatten results
-      return chunkResults.flat();
+      // Yield chunk results in completion order, not submission order
+      let completed = 0;
+
+      while (pending.size > 0) {
+        const { promise, result } = await Promise.race(pending);
+
+        pending.delete(promise);
+
+        completed++;
+
+        parallelLogger.debug(`Chunk ${completed}/${chunks.length} completed`, {
+          generator: generatorName,
+        });
+
+        yield result;
+      }
     },
   };
 }
diff --git a/src/types.d.ts b/src/types.d.ts
index 3e311761..065174c8 100644
--- a/src/types.d.ts
+++ b/src/types.d.ts
@@ -10,10 +10,7 @@ type NodeWithData<T> = T & {
 declare global {
   export interface ParserOutput<T> {
-    file: {
-      stem?: string;
-      basename?: string;
-    };
+    file: { stem?: string; basename?: string };
     tree: T;
   }
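
A minimal usage sketch of the streaming helpers introduced above. The import path is an assumption (the file header for that hunk falls outside this excerpt), and the generator is purely illustrative:

// Assumed module path; adjust to wherever the helpers actually live
import {
  collectAsyncGenerator,
  createStreamingCache,
  isAsyncGenerator,
} from './src/utils/streaming.mjs';

// A toy async generator that yields results in chunks
async function* chunks() {
  yield [1, 2];
  yield [3];
}

console.log(isAsyncGenerator(chunks())); // true
console.log(await collectAsyncGenerator(chunks())); // [1, 2, 3]

// Two consumers share a single collection pass: the second call returns the
// cached promise instead of iterating a second generator
const cache = createStreamingCache();
const first = cache.getOrCollect('example', chunks());
const second = cache.getOrCollect('example', chunks());
console.log((await first) === (await second)); // true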
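
And a sketch of how the new pool and parallel worker compose end to end, mirroring the updated tests (the 'metadata' generator name, the { typeMap: {} } options, and the ParserOutput-shaped input follow the test fixtures above):

import createWorkerPool from './src/threading/index.mjs';
import createParallelWorker from './src/threading/parallel.mjs';

const pool = createWorkerPool(4);
const worker = createParallelWorker('metadata', pool, {
  threads: 4,
  chunkSize: 10,
});

const input = [
  {
    file: { stem: 'test1', basename: 'test1.md' },
    tree: { type: 'root', children: [] },
  },
];

// stream() yields each chunk's results as soon as that chunk finishes
for await (const chunk of worker.stream(input, input, { typeMap: {} })) {
  console.log(`received ${chunk.length} results`);
}

await pool.destroy();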
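
The completion-order loop in stream() relies on promises that resolve to themselves, so the winner of each Promise.race() can be deleted from the pending set. The same pattern in isolation, with illustrative delays standing in for chunk work:

const delay = (ms, value) =>
  new Promise(resolve => setTimeout(() => resolve(value), ms));

const pending = new Set(
  [300, 100, 200].map(ms => {
    // The .then callback closes over `promise`, which is assigned before
    // any timer can fire, so the self-reference is safe
    const promise = delay(ms, ms).then(result => ({ promise, result }));

    return promise;
  })
);

while (pending.size > 0) {
  const { promise, result } = await Promise.race(pending);

  pending.delete(promise);

  console.log(result); // logs 100, then 200, then 300
}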