diff --git a/.github/workflows/compare-builds.yml b/.github/workflows/compare-builds.yml deleted file mode 100644 index 71ce30da..00000000 --- a/.github/workflows/compare-builds.yml +++ /dev/null @@ -1,130 +0,0 @@ -name: Compare Build Outputs - -on: - workflow_run: - workflows: ['Generate Docs'] - types: [completed] - -permissions: - contents: read - actions: read - pull-requests: write - -jobs: - get-comparators: - name: Get Comparators - runs-on: ubuntu-latest - if: github.event.workflow_run.event == 'pull_request' - outputs: - comparators: ${{ steps.get-comparators.outputs.comparators }} - steps: - - name: Harden Runner - uses: step-security/harden-runner@20cf305ff2072d973412fa9b1e3a4f227bda3c76 # v2.14.0 - with: - egress-policy: audit - - - name: Checkout Code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - - - name: List comparators - id: get-comparators - run: | - # List all .mjs files in scripts/compare-builds/ and remove the .mjs extension - COMPARATORS=$(ls scripts/compare-builds/*.mjs | xargs -n1 basename | sed 's/\.mjs$//' | jq -R -s -c 'split("\n")[:-1]') - echo "comparators=$COMPARATORS" >> $GITHUB_OUTPUT - - compare: - name: Run ${{ matrix.comparator }} comparator - runs-on: ubuntu-latest - needs: get-comparators - strategy: - matrix: - comparator: ${{ fromJSON(needs.get-comparators.outputs.comparators) }} - - steps: - - name: Harden Runner - uses: step-security/harden-runner@20cf305ff2072d973412fa9b1e3a4f227bda3c76 # v2.14.0 - with: - egress-policy: audit - - - name: Checkout Code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - - - name: Download Output (HEAD) - uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 - with: - name: ${{ matrix.comparator }} - path: out/head - run-id: ${{ github.event.workflow_run.id }} - github-token: ${{ secrets.GITHUB_TOKEN }} - - - name: Get Run ID from BASE - id: base-run - env: - WORKFLOW_ID: ${{ 
github.event.workflow_run.workflow_id }} - GH_TOKEN: ${{ github.token }} - run: | - ID=$(gh run list -c $GITHUB_SHA -w $WORKFLOW_ID -L 1 --json databaseId --jq ".[].databaseId") - echo "run_id=$ID" >> $GITHUB_OUTPUT - - - name: Download Output (BASE) - uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 - with: - name: ${{ matrix.comparator }} - path: out/base - run-id: ${{ steps.base-run.outputs.run_id }} - github-token: ${{ secrets.GITHUB_TOKEN }} - - - name: Compare Bundle Size - id: compare - run: | - node scripts/compare-builds/${{ matrix.comparator }}.mjs > result.txt - if [ -s result.txt ]; then - echo "has_output=true" >> "$GITHUB_OUTPUT" - else - echo "has_output=false" >> "$GITHUB_OUTPUT" - fi - - - name: Upload comparison artifact - if: steps.compare.outputs.has_output == 'true' - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 - with: - name: ${{ matrix.comparator }} - path: result.txt - - aggregate: - name: Aggregate Comparison Results - runs-on: ubuntu-latest - needs: compare - steps: - - name: Harden Runner - uses: step-security/harden-runner@20cf305ff2072d973412fa9b1e3a4f227bda3c76 # v2.14.0 - with: - egress-policy: audit - - - name: Download all comparison artifacts - uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 - with: - path: results - - - name: Combine results - id: combine - run: | - shopt -s nullglob - result_files=(results/*.txt) - - if ((${#result_files[@]})); then - { - echo "combined<> "$GITHUB_OUTPUT" - fi - - - name: Add Comment to PR - if: steps.combine.outputs.combined - uses: thollander/actions-comment-pull-request@24bffb9b452ba05a4f3f77933840a6a841d1b32b # v3.0.1 - with: - comment-tag: compared - message: ${{ steps.combine.outputs.combined }} - pr-number: ${{ github.event.workflow_run.pull_requests[0].number }} diff --git a/.github/workflows/generate.yml b/.github/workflows/generate.yml index 750ee278..2c9aae59 100644 --- 
a/.github/workflows/generate.yml +++ b/.github/workflows/generate.yml @@ -1,4 +1,4 @@ -name: Generate Docs +name: Generate and Compare Docs on: push: @@ -15,8 +15,64 @@ permissions: contents: read jobs: + prepare: + runs-on: ubuntu-latest + outputs: + sha: ${{ steps.push.outputs.sha || steps.pr.outputs.sha }} + base-run: ${{ steps.main.outputs.run_id }} + steps: + - name: Harden Runner + uses: step-security/harden-runner@20cf305ff2072d973412fa9b1e3a4f227bda3c76 # v2.14.0 + with: + egress-policy: audit + + # If we are running from the main branch (a non-pull_request event), we + # want the latest SHA from nodejs/node + - id: push + if: ${{ github.event_name != 'pull_request' }} + run: | + SHA=$(git ls-remote https://github.com/nodejs/node.git HEAD | awk '{print $1}') + echo "$SHA" > commit + echo "sha=$SHA" >> "$GITHUB_OUTPUT" + + - name: Upload metadata artifact + if: ${{ github.event_name != 'pull_request' }} + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: commit + path: commit + + # If we are running from a PR (a pull_request event), we + # want the SHA used by the most recent `push` run + - name: Get latest `main` run + if: ${{ github.event_name == 'pull_request' }} + id: main + env: + BASE_SHA: ${{ github.event.pull_request.base.sha }} + GH_TOKEN: ${{ github.token }} + run: | + # `174604400` refers to `generate.yml`'s workflow ID, available at https://api.github.com/repos/nodejs/doc-kit/actions/workflows/generate.yml + # The `databaseId` is a given runs run ID (Ref: https://docs.github.com/en/enterprise-cloud@latest/graphql/reference/objects#workflowrun) + ID=$(gh run list --repo $GITHUB_REPOSITORY -c $BASE_SHA -w 174604400 -L 1 --json databaseId --jq ".[].databaseId") + echo "run_id=$ID" >> $GITHUB_OUTPUT + + - name: Download metadata artifact + if: ${{ github.event_name == 'pull_request' }} + uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 + with: + name: commit + run-id: ${{ 
steps.main.outputs.run_id }} + github-token: ${{ secrets.GITHUB_TOKEN }} + + - id: pr + if: ${{ github.event_name == 'pull_request' }} + run: | + SHA=$(cat commit) + echo "sha=$SHA" >> "$GITHUB_OUTPUT" + generate: runs-on: ubuntu-latest + needs: prepare strategy: matrix: include: @@ -32,10 +88,12 @@ jobs: input: './node/doc/api/*.md' - target: legacy-json input: './node/doc/api/*.md' + compare: true - target: legacy-html input: './node/doc/api/*.md' - target: web input: './node/doc/api/*.md' + compare: true - target: llms-txt input: './node/doc/api/*.md' fail-fast: false @@ -56,6 +114,7 @@ jobs: with: persist-credentials: false repository: nodejs/node + ref: ${{ needs.prepare.outputs.sha }} sparse-checkout: | doc/api lib @@ -79,13 +138,27 @@ jobs: node bin/cli.mjs generate \ -t ${{ matrix.target }} \ -i "${{ matrix.input }}" \ - -o "out/${{ matrix.target }}" \ + -o out \ -c ./node/CHANGELOG.md \ --index ./node/doc/api/index.md \ --log-level debug + - name: Download base branch artifact + if: ${{ matrix.compare && needs.prepare.outputs.base-run }} + uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 + with: + name: ${{ matrix.target }} + path: base + run-id: ${{ needs.prepare.outputs.base-run }} + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Compare to base branch + if: ${{ matrix.compare && needs.prepare.outputs.base-run }} + run: | + node scripts/compare-builds/${{ matrix.target }}.mjs > out/comparison.txt + - name: Upload ${{ matrix.target }} artifacts uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 with: name: ${{ matrix.target }} - path: out/${{ matrix.target }} + path: out diff --git a/.github/workflows/leave-comment.yml b/.github/workflows/leave-comment.yml new file mode 100644 index 00000000..610c4007 --- /dev/null +++ b/.github/workflows/leave-comment.yml @@ -0,0 +1,47 @@ +name: Leave a comment + +on: + workflow_run: + workflows: ['Generate and Compare Docs'] + types: [completed] + 
+permissions: + contents: read + actions: read + pull-requests: write + +jobs: + aggregate: + name: Aggregate Comparison Results + runs-on: ubuntu-latest + steps: + - name: Harden Runner + uses: step-security/harden-runner@20cf305ff2072d973412fa9b1e3a4f227bda3c76 # v2.14.0 + with: + egress-policy: audit + + - name: Download all comparison artifacts + uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 + with: + run-id: ${{ github.event.workflow_run.id }} + github-token: ${{ secrets.GITHUB_TOKEN }} + path: results + + - name: Combine results + id: combine + # Even if the cat fails (no files found), we don't want to fail the workflow + continue-on-error: true + run: | + { + echo "combined<> "$GITHUB_OUTPUT" + + - name: Add Comment to PR + if: steps.combine.outputs.combined + uses: thollander/actions-comment-pull-request@24bffb9b452ba05a4f3f77933840a6a841d1b32b # v3.0.1 + with: + comment-tag: compared + message: ${{ steps.combine.outputs.combined }} + pr-number: ${{ github.event.workflow_run.pull_requests[0].number }} diff --git a/.gitignore b/.gitignore index 4ef2b790..c86ab85e 100644 --- a/.gitignore +++ b/.gitignore @@ -2,8 +2,9 @@ node_modules npm-debug.log -# Default Output Directory +# Default Output and Comparison Directories out +base # Tests coverage diff --git a/docs/commands.md b/docs/commands.md new file mode 100644 index 00000000..9a78f7af --- /dev/null +++ b/docs/commands.md @@ -0,0 +1,223 @@ +# Creating Commands + +## Command Structure + +Commands in `doc-kit` are defined as modules that export a command object conforming to the `Command` interface: + +```typescript +interface Command { + name: string; + description: string; + options: { [key: string]: Option }; + action: (options: any) => Promise; +} +``` + +Each command consists of: + +- **name**: The command name used in the CLI (e.g., `generate`, `interactive`) +- **description**: A short description shown in help text +- **options**: An object mapping option names to 
their definitions +- **action**: The async function that executes when the command is run + +## Creating a New Command + +### Step 1: Create the Command File + +Create a new file in `bin/commands/` with your command name: + +```javascript +// bin/commands/my-command.mjs +import logger from '../../src/logger/index.mjs'; + +/** + * @type {import('./types').Command} + */ +export default { + name: 'my-command', + description: 'Does something useful', + + options: { + // Define your options here (see next section) + }, + + async action(opts) { + logger.info('Starting my-command', opts); + + // Your command logic here + + logger.info('Completed my-command'); + }, +}; +``` + +### Step 2: Register the Command + +Add your command to the exports in `bin/commands/index.mjs`: + +```javascript +import generate from './generate.mjs'; +import interactive from './interactive.mjs'; +import myCommand from './my-command.mjs'; // Add this + +export default [ + generate, + interactive, + myCommand, // Add this +]; +``` + +### Step 3: Update CLI Entry Point + +The CLI in `bin/cli.mjs` automatically loads commands from `bin/commands/index.mjs`, so no changes are needed there if you followed step 2. + +## Command Options + +Options define the flags and parameters your command accepts. 
Each option has: + +```typescript +interface Option { + flags: string[]; // CLI flags (e.g., ['-i', '--input ']) + desc: string; // Description for help text + prompt?: PromptConfig; // Interactive mode configuration +} +``` + +### Defining Options + +```javascript +options: { + input: { + flags: ['-i', '--input '], + desc: 'Input file patterns (glob)', + prompt: { + type: 'text', + message: 'Enter input glob patterns', + variadic: true, + required: true, + }, + }, + + force: { + flags: ['-f', '--force'], + desc: 'Force overwrite existing files', + prompt: { + type: 'confirm', + message: 'Overwrite existing files?', + initialValue: false, + }, + }, + + mode: { + flags: ['-m', '--mode '], + desc: 'Operation mode', + prompt: { + type: 'select', + message: 'Choose operation mode', + options: [ + { label: 'Fast', value: 'fast' }, + { label: 'Thorough', value: 'thorough' }, + ], + }, + }, +} +``` + +### Flag Syntax + +- `` - Required argument +- `[value]` - Optional argument +- `` - Variadic (multiple values) +- `[values...]` - Optional variadic + +### Option Types + +#### `text` + +Single-line text input. + +```javascript +prompt: { + type: 'text', + message: 'Enter a value', + initialValue: 'default', + required: true, +} +``` + +#### `confirm` + +Yes/no confirmation. + +```javascript +prompt: { + type: 'confirm', + message: 'Are you sure?', + initialValue: false, +} +``` + +#### `select` + +Single choice from a list. + +```javascript +prompt: { + type: 'select', + message: 'Choose one', + options: [ + { label: 'Option 1', value: 'opt1' }, + { label: 'Option 2', value: 'opt2' }, + ], +} +``` + +#### `multiselect` + +Multiple choices from a list. + +```javascript +prompt: { + type: 'multiselect', + message: 'Choose multiple', + options: [ + { label: 'Choice A', value: 'a' }, + { label: 'Choice B', value: 'b' }, + ], +} +``` + +## Interactive Prompts + +The `interactive` command automatically uses the `prompt` configuration from your options. 
When users run: + +```bash +doc-kit interactive +``` + +They'll be prompted to select a command, then guided through all required options. + +### Prompt Configuration + +- **message**: Question to ask the user +- **type**: Input type (`text`, `confirm`, `select`, `multiselect`) +- **required**: Whether the field must have a value +- **initialValue**: Default value +- **variadic**: Whether multiple values can be entered (for `text` type) +- **options**: Choices for `select`/`multiselect` types + +### Making Options Interactive-Friendly + +Always provide helpful messages and sensible defaults: + +```javascript +threads: { + flags: ['-p', '--threads '], + desc: 'Number of threads to use (minimum: 1)', + prompt: { + type: 'text', + message: 'How many threads to allow', + initialValue: String(cpus().length), // Smart default + }, +}, +``` diff --git a/docs/comparators.md b/docs/comparators.md new file mode 100644 index 00000000..d74c4b83 --- /dev/null +++ b/docs/comparators.md @@ -0,0 +1,165 @@ +# Creating Comparators + +This guide explains how to create build comparison scripts for `@nodejs/doc-kit`. Comparators help identify differences between documentation builds, useful for CI/CD and regression testing. + +## Comparator Concepts + +Comparators are scripts that: + +1. **Compare** generated documentation between two builds (base vs. head) +2. **Identify differences** in content, structure, or file size +3. **Report results** in a format suitable for CI/CD systems +4. 
**Help catch regressions** before merging changes + +### When to Use Comparators + +- **Verify backward compatibility** - Ensure new code produces same output +- **Track file size changes** - Monitor bundle size growth +- **Validate transformations** - Check that refactors don't alter output +- **Debug generation issues** - Understand what changed between versions + +## Comparator Structure + +Comparators are standalone ESM scripts located in `scripts/compare-builds/`: + +``` +scripts/compare-builds/ +├── utils.mjs # Shared utilities (BASE, HEAD paths) +├── legacy-json.mjs # Compare legacy JSON output +├── web.mjs # Compare web bundle sizes +└── your-comparator.mjs # Your new comparator +``` + +### Naming Convention + +**Each comparator must have the same name as the generator it compares.** For example: + +- `web.mjs` compares output from the `web` generator +- `legacy-json.mjs` compares output from the `legacy-json` generator +- `my-format.mjs` would compare output from a `my-format` generator + +## Creating a Comparator + +### Step 1: Create the Comparator File + +Create a new file in `scripts/compare-builds/` with the same name as your generator: + +```javascript +// scripts/compare-builds/my-format.mjs +import { readdir, readFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import { BASE, HEAD } from './utils.mjs'; + +// Fetch files from both directories +const [baseFiles, headFiles] = await Promise.all([BASE, HEAD].map(() => await readdir(dir))); + +// Find all unique files across both builds +const allFiles = [...new Set([...baseFiles, ...headFiles])]; + +/** + * Compare a single file between base and head + * @param {string} file - Filename to compare + * @returns {Promise} Difference object or null if identical + */ +const compareFile = async file => { + const basePath = join(BASE, file); + const headPath = join(HEAD, file); + + try { + const baseContent = await readFile(basePath, 'utf-8'); + const headContent = await readFile(headPath, 
'utf-8'); + + if (baseContent !== headContent) { + return { + file, + type: 'modified', + baseSize: baseContent.length, + headSize: headContent.length, + }; + } + + return null; + } catch (error) { + // File missing in one of the builds + const exists = await Promise.all([ + readFile(basePath, 'utf-8').then(() => true).catch(() => false), + readFile(headPath, 'utf-8').then(() => true).catch(() => false), + ]); + + if (exists[0] && !exists[1]) { + return { file, type: 'removed' }; + } + if (!exists[0] && exists[1]) { + return { file, type: 'added' }; + } + + return { file, type: 'error', error: error.message }; + } +}; + +// Compare all files in parallel +const results = await Promise.all(allFiles.map(compareFile)); + +// Filter out null results (identical files) +const differences = results.filter(Boolean); + +// Output markdown results +if (differences.length > 0) { + console.log('## `my-format` Generator'); + console.log(''); + console.log(`Found ${differences.length} difference(s):`); + console.log(''); + + // Group by type + const added = differences.filter(d => d.type === 'added'); + const removed = differences.filter(d => d.type === 'removed'); + const modified = differences.filter(d => d.type === 'modified'); + + if (added.length) { + console.log('### Added Files'); + console.log(''); + added.forEach(d => console.log(`- \`${d.file}\``)); + console.log(''); + } + + if (removed.length) { + console.log('### Removed Files'); + console.log(''); + removed.forEach(d => console.log(`- \`${d.file}\``)); + console.log(''); + } + + if (modified.length) { + console.log('### Modified Files'); + console.log(''); + console.log('| File | Base Size | Head Size | Diff |'); + console.log('|-|-|-|-|'); + modified.forEach(({ file, baseSize, headSize }) => { + const diff = headSize - baseSize; + const sign = diff > 0 ? 
'+' : ''; + console.log(`| \`${file}\` | ${baseSize} | ${headSize} | ${sign}${diff} |`); + }); + console.log(''); + } +} +``` + +### Step 2: Test Locally + +Run your comparator locally to verify it works: + +```bash +# Set up BASE and HEAD directories +export BASE=path/to/base/output +export HEAD=path/to/head/output + +# Run the comparator +node scripts/compare-builds/my-format.mjs +``` + +### Step 3: Integrate with CI/CD + +The comparator will automatically run in GitHub Actions when: + +1. Your generator is configured with `compare: true` in the workflow +2. The comparator filename matches the generator name diff --git a/docs/generators.md b/docs/generators.md new file mode 100644 index 00000000..42836f4c --- /dev/null +++ b/docs/generators.md @@ -0,0 +1,427 @@ +# Creating Generators + +This guide explains how to create new documentation generators for `@nodejs/doc-kit`. + +## Generator Concepts + +Generators in `doc-kit` transform API documentation through a pipeline. Each generator: + +1. **Takes input** from a previous generator or raw files +2. **Processes the data** into a different format +3. **Yields output** for the next generator or final output + +### Generator Pipeline + +``` +Raw Markdown Files + ↓ + [ast] - Parse to MDAST + ↓ + [metadata] - Extract structured metadata + ↓ + [jsx-ast] - Convert to JSX AST + ↓ + [web] - Generate HTML/CSS/JS bundles +``` + +Each generator declares its dependency using the `dependsOn` field, allowing automatic pipeline construction. 
+ +## Generator Structure + +A generator is defined as a module exporting an object conforming to the `GeneratorMetadata` interface: + +```typescript +interface GeneratorMetadata { + name: string; + version: string; + description: string; + dependsOn?: string; + + // Core generation function + generate( + input: Input, + options: Partial + ): Promise | AsyncGenerator; + + // Optional: for parallel processing + processChunk?( + fullInput: any, + itemIndices: number[], + deps: any + ): Promise; +} +``` + +## Creating a Basic Generator + +### Step 1: Create the Generator File + +Create a new directory in `src/generators/`: + +``` +src/generators/my-format/ +├── index.mjs # Main generator file +├── constants.mjs # Constants (optional) +├── types.d.ts # TypeScript types (optional) +└── utils/ # Utility functions (optional) + └── formatter.mjs +``` + +### Step 2: Implement the Generator + +```javascript +// src/generators/my-format/index.mjs +import { writeFile } from 'node:fs/promises'; +import { join } from 'node:path'; + +/** + * Generates output in MyFormat. 
+ * + * @typedef {Array} Input + * @typedef {string} Output + * + * @type {GeneratorMetadata} + */ +export default { + name: 'my-format', + + version: '1.0.0', + + description: 'Generates documentation in MyFormat', + + // This generator depends on the metadata generator + dependsOn: 'metadata', + + /** + * Main generation function + * + * @param {Input} input - Metadata entries from previous generator + * @param {Partial} options - Configuration + * @returns {Promise} + */ + async generate(input, { output, version }) { + // Transform input to your format + const result = transformToMyFormat(input, version); + + // Write to file if output directory specified + if (output) { + await writeFile(join(output, 'documentation.myformat'), result, 'utf-8'); + } + + return result; + }, +}; + +/** + * Transform metadata entries to MyFormat + * @param {Input} entries + * @param {import('semver').SemVer} version + * @returns {string} + */ +function transformToMyFormat(entries, version) { + // Your transformation logic here + return entries + .map(entry => `${entry.api}: ${entry.heading.data.name}`) + .join('\n'); +} +``` + +### Step 3: Register the Generator + +Add your generator to the exports in `src/generators/index.mjs`: + +```javascript +// For public generators (available via CLI) +import myFormat from './my-format/index.mjs'; + +export const publicGenerators = { + 'json-simple': jsonSimple, + 'my-format': myFormat, // Add this + // ... other generators +}; + +// For internal generators (used only as dependencies) +const internalGenerators = { + ast, + metadata, + // ... internal generators +}; +``` + +## Parallel Processing with Workers + +For generators processing large datasets, implement parallel processing using worker threads. 
+ +### Implementing Worker-Based Processing + +```javascript +export default { + name: 'parallel-generator', + version: '1.0.0', + description: 'Processes data in parallel', + dependsOn: 'metadata', + + /** + * Process a chunk of items in a worker thread. + * This function runs in isolated worker threads. + * + * @param {Array} fullInput - Complete input array + * @param {number[]} itemIndices - Indices of items to process + * @param {Object} deps - Serializable dependencies + * @returns {Promise>} + */ + async processChunk(fullInput, itemIndices, deps) { + const results = []; + + // Process only the items at specified indices + for (const idx of itemIndices) { + const item = fullInput[idx]; + const result = await processItem(item, deps); + results.push(result); + } + + return results; + }, + + /** + * Main generation function that orchestrates worker threads + * + * @param {Input} input + * @param {Partial} options + */ + async *generate(input, { worker, output }) { + // Prepare serializable dependencies + const deps = { + version: options.version, + ...someConfig, + }; + + // Stream chunks as they complete + for await (const chunkResult of worker.stream(input, input, deps)) { + // Process chunk result if needed + yield chunkResult; + } + }, +}; +``` + +### Key Points for Worker Processing + +1. **`processChunk` executes in worker threads** - No access to main thread state +2. **Only serializable data** can be passed to workers (no functions, classes, etc.) +3. **`fullInput` and `itemIndices`** - Workers receive full input but only process specified indices +4. 
**`deps` must be serializable** - Pass only JSON-compatible data + +### When to Use Workers + +Use parallel processing when: + +- Processing many independent items (files, modules, entries) +- Each item takes significant time to process +- Operations are CPU-intensive + +Don't use workers when: + +- Items have dependencies on each other +- Output must be in specific order +- Operation is I/O bound rather than CPU bound + +## Streaming Results + +Generators can yield results as they're produced using async generators: + +```javascript +export default { + name: 'streaming-generator', + version: '1.0.0', + description: 'Streams results as they are ready', + dependsOn: 'metadata', + + async processChunk(fullInput, itemIndices, deps) { + // Process chunk + return results; + }, + + /** + * Generator function that yields results incrementally + */ + async *generate(input, options) { + const { worker } = options; + + // Stream results as workers complete chunks + for await (const chunkResult of worker.stream(input, input, {})) { + // Yield immediately - downstream can start processing + yield chunkResult; + } + }, +}; +``` + +### Benefits of Streaming + +- **Reduced memory usage** - Process data in chunks +- **Earlier downstream starts** - Next generator can begin before this one finishes +- **Better parallelism** - Multiple generators can work simultaneously + +### Non-Streaming Generators + +Some generators must collect all input before processing: + +```javascript +export default { + name: 'batch-generator', + version: '1.0.0', + description: 'Requires all input at once', + dependsOn: 'jsx-ast', + + /** + * Non-streaming - returns Promise instead of AsyncGenerator + */ + async generate(input, options) { + // Collect all input (if dependency is streaming, this waits for completion) + const allData = await collectAll(input); + + // Process everything together + const result = processBatch(allData); + + return result; + }, +}; +``` + +Use non-streaming when: + +- You need 
all data to make decisions (e.g., code splitting, global analysis) +- Output format requires complete dataset +- Cross-references between items need resolution + +## Generator Dependencies + +### Declaring Dependencies + +```javascript +export default { + name: 'my-generator', + dependsOn: 'metadata', // This generator requires metadata output + + async generate(input, options) { + // input contains the output from 'metadata' generator + }, +}; +``` + +### Dependency Chain Example + +```javascript +// Step 1: Parse markdown to AST +export default { + name: 'ast', + dependsOn: undefined, // No dependency + // Processes raw markdown files +}; + +// Step 2: Extract metadata from AST +export default { + name: 'metadata', + dependsOn: 'ast', // Depends on AST + // Processes AST output +}; + +// Step 3: Generate HTML from metadata +export default { + name: 'html-generator', + dependsOn: 'metadata', // Depends on metadata + // Processes metadata output +}; +``` + +### Multiple Consumers + +Multiple generators can depend on the same generator: + +``` + metadata + ↙ ↓ ↘ + html json man-page +``` + +The framework ensures `metadata` runs once and its output is cached for all consumers. 
+ +## File Output + +### Writing Output Files + +```javascript +import { writeFile, mkdir } from 'node:fs/promises'; +import { join } from 'node:path'; + +async generate(input, options) { + const { output } = options; + + if (!output) { + // Return data without writing + return result; + } + + // Ensure directory exists + await mkdir(output, { recursive: true }); + + // Write single file + await writeFile( + join(output, 'output.txt'), + content, + 'utf-8' + ); + + // Write multiple files + for (const item of items) { + await writeFile( + join(output, `${item.name}.txt`), + item.content, + 'utf-8' + ); + } + + return result; +} +``` + +### Copying Assets + +```javascript +import { cp } from 'node:fs/promises'; +import { join } from 'node:path'; + +async generate(input, options) { + const { output } = options; + + if (output) { + // Copy asset directory + await cp( + new URL('./assets', import.meta.url), + join(output, 'assets'), + { recursive: true } + ); + } + + return result; +} +``` + +### Output Structure + +Organize output clearly: + +``` +output/ +├── index.html +├── api/ +│ ├── fs.html +│ ├── http.html +│ └── path.html +├── assets/ +│ ├── style.css +│ └── script.js +└── data/ + └── search-index.json +``` diff --git a/scripts/compare-builds/legacy-json.mjs b/scripts/compare-builds/legacy-json.mjs new file mode 100644 index 00000000..f221e814 --- /dev/null +++ b/scripts/compare-builds/legacy-json.mjs @@ -0,0 +1,34 @@ +import assert from 'node:assert'; +import { readdir, readFile } from 'node:fs/promises'; +import { join } from 'node:path'; + +import { BASE, HEAD } from './utils.mjs'; + +const files = await readdir(BASE); + +export const details = (summary, diff) => + `
\n${summary}\n\n\`\`\`diff\n${diff}\n\`\`\`\n\n
`; + +const getFileDiff = async file => { + const basePath = join(BASE, file); + const headPath = join(HEAD, file); + + const baseContent = JSON.parse(await readFile(basePath, 'utf-8')); + const headContent = JSON.parse(await readFile(headPath, 'utf-8')); + + try { + assert.deepStrictEqual(baseContent, headContent); + return null; + } catch ({ message }) { + return details(file, message); + } +}; + +const results = await Promise.all(files.map(getFileDiff)); + +const filteredResults = results.filter(Boolean); + +if (filteredResults.length) { + console.log('## `legacy-json` generator'); + console.log(filteredResults.join('\n')); +} diff --git a/scripts/compare-builds/utils.mjs b/scripts/compare-builds/utils.mjs new file mode 100644 index 00000000..5c1d4079 --- /dev/null +++ b/scripts/compare-builds/utils.mjs @@ -0,0 +1,4 @@ +import { fileURLToPath } from 'node:url'; + +export const BASE = fileURLToPath(import.meta.resolve('../../base')); +export const HEAD = fileURLToPath(import.meta.resolve('../../out')); diff --git a/scripts/compare-builds/web.mjs b/scripts/compare-builds/web.mjs index f8ce2a3a..9081d7b9 100644 --- a/scripts/compare-builds/web.mjs +++ b/scripts/compare-builds/web.mjs @@ -1,9 +1,8 @@ import { stat, readdir } from 'node:fs/promises'; import path from 'node:path'; -import { fileURLToPath } from 'node:url'; -const BASE = fileURLToPath(import.meta.resolve('../../out/base')); -const HEAD = fileURLToPath(import.meta.resolve('../../out/head')); +import { BASE, HEAD } from './utils.mjs'; + const UNITS = ['B', 'KB', 'MB', 'GB']; /** @@ -21,131 +20,51 @@ const formatBytes = bytes => { }; /** - * Formats the difference between base and head sizes - * @param {number} base - Base file size in bytes - * @param {number} head - Head file size in bytes - * @returns {string} Formatted diff string (e.g., "+1.50 KB (+10.00%)") - */ -const formatDiff = (base, head) => { - const diff = head - base; - const sign = diff > 0 ? '+' : ''; - const percent = base ? 
`${sign}${((diff / base) * 100).toFixed(2)}%` : 'N/A'; - return `${sign}${formatBytes(diff)} (${percent})`; -}; - -/** - * Gets all files in a directory with their stats - * @param {string} dir - Directory path to search - * @returns {Promise>} Map of filename to size + * Gets all files in a directory with their sizes + * @param {string} dir - Directory path to scan + * @returns {Promise>} Map of filename to size in bytes */ -const getDirectoryStats = async dir => { +const getStats = async dir => { const files = await readdir(dir); - const entries = await Promise.all( - files.map(async file => [file, (await stat(path.join(dir, file))).size]) - ); - return new Map(entries); -}; - -/** - * Generates a table row for a file - * @param {string} file - Filename - * @param {number} baseSize - Base size in bytes - * @param {number} headSize - Head size in bytes - * @returns {string} Markdown table row - */ -const generateRow = (file, baseSize, headSize) => { - const baseCol = formatBytes(baseSize); - const headCol = formatBytes(headSize); - const diffCol = formatDiff(baseSize, headSize); - - return `| \`${file}\` | ${baseCol} | ${headCol} | ${diffCol} |`; -}; - -/** - * Generates a markdown table - * @param {string[]} files - List of files - * @param {Map} baseStats - Base stats map - * @param {Map} headStats - Head stats map - * @returns {string} Markdown table - */ -const generateTable = (files, baseStats, headStats) => { - const header = '| File | Base | Head | Diff |\n|------|------|------|------|'; - const rows = files.map(f => - generateRow(f, baseStats.get(f), headStats.get(f)) + return new Map( + await Promise.all( + files.map(async f => [f, (await stat(path.join(dir, f))).size]) + ) ); - return `${header}\n${rows.join('\n')}`; }; -/** - * Wraps content in a details/summary element - * @param {string} summary - Summary text - * @param {string} content - Content to wrap - * @returns {string} Markdown details element - */ -const details = (summary, content) => - `
<details>\n<summary>${summary}</summary>\n\n${content}\n\n</details>
`; - -const [baseStats, headStats] = await Promise.all( - [BASE, HEAD].map(getDirectoryStats) -); - -const allFiles = Array.from( - new Set([...baseStats.keys(), ...headStats.keys()]) -); - -// Filter to only changed files (exist in both and have different sizes) -const changedFiles = allFiles.filter( - f => - baseStats.has(f) && - headStats.has(f) && - baseStats.get(f) !== headStats.get(f) -); - -if (changedFiles.length) { - // Separate HTML files and their matching JS files from other files - const pages = []; - const other = []; - - // Get all HTML base names - const htmlBaseNames = new Set( - changedFiles - .filter(f => path.extname(f) === '.html') - .map(f => path.basename(f, '.html')) - ); - - for (const file of changedFiles) { - const ext = path.extname(file); - const basename = path.basename(file, ext); - - // All HTML files go to pages - if (ext === '.html') { - pages.push(file); - } - // JS files go to pages only if they have a matching HTML file - else if (ext === '.js' && htmlBaseNames.has(basename)) { - pages.push(file); - } - // Everything else goes to other - else { - other.push(file); - } - } - - pages.sort(); - other.sort(); - - console.log('## Web Generator\n'); - - if (other.length) { - console.log(generateTable(other, baseStats, headStats)); - } - - if (pages.length) { - console.log( - details( - `Pages (${pages.filter(f => path.extname(f) === '.html').length})`, - generateTable(pages, baseStats, headStats) - ) - ); - } +// Fetch stats for both directories in parallel +const [baseStats, headStats] = await Promise.all([BASE, HEAD].map(getStats)); + +const didChange = f => + baseStats.has(f) && headStats.has(f) && baseStats.get(f) !== headStats.get(f); + +const toDiffObject = f => ({ + file: f, + base: baseStats.get(f), + head: headStats.get(f), + diff: headStats.get(f) - baseStats.get(f), +}); + +// Find files that exist in both directories but have different sizes, +// then sort by absolute diff (largest changes first) +const changed = [...new 
Set([...baseStats.keys(), ...headStats.keys()])] + .filter(didChange) + .map(toDiffObject) + .sort((a, b) => Math.abs(b.diff) - Math.abs(a.diff)); + +// Output markdown table if there are changes +if (changed.length) { + const rows = changed.map(({ file, base, head, diff }) => { + const sign = diff > 0 ? '+' : ''; + const percent = `${sign}${((diff / base) * 100).toFixed(2)}%`; + const diffFormatted = `${sign}${formatBytes(diff)} (${percent})`; + + return `| \`${file}\` | ${formatBytes(base)} | ${formatBytes(head)} | ${diffFormatted} |`; + }); + + console.log('## Web Generator'); + console.log('| File | Base | Head | Diff |'); + console.log('|-|-|-|-|'); + console.log(rows.join('\n')); }