11import { Octokit } from "@octokit/rest" ;
2- import { JSDOM } from "jsdom " ;
2+ import { parseHTML } from "linkedom " ;
33import fs , { readFile } from "fs/promises" ;
44import path , { join } from "path" ;
55import { fileURLToPath } from "url" ;
66import { execSync , spawnSync } from "child_process" ;
77import { visualizeTextDiff } from "./text-diff-visualizer" ;
8- import { getTextFromDOM } from "./text-from-element " ;
8+ import { convert } from "html-to-text " ;
99
1010const __dirname = path . dirname ( fileURLToPath ( import . meta. url ) ) ;
1111
@@ -68,8 +68,8 @@ async function fetchPageContent(
6868 throw new Error ( `Failed to fetch ${ url } : ${ response . status } ` ) ;
6969 }
7070 const html = await response . text ( ) ;
71- const dom = new JSDOM ( html ) ;
72- const contentElement = dom . window . document . querySelector ( "#mw-content-text" ) ;
71+ const document = parseHTML ( html ) . document ;
72+ const contentElement = document . querySelector ( "#mw-content-text" ) ;
7373
7474 const selectorsToRemove = [
7575 ".t-navbar" ,
@@ -81,15 +81,15 @@ async function fetchPageContent(
8181 const elements = contentElement ?. querySelectorAll ( selector ) ;
8282 elements ?. forEach ( ( el ) => el . remove ( ) ) ;
8383 }
84- const headingElement = dom . window . document . querySelector ( "#firstHeading" ) ;
84+ const headingElement = document . querySelector ( "#firstHeading" ) ;
8585 if ( ! contentElement ) {
8686 throw new Error ( "Could not find #mw-content-text" ) ;
8787 }
8888 return {
8989 html : contentElement . innerHTML ,
9090 title : headingElement ?. textContent ?. trim ( ) || "" ,
9191 url,
92- innerText : getTextFromDOM ( contentElement ) ,
92+ innerText : ( contentElement as HTMLDivElement ) . innerText ,
9393 } ;
9494}
9595
@@ -102,7 +102,7 @@ async function convertToMDX(
102102 "{{LLM_DOCS}}" ,
103103 await readFile (
104104 __dirname +
105- "/../src/content/docs/development/guide/component-docs-for-llm.mdx" ,
105+ "/../src/content/docs/development/guide/component-docs-for-llm.mdx" ,
106106 "utf8"
107107 )
108108 ) ;
@@ -304,16 +304,16 @@ async function createPullRequest(
304304
305305 const newInnerText = await readFile ( getRelativeHTMLPath ( url ) , "utf8" )
306306 . then ( ( data ) => {
307- const dom = new JSDOM ( data ) ;
308- const contentElement = dom . window . document . querySelector ( "main" ) ;
307+ const document = parseHTML ( data ) . document ;
308+ const contentElement = document . querySelector ( "main" ) ;
309309 const selectorsToRemove = [ ".sl-anchor-link" ] ;
310310 for ( const selector of selectorsToRemove ) {
311311 const elements = contentElement ?. querySelectorAll ( selector ) ;
312312 elements ?. forEach ( ( el ) => el . remove ( ) ) ;
313313 }
314314
315315 if ( ! contentElement ) return "" ;
316- return getTextFromDOM ( contentElement ) ;
316+ return ( contentElement as HTMLDivElement ) . innerText ;
317317 } )
318318 . catch ( ( ) => "" ) ;
319319
@@ -455,11 +455,11 @@ async function main() {
455455 if ( res . status !== 0 ) {
456456 throw new Error (
457457 "构建失败,可能生成的MDX有问题:" +
458- res . stderr ?. toString ( ) +
459- res . stdout ?. toString ( ) +
460- res . error ?. toString ( ) +
461- " exit code " +
462- res . status
458+ res . stderr ?. toString ( ) +
459+ res . stdout ?. toString ( ) +
460+ res . error ?. toString ( ) +
461+ " exit code " +
462+ res . status
463463 ) ;
464464 }
465465
0 commit comments