1+ #!/usr/bin/env node
2+
3+ import { Octokit } from "@octokit/rest" ;
4+ import { JSDOM } from "jsdom" ;
5+ import fs , { readFile } from "fs/promises" ;
6+ import path from "path" ;
7+ import { fileURLToPath } from "url" ;
8+
// Directory containing this script; ES modules do not provide __dirname.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Credentials and repository coordinates all come from the environment.
const { GITHUB_TOKEN, OPENROUTER_API_KEY } = process.env;
const REPO_OWNER = process.env.GITHUB_REPOSITORY_OWNER || "owner";
// GITHUB_REPOSITORY is split on "/" and the second segment is used as the repo name.
const REPO_NAME = process.env.GITHUB_REPOSITORY?.split("/")[1] || "cppdoc";
// Label used to select the issues this script processes.
const LABEL = "migrate-cppref-page";

// Exit with status 1 as soon as a required credential is found missing,
// checking the GitHub token first.
const requiredSecrets = [
  [GITHUB_TOKEN, "Missing GITHUB_TOKEN"],
  [OPENROUTER_API_KEY, "Missing OPENROUTER_API_KEY"],
];
for (const [secret, complaint] of requiredSecrets) {
  if (!secret) {
    console.error(complaint);
    process.exit(1);
  }
}

// Authenticated GitHub REST client shared by the functions below.
const octokit = new Octokit({ auth: GITHUB_TOKEN });
27+
/**
 * Finds the first en.cppreference.com wiki URL in an issue title.
 *
 * @param {string} title - Issue title to scan.
 * @returns {string|null} The first matching URL (everything up to the next
 *   whitespace), or null when the title contains no such URL.
 */
function extractLink(title) {
  const cpprefUrl = /https?:\/\/en\.cppreference\.com\/w\/[^\s]+/;
  const [firstUrl = null] = title.match(cpprefUrl) ?? [];
  return firstUrl;
}
33+
/**
 * Reports whether a title contains a `[#<digits>]` marker.
 *
 * @param {string} title - Issue title to inspect.
 * @returns {boolean} True when a `[#123]`-style marker appears anywhere in the title.
 */
function hasPRReference(title) {
  const prMarker = /\[#\d+\]/;
  return prMarker.exec(title) !== null;
}
37+
/**
 * Downloads a page and extracts its main content and heading.
 *
 * @param {string} url - Page URL to download.
 * @returns {Promise<{html: string, title: string, url: string}>} The inner HTML
 *   of `#mw-content-text`, the trimmed text of `#firstHeading` (empty string
 *   when absent), and the URL that was requested.
 * @throws {Error} When the HTTP response is not OK or `#mw-content-text` is missing.
 */
async function fetchPageContent(url) {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to fetch ${url}: ${response.status}`);
  }

  const { document } = new JSDOM(await response.text()).window;
  const body = document.querySelector("#mw-content-text");
  const heading = document.querySelector("#firstHeading");
  if (body === null) {
    throw new Error("Could not find #mw-content-text");
  }

  const title = heading?.textContent?.trim() || "";
  return { html: body.innerHTML, title, url };
}
56+
/**
 * Converts page HTML to MDX by calling the OpenRouter chat-completions API.
 *
 * The system prompt is PROMPT.md (next to this script) with its
 * `{{LLM_DOCS}}` placeholder replaced by the component documentation file.
 *
 * @param {string} html - HTML content of the page to convert.
 * @param {string} title - Page title, passed to the model for context.
 * @param {string} url - Source URL, passed to the model for context.
 * @returns {Promise<string>} The trimmed text of the first completion choice.
 * @throws {Error} When a prompt file cannot be read, the API responds with a
 *   non-OK status, or the response carries no message content.
 */
async function convertToMDX(html, title, url) {
  const promptPath = path.join(__dirname, "PROMPT.md");
  const llmDocsPath = path.join(
    __dirname,
    "..",
    "src",
    "content",
    "docs",
    "development",
    "guide",
    "component-docs-for-llm.mdx",
  );

  // Await the reads BEFORE calling .replace: `await readFile(...).replace(...)`
  // would invoke .replace on the pending Promise and throw a TypeError.
  const [promptTemplate, llmDocs] = await Promise.all([
    readFile(promptPath, "utf8"),
    readFile(llmDocsPath, "utf8"),
  ]);

  // A function replacer inserts the docs verbatim; a string replacement would
  // interpret `$&`, `$1`, `$$` sequences occurring inside the docs.
  const prompt = promptTemplate.replace("{{LLM_DOCS}}", () => llmDocs);

  const userMessage = [
    "",
    "// Convert the following HTML content from cppreference.com into MDX format suitable for CppDoc.",
    `// Title: ${title}`,
    `// URL: ${url}`,
    "// HTML Content:",
    `${html}`,
    "",
  ].join("\n");

  const response = await fetch("https://openrouter.ai/api/v1/chat/completions", {
    method: "POST",
    headers: {
      Authorization: `Bearer ${OPENROUTER_API_KEY}`,
      "Content-Type": "application/json",
      "HTTP-Referer": "https://github.com/cppdoc/cppdoc",
      "X-Title": "CppDoc Migration Bot",
    },
    body: JSON.stringify({
      model: "deepseek/deepseek-v3.2",
      messages: [
        { role: "system", content: prompt },
        { role: "user", content: userMessage },
      ],
    }),
  });

  if (!response.ok) {
    const error = await response.text();
    throw new Error(`OpenRouter API error: ${error}`);
  }

  const data = await response.json();
  const content = data?.choices?.[0]?.message?.content;
  if (typeof content !== "string") {
    // Fail with a descriptive error instead of a TypeError on a missing field.
    throw new Error("OpenRouter API returned no message content");
  }
  return content.trim();
}
94+
/**
 * Maps a cppreference page URL to the local MDX file path for that page.
 *
 * Example:
 *   https://en.cppreference.com/w/cpp/comments.html -> src/content/docs/cpp/comments.mdx
 *
 * A trailing `?query` or `#fragment` after `.html` is ignored.
 *
 * @param {string} url - URL of the form https://en.cppreference.com/w/<path>.html
 * @returns {string} Path under `<script dir>/../src/content/docs` ending in `.mdx`.
 * @throws {Error} When the URL does not have the expected form, or when the
 *   resulting path would fall outside the docs directory.
 */
function getLocalPath(url) {
  const match = url.match(/https?:\/\/en\.cppreference\.com\/w\/(.+?)\.html(?:[?#].*)?$/);
  if (!match) {
    throw new Error(`无法从URL解析路径: ${url}`);
  }
  const relative = match[1]; // "cpp/comments"

  const docsRoot = path.join(__dirname, "..", "src", "content", "docs");
  const target = path.join(docsRoot, `${relative}.mdx`);

  // The URL is user-supplied text, so refuse `..` segments that would make the
  // caller write outside the docs directory.
  const fromRoot = path.relative(docsRoot, target);
  const escapesRoot =
    fromRoot === ".." || fromRoot.startsWith(`..${path.sep}`) || path.isAbsolute(fromRoot);
  if (escapesRoot) {
    throw new Error(`Refusing path outside docs directory: ${url}`);
  }
  return target;
}
104+
/**
 * Writes an MDX document with generated YAML frontmatter, creating parent
 * directories as needed.
 *
 * @param {string} filePath - Destination file path.
 * @param {string} content - MDX body placed after the frontmatter.
 * @param {string} title - Page title stored in the frontmatter.
 * @returns {Promise<void>}
 */
async function writeMDXFile(filePath, content, title) {
  await fs.mkdir(path.dirname(filePath), { recursive: true });

  // Emit the title as a double-quoted scalar (JSON string syntax is valid YAML)
  // so titles containing ": ", " #", quotes, or leading special characters do
  // not produce invalid or altered frontmatter.
  const quotedTitle = JSON.stringify(String(title));
  const frontmatter = [
    "---",
    `title: ${quotedTitle}`,
    "description: Auto‑generated from cppreference",
    "---",
    "",
    "",
  ].join("\n");

  await fs.writeFile(filePath, frontmatter + content, "utf8");
  console.log(`写入 ${filePath}`);
}
115+
/**
 * Commits the generated file on a fresh branch, pushes it, and opens a pull
 * request against `main`.
 *
 * @param {{number: number}} issue - Issue being migrated; its number is used in
 *   the branch name, PR title, and PR body.
 * @param {string} filePath - Path of the file to commit.
 * @param {string} url - Source URL, used in the commit message and PR text.
 * @returns {Promise<number>} The number of the created pull request.
 * @throws {Error} When a git command or the pull-request API call fails.
 */
async function createPullRequest(issue, filePath, url) {
  const branchName = `migrate/${issue.number}-${Date.now().toString(36)}`;
  const commitMessage = `Migrate ${url}`;
  const prTitle = `[#${issue.number}] Migrate ${url}`;
  const prBody = `自动迁移自 ${url}\n\nclose #${issue.number}`;

  const { execFileSync } = await import("child_process");
  // Arguments are passed as an array with no shell involved, so characters in
  // the issue-derived URL and file path cannot be interpreted as shell syntax.
  const git = (...args) => execFileSync("git", args, { encoding: "utf8" }).trim();

  let startRef = null;
  try {
    // Remember where we started so this branch does not become the base of
    // the next migration's branch.
    const currentBranch = git("rev-parse", "--abbrev-ref", "HEAD");
    startRef = currentBranch === "HEAD" ? git("rev-parse", "HEAD") : currentBranch;

    git("config", "user.name", "github-actions[bot]");
    git("config", "user.email", "github-actions[bot]@users.noreply.github.com");
    git("checkout", "-b", branchName);
    git("add", "--", filePath);
    // Restrict the commit to this file so nothing else staged is included.
    git("commit", "-m", commitMessage, "--", filePath);
    git("push", "origin", branchName);
  } catch (error) {
    console.error("Git操作失败:", error.message);
    throw error;
  } finally {
    if (startRef !== null) {
      try {
        git("checkout", startRef);
      } catch (restoreError) {
        // Log only: do not mask the original failure (if any) with this one.
        console.error("Git操作失败:", restoreError.message);
      }
    }
  }

  const { data: pr } = await octokit.pulls.create({
    owner: REPO_OWNER,
    repo: REPO_NAME,
    title: prTitle,
    body: prBody,
    head: branchName,
    base: "main",
  });

  console.log(`创建PR #${pr.number}`);
  return pr.number;
}
147+
/**
 * Records the outcome of a migration on its issue.
 *
 * On success the title is prefixed with `[#<prNumber>]` (replacing any
 * existing `[#n]` marker) and a completion comment is posted. On failure a
 * comment with the error message is posted and the issue is closed.
 *
 * @param {{number: number, title: string}} issue - Issue to update.
 * @param {number|null} prNumber - Created PR number, or null when none exists.
 * @param {Error|null} [error=null] - Failure to report; null means success.
 * @returns {Promise<void>}
 */
async function updateIssue(issue, prNumber, error = null) {
  const target = {
    owner: REPO_OWNER,
    repo: REPO_NAME,
    issue_number: issue.number,
  };

  // Only retitle when a PR exists; otherwise the failure path would rename
  // the issue to "[#null] ...".
  if (prNumber != null) {
    const bareTitle = issue.title.replace(/\[#\d+\]\s*/, "");
    await octokit.issues.update({ ...target, title: `[#${prNumber}] ${bareTitle}` });
  }

  if (error) {
    // Tolerate thrown values that are not Error instances.
    const reason = error.message ?? String(error);
    await octokit.issues.createComment({
      ...target,
      body: `迁移失败: ${reason}\n\n已关闭issue。`,
    });
    await octokit.issues.update({ ...target, state: "closed" });
    return;
  }

  await octokit.issues.createComment({
    ...target,
    body: `迁移完成!已创建PR [#${prNumber}].`,
  });
}
179+
/**
 * Runs the migration pipeline for a single issue: fetch the page, convert it
 * to MDX, write the file, open a PR, and update the issue.
 * Returns without doing anything when the title already has a `[#n]` marker.
 *
 * @param {{number: number, title: string}} issue - Issue to process.
 * @returns {Promise<void>}
 * @throws {Error} When the title has no cppreference link or any step fails.
 */
async function migrateIssue(issue) {
  if (hasPRReference(issue.title)) {
    return;
  }

  const url = extractLink(issue.title);
  if (!url) {
    throw new Error("标题中未找到有效的cppreference链接");
  }

  // Fetch the page content.
  console.log(` 获取 ${url}`);
  const page = await fetchPageContent(url);

  // Convert it to MDX.
  console.log(` 转换HTML为MDX...`);
  const mdx = await convertToMDX(page.html, page.title, url);

  // Write the file.
  const filePath = getLocalPath(url);
  console.log(` 写入 ${filePath}`);
  await writeMDXFile(filePath, mdx, page.title);

  // Create the pull request.
  console.log(` 创建PR...`);
  const prNumber = await createPullRequest(issue, filePath, url);

  // Update the issue.
  console.log(` 更新issue...`);
  await updateIssue(issue, prNumber);

  console.log(` issue #${issue.number} 完成`);
}

/**
 * Entry point: lists open issues carrying LABEL (up to 50) and migrates each
 * one in turn. A failure on one issue is logged and reported on that issue
 * via updateIssue before moving on to the next.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log("获取带有标签", LABEL, "的issue...");
  const listing = await octokit.issues.listForRepo({
    owner: REPO_OWNER,
    repo: REPO_NAME,
    labels: LABEL,
    state: "open",
    per_page: 50,
  });
  const issues = listing.data;

  console.log(`找到 ${issues.length} 个issue`);

  for (const issue of issues) {
    console.log(`处理issue #${issue.number}: ${issue.title}`);
    try {
      await migrateIssue(issue);
    } catch (error) {
      console.error(` issue #${issue.number} 出错:`, error);
      await updateIssue(issue, null, error);
    }
  }

  console.log("全部完成");
}
234+
// Run the script; any failure that escapes main() is logged and the process
// exits with status 1.
try {
  await main();
} catch (err) {
  console.error(err);
  process.exit(1);
}
0 commit comments