Skip to content

Commit e5af6fc

Browse files
feat(RevisionBlock): replace dir="v" to vertical attribute (#2)
1 parent 598376a commit e5af6fc

File tree

25 files changed

+1731
-104
lines changed

25 files changed

+1731
-104
lines changed

.github/workflows/migrate.yml

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Runs the cppreference -> MDX migration bot (npm run migrate) for issues
# carrying the `migrate-cppref-page` label, or on manual dispatch.
name: Migrate cppreference pages

on:
  issues:
    types:
      - opened
      - labeled
  workflow_dispatch:
    inputs:
      # NOTE(review): migrate/migrate-bot.js does not read this input; the bot
      # always processes every open labeled issue.
      issue_number:
        description: "Specific issue number to process (optional)"
        required: false
        type: string

# An issue that is opened with the label already attached fires both the
# `opened` and the `labeled` event. Serialize runs so two jobs never migrate
# the same issue at the same time and open duplicate pull requests.
concurrency:
  group: migrate-cppref-page
  cancel-in-progress: false

jobs:
  migrate:
    runs-on: ubuntu-latest
    if: |
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'issues' &&
      (github.event.action == 'opened' || github.event.action == 'labeled') &&
      contains(github.event.issue.labels.*.name, 'migrate-cppref-page'))
    permissions:
      contents: write
      issues: write
      pull-requests: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6
        with:
          # Full history so the bot can create and push new branches.
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Setup Node.js
        uses: actions/setup-node@v6
        with:
          node-version: "22"
          cache: "npm"

      - name: Install dependencies
        run: npm ci

      - name: Run migration bot
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          GITHUB_REPOSITORY: ${{ github.repository }}
          GITHUB_REPOSITORY_OWNER: ${{ github.repository_owner }}
        run: npm run migrate

migrate/PROMPT.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
You are a professional C++ documentation writer. You are now migrating cppreference.com documentation from HTML format to MDX format. During this process, you must adhere to the following rules:
2+
1. Only migrate the format, ensuring that the text of the migrated result is **exactly the same** as the original. Of course, you don't need to process text that was originally invisible.
3+
2. When using the pre-provided component library, import only the components you actually use. Do not try to write your own component. Do not try to write your own component. Do not try to write your own component. DO NOT USE NORMAL HTML ELEMENTS. Replace them with our MDX components.
4+
3. For links, resolve the `href` against the current page path, remove the leading `/w/` and the trailing `.html`, and then wrap the link text with `DocLink`. For example:
5+
If the current path is: `/w/cpp/language/basics.html`
6+
Link: `<a href="declarations.html" title="cpp/language/declarations">declarations</a>`
7+
You should, based on the current link, change it to: `<DocLink src="/cpp/language/declarations">declarations</DocLink>`
8+
4. Currently available components:
9+
```mdx
10+
{{LLM_DOCS}}
11+
```
12+
13+
## Note: The above content is all part of the component library examples. Do not confuse it with the actual content that needs to be migrated.
14+
15+
The original content will be provided in the following format:
16+
// URL: Original page link
17+
Original page content
18+
Please proceed with the migration.

migrate/migrate-bot.js

Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
#!/usr/bin/env node
2+
3+
import { Octokit } from "@octokit/rest";
4+
import { JSDOM } from "jsdom";
5+
import fs, { readFile } from "fs/promises";
6+
import path from "path";
7+
import { fileURLToPath } from "url";
8+
9+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
10+
11+
// Runtime configuration, read once from the environment at startup.
const { GITHUB_TOKEN, OPENROUTER_API_KEY } = process.env;
// Placeholder values are used when the GitHub Actions variables are absent or empty.
const REPO_OWNER = process.env.GITHUB_REPOSITORY_OWNER || "owner";
const REPO_NAME = process.env.GITHUB_REPOSITORY?.split("/")[1] || "cppdoc";
// Only issues carrying this label are picked up by the bot.
const LABEL = "migrate-cppref-page";

// Both credentials are mandatory; report the first missing one and abort.
for (const [name, value] of Object.entries({
  GITHUB_TOKEN,
  OPENROUTER_API_KEY,
})) {
  if (!value) {
    console.error(`Missing ${name}`);
    process.exit(1);
  }
}

// Authenticated GitHub REST client shared by every helper below.
const octokit = new Octokit({ auth: GITHUB_TOKEN });
27+
28+
/**
 * Finds the first en.cppreference.com wiki URL inside an issue title.
 *
 * @param {string} title - Issue title to scan.
 * @returns {string|null} The first matching URL, or null when there is none.
 */
function extractLink(title) {
  const urlRegex = /https?:\/\/en\.cppreference\.com\/w\/[^\s]+/g;
  // With the global flag, match() yields the list of matched strings (or null).
  const [firstLink = null] = title.match(urlRegex) ?? [];
  return firstLink;
}
33+
34+
/**
 * Tells whether an issue title already carries a `[#123]`-style tag.
 *
 * @param {string} title - Issue title to inspect.
 * @returns {boolean} True when a `[#<digits>]` tag appears anywhere in the title.
 */
function hasPRReference(title) {
  const prTag = /\[#\d+\]/;
  return prTag.exec(title) !== null;
}
37+
38+
/**
 * Downloads a page and extracts its main content and heading.
 *
 * @param {string} url - Page address to download.
 * @returns {Promise<{html: string, title: string, url: string}>} Inner HTML of
 *   `#mw-content-text`, the trimmed text of `#firstHeading` (empty string when
 *   missing), and the URL that was requested.
 * @throws {Error} When the response is not OK or `#mw-content-text` is absent.
 */
async function fetchPageContent(url) {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to fetch ${url}: ${response.status}`);
  }

  const { document } = new JSDOM(await response.text()).window;

  const contentElement = document.querySelector("#mw-content-text");
  if (!contentElement) {
    throw new Error("Could not find #mw-content-text");
  }

  const headingText = document
    .querySelector("#firstHeading")
    ?.textContent?.trim();

  return {
    html: contentElement.innerHTML,
    title: headingText || "",
    url,
  };
}
56+
57+
/**
 * Asks the OpenRouter chat-completions API to convert page HTML into MDX.
 *
 * The system prompt is PROMPT.md (next to this script) with its
 * `{{LLM_DOCS}}` placeholder replaced by the component documentation file.
 *
 * @param {string} html - Page content HTML to convert.
 * @param {string} title - Page title, passed to the model as context.
 * @param {string} url - Original page URL, passed to the model as context.
 * @returns {Promise<string>} The trimmed message content returned by the model.
 * @throws {Error} When the API answers with a non-OK status or the response
 *   carries no message content.
 */
async function convertToMDX(html, title, url) {
  // The two reads are independent, so run them in parallel. Each promise must
  // be awaited BEFORE calling string methods on its result.
  const [template, componentDocs] = await Promise.all([
    readFile(path.join(__dirname, "PROMPT.md"), "utf8"),
    readFile(
      path.join(
        __dirname,
        "..",
        "src",
        "content",
        "docs",
        "development",
        "guide",
        "component-docs-for-llm.mdx",
      ),
      "utf8",
    ),
  ]);
  // A replacer function inserts the docs verbatim; a plain string replacement
  // would expand `$&`, `$1`, ... sequences that may occur in the docs.
  const prompt = template.replace("{{LLM_DOCS}}", () => componentDocs);

  const response = await fetch(
    "https://openrouter.ai/api/v1/chat/completions",
    {
      method: "POST",
      headers: {
        Authorization: `Bearer ${OPENROUTER_API_KEY}`,
        "Content-Type": "application/json",
        "HTTP-Referer": "https://github.com/cppdoc/cppdoc",
        "X-Title": "CppDoc Migration Bot",
      },
      body: JSON.stringify({
        model: "deepseek/deepseek-v3.2",
        messages: [
          { role: "system", content: prompt },
          {
            role: "user",
            content: `
// Convert the following HTML content from cppreference.com into MDX format suitable for CppDoc.
// Title: ${title}
// URL: ${url}
// HTML Content:
${html}
`,
          },
        ],
      }),
    },
  );

  if (!response.ok) {
    const error = await response.text();
    throw new Error(`OpenRouter API error: ${error}`);
  }

  const data = await response.json();
  const content = data?.choices?.[0]?.message?.content;
  if (typeof content !== "string") {
    // Fail with a descriptive error instead of a TypeError on a missing field.
    throw new Error(
      `OpenRouter API error: response contained no message content: ${JSON.stringify(data)}`,
    );
  }
  return content.trim();
}
104+
105+
/**
 * Maps a cppreference page URL to the MDX file that should hold its migration.
 *
 * Example: https://en.cppreference.com/w/cpp/comments.html
 *       -> <repo>/src/content/docs/cpp/comments.mdx
 *
 * @param {string} url - Page URL ending in `.html`.
 * @returns {string} Path of the target `.mdx` file under the docs directory.
 * @throws {Error} When the URL does not have the expected shape, carries a
 *   query string or fragment, or would resolve outside the docs directory.
 */
function getLocalPath(url) {
  // `[^?#]+` rejects query strings and fragments so they never leak into the
  // file name.
  const match = url.match(
    /https?:\/\/en\.cppreference\.com\/w\/([^?#]+)\.html$/,
  );
  if (!match) {
    throw new Error(`无法从URL解析路径: ${url}`);
  }
  const relative = match[1]; // "cpp/comments"

  const docsRoot = path.join(__dirname, "..", "src", "content", "docs");
  const target = path.join(docsRoot, `${relative}.mdx`);

  // The URL comes from an issue title, so `..` segments must not be allowed
  // to move the write target outside the docs directory.
  const fromRoot = path.relative(docsRoot, target);
  const escapesRoot =
    fromRoot === ".." ||
    fromRoot.startsWith(`..${path.sep}`) ||
    path.isAbsolute(fromRoot);
  if (escapesRoot) {
    throw new Error(`无法从URL解析路径: ${url}`);
  }
  return target;
}
121+
122+
/**
 * Writes an MDX document, prefixed with YAML frontmatter, to disk.
 *
 * Missing parent directories are created first.
 *
 * @param {string} filePath - Destination file path.
 * @param {string} content - MDX body placed after the frontmatter.
 * @param {string} title - Page title stored in the frontmatter.
 * @returns {Promise<void>}
 */
async function writeMDXFile(filePath, content, title) {
  await fs.mkdir(path.dirname(filePath), { recursive: true });

  // Emit the title as a JSON string, which is also a valid YAML double-quoted
  // scalar. An unquoted title such as "C++ keyword: alignas" would otherwise
  // produce invalid frontmatter.
  const frontmatter = [
    "---",
    `title: ${JSON.stringify(String(title ?? ""))}`,
    "description: Auto‑generated from cppreference",
    "---",
    "",
    "",
  ].join("\n");

  await fs.writeFile(filePath, frontmatter + content, "utf8");
  console.log(`写入 ${filePath}`);
}
132+
133+
/**
 * Commits the migrated file on a fresh branch, pushes it and opens a PR.
 *
 * @param {{number: number}} issue - Issue that requested the migration.
 * @param {string} filePath - Path of the generated MDX file to commit.
 * @param {string} url - Original page URL, used in the commit and PR text.
 * @returns {Promise<number>} Number of the created pull request.
 * @throws {Error} When a git command or the GitHub API call fails.
 */
async function createPullRequest(issue, filePath, url) {
  const branchName = `migrate/${issue.number}-${Date.now().toString(36)}`;
  const commitMessage = `Migrate ${url}`;
  const prTitle = `[#${issue.number}] Migrate ${url}`;
  const prBody = `自动迁移自 ${url}\n\nclose #${issue.number}`;

  const { execFileSync } = await import("node:child_process");
  // Arguments are passed as an array, so no shell is involved. `url` comes
  // from an issue title and must never be interpolated into a shell command
  // line (e.g. `$(...)` inside double quotes would be executed).
  const git = (...args) => execFileSync("git", args);

  try {
    git("config", "user.name", "github-actions[bot]");
    git("config", "user.email", "github-actions[bot]@users.noreply.github.com");
    git("checkout", "-b", branchName);
    try {
      git("add", "--", filePath);
      git("commit", "-m", commitMessage);
      git("push", "origin", branchName);
    } finally {
      // Return to the branch we started from; otherwise the branch for the
      // next issue would be created on top of this migration commit.
      git("checkout", "-");
    }
  } catch (error) {
    console.error("Git操作失败:", error.message);
    throw error;
  }

  const { data: pr } = await octokit.pulls.create({
    owner: REPO_OWNER,
    repo: REPO_NAME,
    title: prTitle,
    body: prBody,
    head: branchName,
    base: "main",
  });

  console.log(`创建PR #${pr.number}`);
  return pr.number;
}
166+
167+
/**
 * Reports the outcome of a migration back to its issue.
 *
 * On success the issue title is prefixed with `[#<prNumber>]` (replacing an
 * existing `[#n]` tag) and a completion comment is posted. On failure a
 * comment with the error message is posted and the issue is closed.
 *
 * @param {{number: number, title: string}} issue - Issue to update.
 * @param {number|null} prNumber - Created PR number, or null when none exists.
 * @param {Error|null} [error=null] - Failure to report, if any.
 * @returns {Promise<void>}
 */
async function updateIssue(issue, prNumber, error = null) {
  const target = {
    owner: REPO_OWNER,
    repo: REPO_NAME,
    issue_number: issue.number,
  };

  // Only retitle when there is a PR to reference; the failure path passes
  // null, which previously produced a "[#null] ..." title.
  if (prNumber != null) {
    const newTitle = `[#${prNumber}] ${issue.title.replace(/\[#\d+\]\s*/, "")}`;
    await octokit.issues.update({ ...target, title: newTitle });
  }

  if (error) {
    await octokit.issues.createComment({
      ...target,
      body: `迁移失败: ${error.message}\n\n已关闭issue。`,
    });
    await octokit.issues.update({ ...target, state: "closed" });
    return;
  }

  await octokit.issues.createComment({
    ...target,
    body: `迁移完成!已创建PR [#${prNumber}].`,
  });
}
198+
199+
/**
 * Processes every open issue carrying the migration label: fetches the linked
 * page, converts it to MDX, writes the file, opens a PR and updates the issue.
 *
 * Issues whose title already holds a `[#n]` tag are skipped. A failure while
 * handling one issue is reported on that issue and does not stop the others.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log("获取带有标签", LABEL, "的issue...");
  const { data: issues } = await octokit.issues.listForRepo({
    owner: REPO_OWNER,
    repo: REPO_NAME,
    labels: LABEL,
    state: "open",
    per_page: 50,
  });

  console.log(`找到 ${issues.length} 个issue`);

  for (const issue of issues) {
    // The issues endpoint also returns pull requests (marked by a
    // `pull_request` key); those must never be migrated or closed.
    if (issue.pull_request) {
      continue;
    }

    console.log(`处理issue #${issue.number}: ${issue.title}`);
    try {
      if (hasPRReference(issue.title)) {
        continue;
      }

      const url = extractLink(issue.title);
      if (!url) {
        throw new Error("标题中未找到有效的cppreference链接");
      }

      // Fetch the page content
      console.log(`  获取 ${url}`);
      const { html, title } = await fetchPageContent(url);

      // Convert to MDX
      console.log(`  转换HTML为MDX...`);
      const mdx = await convertToMDX(html, title, url);

      // Write the file
      const filePath = getLocalPath(url);
      console.log(`  写入 ${filePath}`);
      await writeMDXFile(filePath, mdx, title);

      // Create the PR
      console.log(`  创建PR...`);
      const prNumber = await createPullRequest(issue, filePath, url);

      // Update the issue
      console.log(`  更新issue...`);
      await updateIssue(issue, prNumber);

      console.log(`  issue #${issue.number} 完成`);
    } catch (error) {
      console.error(`  issue #${issue.number} 出错:`, error);
      try {
        await updateIssue(issue, null, error);
      } catch (reportError) {
        // Reporting is best effort: a failing GitHub call here must not
        // abort the remaining issues.
        console.error(`  issue #${issue.number} 更新失败:`, reportError);
      }
    }
  }

  console.log("全部完成");
}
253+
254+
// Script entry point: run the bot; on any unhandled failure print it and
// exit with a non-zero status so the workflow step is marked as failed.
try {
  await main();
} catch (failure) {
  console.error(failure);
  process.exit(1);
}

0 commit comments

Comments
 (0)