Skip to content

Commit 234a15d

Browse files
authored
gh-141004: Improve make check-c-api-docs (GH-143564)
- Gather all documented names into a set in a single pass. This makes the check much faster.
- Do not match substrings (e.g. documenting `PyErr_WarnEx` doesn't mean that `PyErr_Warn` is documented).
- Consider `PY`-prefixed names (many older macros use this prefix).
1 parent fd6d41b commit 234a15d

File tree

2 files changed

+76
-24
lines changed

2 files changed

+76
-24
lines changed

Tools/check-c-api-docs/ignored_c_api.txt

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,27 @@ Py_LL
4545
Py_SAFE_DOWNCAST
4646
Py_ULL
4747
Py_VA_COPY
48+
PYLONG_BITS_IN_DIGIT
49+
PY_DWORD_MAX
50+
PY_FORMAT_SIZE_T
51+
PY_INT32_T
52+
PY_INT64_T
53+
PY_LITTLE_ENDIAN
54+
PY_LLONG_MAX
55+
PY_LLONG_MIN
56+
PY_LONG_LONG
57+
PY_SIZE_MAX
58+
PY_UINT32_T
59+
PY_UINT64_T
60+
PY_ULLONG_MAX
61+
# patchlevel.h
62+
PYTHON_ABI_STRING
63+
PYTHON_API_STRING
64+
PY_RELEASE_LEVEL_ALPHA
65+
PY_RELEASE_LEVEL_BETA
66+
PY_RELEASE_LEVEL_FINAL
67+
PY_RELEASE_LEVEL_GAMMA
68+
PY_VERSION
4869
# unicodeobject.h
4970
Py_UNICODE_SIZE
5071
# cpython/methodobject.h
@@ -91,3 +112,39 @@ Py_FrozenMain
91112
# cpython/unicodeobject.h
92113
PyUnicode_IS_COMPACT
93114
PyUnicode_IS_COMPACT_ASCII
115+
# pythonrun.h
116+
PyErr_Display
117+
# cpython/objimpl.h
118+
PyObject_GET_WEAKREFS_LISTPTR
119+
# cpython/pythonrun.h
120+
PyOS_Readline
121+
# cpython/warnings.h
122+
PyErr_Warn
123+
# fileobject.h
124+
PY_STDIOTEXTMODE
125+
# structmember.h
126+
PY_WRITE_RESTRICTED
127+
# pythread.h
128+
PY_TIMEOUT_T
129+
PY_TIMEOUT_MAX
130+
# cpython/pyctype.h
131+
PY_CTF_ALNUM
132+
PY_CTF_ALPHA
133+
PY_CTF_DIGIT
134+
PY_CTF_LOWER
135+
PY_CTF_SPACE
136+
PY_CTF_UPPER
137+
PY_CTF_XDIGIT
138+
# cpython/code.h
139+
PY_DEF_EVENT
140+
PY_FOREACH_CODE_EVENT
141+
# cpython/funcobject.h
142+
PY_DEF_EVENT
143+
PY_FOREACH_FUNC_EVENT
144+
# cpython/monitoring.h
145+
PY_MONITORING_EVENT_BRANCH
146+
# cpython/dictobject.h
147+
PY_DEF_EVENT
148+
PY_FOREACH_DICT_EVENT
149+
# cpython/pystats.h
150+
PYSTATS_MAX_UOP_ID

Tools/check-c-api-docs/main.py

Lines changed: 19 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
SIMPLE_MACRO_REGEX = re.compile(r"# *define *(\w+)(\(.+\))? ")
99
SIMPLE_INLINE_REGEX = re.compile(r"static inline .+( |\n)(\w+)")
1010
SIMPLE_DATA_REGEX = re.compile(r"PyAPI_DATA\(.+\) (\w+)")
11+
API_NAME_REGEX = re.compile(r'\bP[yY][a-zA-Z0-9_]+')
1112

1213
CPYTHON = Path(__file__).parent.parent.parent
1314
INCLUDE = CPYTHON / "Include"
@@ -72,24 +73,10 @@ def found_ignored_documented(singular: bool) -> str:
7273
)
7374

7475

75-
def is_documented(name: str) -> bool:
76-
"""
77-
Is a name present in the C API documentation?
78-
"""
79-
for path in C_API_DOCS.iterdir():
80-
if path.is_dir():
81-
continue
82-
if path.suffix != ".rst":
83-
continue
84-
85-
text = path.read_text(encoding="utf-8")
86-
if name in text:
87-
return True
88-
89-
return False
90-
91-
92-
def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]:
76+
def scan_file_for_docs(
77+
filename: str,
78+
text: str,
79+
names: set[str]) -> tuple[list[str], list[str]]:
9380
"""
9481
Scan a header file for C API functions.
9582
"""
@@ -98,22 +85,22 @@ def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]:
9885
colors = _colorize.get_colors()
9986

10087
def check_for_name(name: str) -> None:
101-
documented = is_documented(name)
88+
documented = name in names
10289
if documented and (name in IGNORED):
10390
documented_ignored.append(name)
10491
elif not documented and (name not in IGNORED):
10592
undocumented.append(name)
10693

10794
for function in SIMPLE_FUNCTION_REGEX.finditer(text):
10895
name = function.group(2)
109-
if not name.startswith("Py"):
96+
if not API_NAME_REGEX.fullmatch(name):
11097
continue
11198

11299
check_for_name(name)
113100

114101
for macro in SIMPLE_MACRO_REGEX.finditer(text):
115102
name = macro.group(1)
116-
if not name.startswith("Py"):
103+
if not API_NAME_REGEX.fullmatch(name):
117104
continue
118105

119106
if "(" in name:
@@ -123,14 +110,14 @@ def check_for_name(name: str) -> None:
123110

124111
for inline in SIMPLE_INLINE_REGEX.finditer(text):
125112
name = inline.group(2)
126-
if not name.startswith("Py"):
113+
if not API_NAME_REGEX.fullmatch(name):
127114
continue
128115

129116
check_for_name(name)
130117

131118
for data in SIMPLE_DATA_REGEX.finditer(text):
132119
name = data.group(1)
133-
if not name.startswith("Py"):
120+
if not API_NAME_REGEX.fullmatch(name):
134121
continue
135122

136123
check_for_name(name)
@@ -152,6 +139,14 @@ def check_for_name(name: str) -> None:
152139

153140

154141
def main() -> None:
142+
print("Gathering C API names from docs...")
143+
names = set()
144+
for path in C_API_DOCS.glob('**/*.rst'):
145+
text = path.read_text(encoding="utf-8")
146+
for name in API_NAME_REGEX.findall(text):
147+
names.add(name)
148+
print(f"Got {len(names)} names!")
149+
155150
print("Scanning for undocumented C API functions...")
156151
files = [*INCLUDE.iterdir(), *(INCLUDE / "cpython").iterdir()]
157152
all_missing: list[str] = []
@@ -162,7 +157,7 @@ def main() -> None:
162157
continue
163158
assert file.exists()
164159
text = file.read_text(encoding="utf-8")
165-
missing, ignored = scan_file_for_docs(str(file.relative_to(INCLUDE)), text)
160+
missing, ignored = scan_file_for_docs(str(file.relative_to(INCLUDE)), text, names)
166161
all_found_ignored += ignored
167162
all_missing += missing
168163

0 commit comments

Comments
 (0)