Skip to content

Commit 3e99049

Browse files
authored
Merge pull request #82 from AlexWaygood/more-regex-compilations
2 parents 1e8c3f9 + 61c449b commit 3e99049

File tree

3 files changed

+58 -26 lines changed

sphinxlint/checkers.py

Lines changed: 40 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,10 @@ def check_missing_backtick_after_role(file, lines, options=None):
6363
yield paragraph_lno + error_offset, f"role missing closing backtick: {error.group(0)!r}"
6464

6565

66+
_RST_ROLE_RE = re.compile("``.+?``(?!`).", flags=re.DOTALL)
67+
_END_STRING_SUFFIX_RE = re.compile(rst.END_STRING_SUFFIX)
68+
69+
6670
@checker(".rst", ".po")
6771
def check_missing_space_after_literal(file, lines, options=None):
6872
r"""Search for inline literals immediately followed by a character.
@@ -74,8 +78,8 @@ def check_missing_space_after_literal(file, lines, options=None):
7478
if paragraph.count("|") > 4:
7579
return # we don't handle tables yet.
7680
paragraph = clean_paragraph(paragraph)
77-
for role in re.finditer("``.+?``(?!`).", paragraph, flags=re.DOTALL):
78-
if not re.match(rst.END_STRING_SUFFIX, role.group(0)[-1]):
81+
for role in _RST_ROLE_RE.finditer(paragraph):
82+
if not _END_STRING_SUFFIX_RE.match(role[0][-1]):
7983
error_offset = paragraph[: role.start()].count("\n")
8084
yield (
8185
paragraph_lno + error_offset,
@@ -84,6 +88,9 @@ def check_missing_space_after_literal(file, lines, options=None):
8488
)
8589

8690

91+
_LONE_DOUBLE_BACKTICK_RE = re.compile("(?<!`)``(?!`)")
92+
93+
8794
@checker(".rst", ".po")
8895
def check_unbalanced_inline_literals_delimiters(file, lines, options=None):
8996
r"""Search for unbalanced inline literals delimiters.
@@ -95,14 +102,18 @@ def check_unbalanced_inline_literals_delimiters(file, lines, options=None):
95102
if paragraph.count("|") > 4:
96103
return # we don't handle tables yet.
97104
paragraph = clean_paragraph(paragraph)
98-
for lone_double_backtick in re.finditer("(?<!`)``(?!`)", paragraph):
105+
for lone_double_backtick in _LONE_DOUBLE_BACKTICK_RE.finditer(paragraph):
99106
error_offset = paragraph[: lone_double_backtick.start()].count("\n")
100107
yield (
101108
paragraph_lno + error_offset,
102109
"found an unbalanced inline literal markup.",
103110
)
104111

105112

113+
_ends_with_role_tag = re.compile(rst.ROLE_TAG + "$").search
114+
_starts_with_role_tag = re.compile("^" + rst.ROLE_TAG).search
115+
116+
106117
@checker(".rst", ".po", enabled=False)
107118
def check_default_role(file, lines, options=None):
108119
"""Search for default roles (but they are allowed in many projects).
@@ -121,12 +132,12 @@ def check_default_role(file, lines, options=None):
121132
if (stripped_line.startswith("|") and stripped_line.endswith("|") and
122133
stripped_line.count("|") >= 4 and "|" in match.group(0)):
123134
return # we don't handle tables yet.
124-
if re.search(rst.ROLE_TAG + "$", before_match):
125-
# It's not a default role: it starts with a tag.
126-
continue
127-
if re.search("^" + rst.ROLE_TAG, after_match):
135+
if _ends_with_role_tag(before_match):
128136
# It's not a default role: it ends with a tag.
129137
continue
138+
if _starts_with_role_tag(after_match):
139+
# It's not a default role: it starts with a tag.
140+
continue
130141
if match.group(0).startswith("``") and match.group(0).endswith("``"):
131142
# It's not a default role: it's an inline literal.
132143
continue
@@ -274,7 +285,7 @@ def check_role_with_double_backticks(file, lines, options=None):
274285
if inline_literal is None:
275286
break
276287
before = paragraph[: inline_literal.start()]
277-
if re.search(rst.ROLE_TAG + "$", before):
288+
if _ends_with_role_tag(before):
278289
error_offset = paragraph[: inline_literal.start()].count("\n")
279290
yield paragraph_lno + error_offset, "role use a single backtick, double backtick found."
280291
paragraph = (
@@ -325,6 +336,9 @@ def check_missing_space_before_default_role(file, lines, options=None):
325336
)
326337

327338

339+
_HYPERLINK_REFERENCE_RE = re.compile(r"\S* <https?://[^ ]+>`_")
340+
341+
328342
@checker(".rst", ".po")
329343
def check_hyperlink_reference_missing_backtick(file, lines, options=None):
330344
"""Search for missing backticks in front of hyperlink references.
@@ -337,7 +351,7 @@ def check_hyperlink_reference_missing_backtick(file, lines, options=None):
337351
return # we don't handle tables yet.
338352
paragraph = clean_paragraph(paragraph)
339353
paragraph = rst.INTERPRETED_TEXT_RE.sub("", paragraph)
340-
for hyperlink_reference in re.finditer(r"\S* <https?://[^ ]+>`_", paragraph):
354+
for hyperlink_reference in _HYPERLINK_REFERENCE_RE.finditer(paragraph):
341355
error_offset = paragraph[: hyperlink_reference.start()].count("\n")
342356
context = hyperlink_reference.group(0)
343357
yield (
@@ -391,6 +405,12 @@ def check_missing_final_newline(file, lines, options=None):
391405
yield len(lines), "No newline at end of file."
392406

393407

408+
_is_long_interpreted_text = re.compile(r"^\s*\W*(:(\w+:)+)?`.*`\W*$").match
409+
_starts_with_directive_or_hyperlink = re.compile(r"^\s*\.\. ").match
410+
_starts_with_anonymous_hyperlink = re.compile(r"^\s*__ ").match
411+
_is_very_long_string_literal = re.compile(r"^\s*``[^`]+``$").match
412+
413+
394414
@checker(".rst", ".po", enabled=False, rst_only=True)
395415
def check_line_too_long(file, lines, options=None):
396416
"""Check for line length; this checker is not run by default."""
@@ -399,13 +419,13 @@ def check_line_too_long(file, lines, options=None):
399419
if len(line) - 1 > options.max_line_length:
400420
if line.lstrip()[0] in "+|":
401421
continue # ignore wide tables
402-
if re.match(r"^\s*\W*(:(\w+:)+)?`.*`\W*$", line):
422+
if _is_long_interpreted_text(line):
403423
continue # ignore long interpreted text
404-
if re.match(r"^\s*\.\. ", line):
424+
if _starts_with_directive_or_hyperlink(line):
405425
continue # ignore directives and hyperlink targets
406-
if re.match(r"^\s*__ ", line):
426+
if _starts_with_anonymous_hyperlink(line):
407427
continue # ignore anonymous hyperlink targets
408-
if re.match(r"^\s*``[^`]+``$", line):
428+
if _is_very_long_string_literal(line):
409429
continue # ignore a very long literal string
410430
yield lno + 1, f"Line too long ({len(line)-1}/{options.max_line_length})"
411431

@@ -438,6 +458,9 @@ def check_triple_backticks(file, lines, options=None):
438458
yield lno + 1, "There's no rst syntax using triple backticks"
439459

440460

461+
_has_bad_dedent = re.compile(" [^ ].*::$").match
462+
463+
441464
@checker(".rst", ".po", rst_only=False)
442465
def check_bad_dedent(file, lines, options=None):
443466
"""Check for mis-alignment in indentation in code blocks.
@@ -455,19 +478,20 @@ def check_bad_dedent(file, lines, options=None):
455478

456479
def check_block(block_lineno, block):
457480
for lineno, line in enumerate(block.splitlines()):
458-
if re.match(" [^ ].*::$", line):
481+
if _has_bad_dedent(line):
459482
errors.append((block_lineno + lineno, "Bad dedent in block"))
460483

461484
list(hide_non_rst_blocks(lines, hidden_block_cb=check_block))
462485
yield from errors
463486

464487

465-
_DANGLING_HYPHEN_RE = re.compile(r".*[a-z]-$")
488+
_has_dangling_hyphen = re.compile(r".*[a-z]-$").match
489+
466490

467491
@checker(".rst", rst_only=True)
468492
def check_dangling_hyphen(file, lines, options):
469493
"""Check for lines ending in a hyphen."""
470494
for lno, line in enumerate(lines):
471495
stripped_line = line.rstrip("\n")
472-
if _DANGLING_HYPHEN_RE.match(stripped_line):
496+
if _has_dangling_hyphen(stripped_line):
473497
yield lno + 1, f"Line ends with dangling hyphen"

sphinxlint/utils.py

Lines changed: 17 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -151,6 +151,7 @@ def is_multiline_non_rst_block(line):
151151

152152

153153
_NON_RST_BLOCKS_CACHE = {}
154+
_ZERO_OR_MORE_SPACES_RE = re.compile(" *")
154155

155156

156157
def hide_non_rst_blocks(lines, hidden_block_cb=None):
@@ -172,7 +173,7 @@ def hide_non_rst_blocks(lines, hidden_block_cb=None):
172173
output = []
173174
for lineno, line in enumerate(lines, start=1):
174175
if in_literal is not None:
175-
current_indentation = len(re.match(" *", line).group(0))
176+
current_indentation = len(_ZERO_OR_MORE_SPACES_RE.match(line)[0])
176177
if current_indentation > in_literal or line == "\n":
177178
excluded_lines.append(line if line == "\n" else line[in_literal:])
178179
line = "\n" # Hiding line
@@ -182,12 +183,12 @@ def hide_non_rst_blocks(lines, hidden_block_cb=None):
182183
hidden_block_cb(block_line_start, "".join(excluded_lines))
183184
excluded_lines = []
184185
if in_literal is None and is_multiline_non_rst_block(line):
185-
in_literal = len(re.match(" *", line).group(0))
186+
in_literal = len(_ZERO_OR_MORE_SPACES_RE.match(line)[0])
186187
block_line_start = lineno
187188
assert not excluded_lines
188189
if (
189-
_COMMENT_RE.search(line)
190-
and type_of_explicit_markup(line) == "comment"
190+
type_of_explicit_markup(line) == "comment"
191+
and _COMMENT_RE.search(line)
191192
):
192193
line = "\n"
193194
output.append(line)
@@ -199,19 +200,26 @@ def hide_non_rst_blocks(lines, hidden_block_cb=None):
199200
return output
200201

201202

203+
_starts_with_directive_marker = re.compile(rf"\.\. {rst.ALL_DIRECTIVES}::").match
204+
_starts_with_footnote_marker = re.compile(r"\.\. \[[0-9]+\] ").match
205+
_starts_with_citation_marker = re.compile(r"\.\. \[[^\]]+\] ").match
206+
_starts_with_target = re.compile(r"\.\. _.*[^_]: ").match
207+
_starts_with_substitution_definition = re.compile(r"\.\. \|[^\|]*\| ").match
208+
209+
202210
@lru_cache()
203211
def type_of_explicit_markup(line):
204212
"""Tell apart various explicit markup blocks."""
205213
line = line.lstrip()
206-
if re.match(rf"\.\. {rst.ALL_DIRECTIVES}::", line):
214+
if _starts_with_directive_marker(line):
207215
return "directive"
208-
if re.match(r"\.\. \[[0-9]+\] ", line):
216+
if _starts_with_footnote_marker(line):
209217
return "footnote"
210-
if re.match(r"\.\. \[[^\]]+\] ", line):
218+
if _starts_with_citation_marker(line):
211219
return "citation"
212-
if re.match(r"\.\. _.*[^_]: ", line):
220+
if _starts_with_target(line):
213221
return "target"
214-
if re.match(r"\.\. \|[^\|]*\| ", line):
222+
if _starts_with_substitution_definition(line):
215223
return "substitution_definition"
216224
return "comment"
217225

tests/test_sphinxlint.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,7 @@ def test_sphinxlint_shall_not_pass(file, expected_errors, capsys):
6464
assert expected_error in out
6565
number_of_expected_errors = len(expected_errors)
6666
number_of_reported_errors = len(out.splitlines())
67-
assert number_of_expected_errors == number_of_reported_errors
67+
assert number_of_expected_errors == number_of_reported_errors, f"{number_of_reported_errors=}, {out=}"
6868

6969

7070
@pytest.mark.parametrize("file", [str(FIXTURE_DIR / "paragraphs.rst")])

0 commit comments

Comments
 (0)