Merge pull request #79 from AlexWaygood/caches

hugovk · web-flow · commit dc0514d6a09c · 2023-10-11T16:14:43.000+02:00
diff --git a/sphinxlint/sphinxlint.py b/sphinxlint/sphinxlint.py
@@ -36,7 +36,7 @@ def check_text(filename, text, checkers, options=None):
     errors = []
     ext = splitext(filename)[1]
     checkers = {checker for checker in checkers if ext in checker.suffixes}
-    lines = text.splitlines(keepends=True)
+    lines = tuple(text.splitlines(keepends=True))
     if any(checker.rst_only for checker in checkers):
         lines_with_rst_only = hide_non_rst_blocks(lines)
     for check in checkers:
diff --git a/sphinxlint/utils.py b/sphinxlint/utils.py
@@ -1,4 +1,6 @@
 """Just a bunch of utility functions for sphinxlint."""
+from functools import lru_cache
+
 import regex as re
 from polib import pofile
 
@@ -27,6 +29,7 @@ def _clean_heuristic(paragraph, regex):
         paragraph = paragraph[: candidate.start()] + paragraph[candidate.end() :]
 
 
+@lru_cache()
 def clean_paragraph(paragraph):
     """Removes all good constructs, so detectors can focus on bad ones.
 
@@ -42,6 +45,7 @@ def clean_paragraph(paragraph):
     return paragraph.replace("\x00", "\\")
 
 
+@lru_cache()
 def escape2null(text):
     r"""Return a string with escape-backslashes converted to nulls.
 
@@ -75,10 +79,12 @@ def escape2null(text):
         start = found + 2  # skip character after escape
 
 
+@lru_cache()
 def paragraphs(lines):
     """Yield (paragraph_line_no, paragraph_text) pairs describing
     paragraphs of the given lines.
     """
+    output = []
     paragraph = []
     paragraph_lno = 1
     for lno, line in enumerate(lines, start=1):
@@ -88,10 +94,11 @@ def paragraphs(lines):
                 paragraph_lno = lno
             paragraph.append(line)
         elif paragraph:
-            yield paragraph_lno, "".join(paragraph)
+            output.append((paragraph_lno, "".join(paragraph)))
             paragraph = []
     if paragraph:
-        yield paragraph_lno, "".join(paragraph)
+        output.append((paragraph_lno, "".join(paragraph)))
+    return tuple(output)
 
 
 def looks_like_glued(match):
@@ -143,12 +150,22 @@ def is_multiline_non_rst_block(line):
     return False
 
 
+_NON_RST_BLOCKS_CACHE = {}
+
+
 def hide_non_rst_blocks(lines, hidden_block_cb=None):
     """Filters out literal, comments, code blocks, ...
 
     The filter actually replace "removed" lines by empty lines, so the
     line numbering still make sense.
+
+    This function is quite hot, so we cache the returned value where possible.
+    The function is only "pure" when hidden_block_cb is None, however,
+    so we can only safely cache the output when hidden_block_cb=None.
     """
+    lines = tuple(lines)
+    if hidden_block_cb is None and lines in _NON_RST_BLOCKS_CACHE:
+        return _NON_RST_BLOCKS_CACHE[lines]
     in_literal = None
     excluded_lines = []
     block_line_start = None
@@ -176,9 +193,13 @@ def hide_non_rst_blocks(lines, hidden_block_cb=None):
         output.append(line)
     if excluded_lines and hidden_block_cb:
         hidden_block_cb(block_line_start, "".join(excluded_lines))
+    output = tuple(output)
+    if hidden_block_cb is None:
+        _NON_RST_BLOCKS_CACHE[lines] = output
     return output
 
 
+@lru_cache()
 def type_of_explicit_markup(line):
     """Tell apart various explicit markup blocks."""
     line = line.lstrip()
diff --git a/tests/test_sphinxlint.py b/tests/test_sphinxlint.py
@@ -70,7 +70,7 @@ def test_sphinxlint_shall_not_pass(file, expected_errors, capsys):
 @pytest.mark.parametrize("file", [str(FIXTURE_DIR / "paragraphs.rst")])
 def test_paragraphs(file):
     with open(file) as f:
-        lines = f.readlines()
+        lines = tuple(f.readlines())
     actual = paragraphs(lines)
     for lno, para in actual:
         firstpline = para.splitlines(keepends=True)[0]