From 30f0802582b6b7b33372bee21d8b0c656a896d2f Mon Sep 17 00:00:00 2001
From: mdevolde
Date: Thu, 18 Dec 2025 18:43:53 +0100
Subject: [PATCH] feat (server): new method to check regex regions of a text

---
 coverage-badge.svg             |  2 +-
 language_tool_python/server.py | 36 ++++++++++++++++++++++++++++++++++
 tests/test_match.py            | 23 ++++++++++++++++++++++
 3 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/coverage-badge.svg b/coverage-badge.svg
index 98519dc..2bf3aef 100644
--- a/coverage-badge.svg
+++ b/coverage-badge.svg
@@ -1 +1 @@
-coverage: 77.48%coverage77.48%
\ No newline at end of file
+coverage: 77.67%coverage77.67%
\ No newline at end of file
diff --git a/language_tool_python/server.py b/language_tool_python/server.py
index 0e2ea9c..c9e2fe5 100644
--- a/language_tool_python/server.py
+++ b/language_tool_python/server.py
@@ -6,6 +6,7 @@
 import json
 import logging
 import random
+import re
 import socket
 import subprocess
 import time
@@ -610,6 +611,41 @@ def check(self, text: str) -> List[Match]:
         matches = response["matches"]
         return [Match(match, text) for match in matches]
 
+    def check_matching_regions(
+        self, text: str, pattern: str, flags: int = 0
+    ) -> List[Match]:
+        """
+        Check only the parts of the text that match a regex pattern.
+        The returned Match objects can be applied to the original text with
+        :func:`language_tool_python.utils.correct`.
+
+        Each region is sent to :meth:`check` on its own; zero-length regex
+        matches are skipped because they contain no text to check.
+
+        :param text: The full text.
+        :param pattern: Regular expression defining the regions to check
+        :param flags: Regex flags (re.IGNORECASE, re.MULTILINE, etc.)
+        :return: List of Match with offsets adjusted to the original text
+        :rtype: List[Match]
+        """
+        all_matches: List[Match] = []
+
+        for region in re.finditer(pattern, text, flags):
+            region_text = region.group()
+            if not region_text:
+                # Nothing to check: avoid a useless request to the server
+                continue
+
+            region_matches = self.check(region_text)
+
+            # Adjust offsets for the original text
+            for match in region_matches:
+                match.offset += region.start()
+
+            all_matches.extend(region_matches)
+
+        return sorted(all_matches, key=lambda m: m.offset)
+
     def _create_params(self, text: str) -> Dict[str, str]:
         """
         Create a dictionary of parameters for the language tool server request.
diff --git a/tests/test_match.py b/tests/test_match.py
index cd7f46d..364ca16 100644
--- a/tests/test_match.py
+++ b/tests/test_match.py
@@ -182,3 +182,26 @@ def test_special_char_in_text() -> None:
             tool.correct(text)
             == "The sun was setting 🌅, casting a warm glow over the park. Birds chipped softly 🐦 as the day slowly fade into night."
         )
+
+
+def test_check_with_regex() -> None:
+    """
+    Test the check_matching_regions method for selective grammar checking.
+    This test verifies that LanguageTool can limit its grammar checking to specific
+    regions of text defined by a regular expression, allowing for targeted error detection.
+    Additionally, the test is performed with some special characters in the text to ensure
+    correct handling of offsets.
+
+    :raises AssertionError: If the detected matches do not correspond to the specified regions.
+    """
+    import language_tool_python
+
+    with language_tool_python.LanguageTool("en-US") as tool:
+        text = '❗ He said "❗ I has a problem" but she replied ❗ "It are fine ❗".'
+        matches = tool.check_matching_regions(text, r'"[^"]*"')
+
+        assert len(matches) == 2
+        assert (
+            language_tool_python.utils.correct(text, matches)
+            == '❗ He said "❗ I have a problem" but she replied ❗ "It is fine ❗".'
+        )