Skip to content

Commit a279924

Browse files
authored
Merge pull request jxmorris12#122 from mdevolde/config_file_2
refactor: rewrote config_file.py (better handling and transformation of config values), moved exceptions to a dedicated file, added missing config entries to README
2 parents 934d9c4 + ba7bec8 commit a279924

File tree

9 files changed

+237
-115
lines changed

9 files changed

+237
-115
lines changed

README.md

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ tool = language_tool_python.LanguageTool('en-US', config={ 'maxTextLength': 100
209209

210210
### Full list of configuration options
211211

212-
Here's a full list of configuration options. See the LanguageTool [HTTPServerConfig](https://languagetool.org/development/api/org/languagetool/server/HTTPServerConfig.html) documentation for details.
212+
Here's a full list of configuration options:
213213

214214
```
215215
'maxTextLength' - maximum text length, longer texts will cause an error (optional)
@@ -240,6 +240,18 @@ Here's a full list of configuration options. See the LanguageTool [HTTPServerCon
240240
'maxPipelinePoolSize' - cache size if 'pipelineCaching' is set
241241
'pipelineExpireTimeInSeconds' - time after which pipeline cache items expire
242242
'pipelinePrewarming' - set to 'true' to fill pipeline cache on start (can slow down start a lot)
243+
'trustXForwardForHeader' - set this to 'true' if you run the server behind a reverse proxy and want the
244+
request limit to work on the original IP addresses provided by the 'X-forwarded-for' HTTP header,
245+
usually set by the proxy
246+
'suggestionsEnabled' - if suggestions should be generated for spell check errors (optional, default: true)
247+
248+
Spellcheck-only languages: You can add simple spellcheck-only support for languages that LT doesn't
249+
support by defining two optional properties:
250+
'lang-xx' - set name of the language, use language code instead of 'xx', e.g. lang-tr=Turkish
251+
'lang-xx-dictPath' - absolute path to the hunspell .dic file, use language code instead of 'xx', e.g.
252+
lang-tr-dictPath=/path/to/tr.dic. Note that the same directory also needs to
253+
contain a common_words.txt file with the most common 10,000 words (used for
254+
better language detection)
243255
```
244256

245257
## Installation

language_tool_python/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,10 @@
66
"LanguageTag",
77
"Match",
88
"utils",
9+
"exceptions",
910
]
1011

11-
from . import utils
12+
from . import exceptions, utils
1213
from .language_tag import LanguageTag
1314
from .match import Match
1415
from .server import LanguageTool, LanguageToolPublicAPI

language_tool_python/__main__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99

1010
import toml
1111

12+
from .exceptions import LanguageToolError
1213
from .server import LanguageTool
13-
from .utils import LanguageToolError
1414

1515
try:
1616
__version__ = version("language_tool_python")

language_tool_python/config_file.py

Lines changed: 163 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -3,36 +3,133 @@
33
import atexit
44
import os
55
import tempfile
6-
from typing import Any, Dict
7-
8-
# Allowed configuration keys for LanguageTool.
9-
ALLOWED_CONFIG_KEYS = {
10-
"maxTextLength",
11-
"maxTextHardLength",
12-
"maxCheckTimeMillis",
13-
"maxErrorsPerWordRate",
14-
"maxSpellingSuggestions",
15-
"maxCheckThreads",
16-
"cacheSize",
17-
"cacheTTLSeconds",
18-
"requestLimit",
19-
"requestLimitInBytes",
20-
"timeoutRequestLimit",
21-
"requestLimitPeriodInSeconds",
22-
"languageModel",
23-
"fasttextModel",
24-
"fasttextBinary",
25-
"maxWorkQueueSize",
26-
"rulesFile",
27-
"blockedReferrers",
28-
"premiumOnly",
29-
"disabledRuleIds",
30-
"pipelineCaching",
31-
"maxPipelinePoolSize",
32-
"pipelineExpireTimeInSeconds",
33-
"pipelinePrewarming",
34-
"trustXForwardForHeader",
35-
"suggestionsEnabled",
6+
from dataclasses import dataclass
7+
from pathlib import Path
8+
from typing import Any, Callable, Dict, Iterable, Optional, Union
9+
10+
from .exceptions import PathError
11+
12+
13+
@dataclass(frozen=True)
class OptionSpec:
    """
    Describe how one configuration option is type-checked and serialized.

    An instance bundles the Python type(s) a value may have, the callable that
    turns the value into its string form, and an optional validation hook.

    Attributes:
        py_types (Union[type, tuple[type, ...]]): A single type, or a tuple of
            types, accepted for the option value.
        encoder (Callable[[Any], str]): Callable producing the string
            representation of the option value.
        validator (Optional[Callable[[Any], None]]): Optional callable that
            checks the option value; defaults to ``None`` (no validation).

    .. note::
        The dataclass is declared with ``frozen=True``, so attributes cannot
        be reassigned once an instance has been created.
    """

    # Accepted type, or tuple of accepted types, for the option value.
    py_types: Union[type, tuple[type, ...]]
    # Converts an option value to the string form.
    encoder: Callable[[Any], str]
    # Optional extra check on the value; None means no validation.
    validator: Optional[Callable[[Any], None]] = None
34+
35+
36+
def _bool_encoder(v: Any) -> str:
37+
"""
38+
Encode a value as a lowercase boolean string.
39+
40+
Converts any value to a boolean and returns its string representation
41+
in lowercase format ('true' or 'false').
42+
43+
:param v: The value to be converted to a boolean string.
44+
:type v: Any
45+
:return: A lowercase string representation of the boolean value ('true' or 'false').
46+
:rtype: str
47+
"""
48+
return str(bool(v)).lower()
49+
50+
51+
def _comma_list_encoder(v: Any) -> str:
52+
"""
53+
Encode a value as a comma-separated list string.
54+
55+
Converts a value into a string representation suitable for comma-separated
56+
list configuration options. If the input is already a string, it is returned
57+
as-is. If it's an iterable, its elements are converted to strings and joined
58+
with commas.
59+
60+
:param v: The value to encode. Can be a string or an iterable of values.
61+
:type v: Any
62+
:return: A comma-separated string representation of the input value.
63+
:rtype: str
64+
:raises TypeError: If the input is neither a string nor an iterable.
65+
"""
66+
if isinstance(v, str):
67+
return v
68+
if isinstance(v, Iterable):
69+
return ",".join(str(x) for x in v)
70+
raise TypeError("expected string or iterable for comma-list option")
71+
72+
73+
def _path_encoder(v: Any) -> str:
74+
"""
75+
Encode a path value to a string.
76+
Converts the input to a Path object, then to a string, and escapes all
77+
backslashes by doubling them. This is useful for windows file paths and
78+
other contexts where backslashes need to be escaped. (because they will
79+
be used by LT java binary)
80+
81+
:param v: The path value to encode. Can be any type that Path accepts
82+
(str, Path, etc.).
83+
:type v: Any
84+
:return: The path as a string with escaped backslashes (e.g., "C:\\\\Users\\\\file").
85+
:rtype: str
86+
"""
87+
return str(Path(v)).replace("\\", "\\\\")
88+
89+
90+
def _path_validator(v: Any) -> None:
91+
"""
92+
Validate that a given path exists and is a file.
93+
94+
:param v: The path to validate, which will be converted to a Path object
95+
:type v: Any
96+
:raises PathError: If the path does not exist
97+
:raises PathError: If the path exists but is not a file
98+
"""
99+
p = Path(v)
100+
if not p.exists():
101+
raise PathError(f"path does not exist: {p}")
102+
if not p.is_file():
103+
raise PathError(f"path is not a file: {p}")
104+
105+
106+
CONFIG_SCHEMA: Dict[str, OptionSpec] = {
107+
"maxTextLength": OptionSpec(int, lambda v: str(int(v))),
108+
"maxTextHardLength": OptionSpec(int, lambda v: str(int(v))),
109+
"maxCheckTimeMillis": OptionSpec(int, lambda v: str(int(v))),
110+
"maxErrorsPerWordRate": OptionSpec((int, float), lambda v: str(float(v))),
111+
"maxSpellingSuggestions": OptionSpec(int, lambda v: str(int(v))),
112+
"maxCheckThreads": OptionSpec(int, lambda v: str(int(v))),
113+
"cacheSize": OptionSpec(int, lambda v: str(int(v))),
114+
"cacheTTLSeconds": OptionSpec(int, lambda v: str(int(v))),
115+
"requestLimit": OptionSpec(int, lambda v: str(int(v))),
116+
"requestLimitInBytes": OptionSpec(int, lambda v: str(int(v))),
117+
"timeoutRequestLimit": OptionSpec(int, lambda v: str(int(v))),
118+
"requestLimitPeriodInSeconds": OptionSpec(int, lambda v: str(int(v))),
119+
"languageModel": OptionSpec((str, Path), _path_encoder, _path_validator),
120+
"fasttextModel": OptionSpec((str, Path), _path_encoder, _path_validator),
121+
"fasttextBinary": OptionSpec((str, Path), _path_encoder, _path_validator),
122+
"maxWorkQueueSize": OptionSpec(int, lambda v: str(int(v))),
123+
"rulesFile": OptionSpec((str, Path), _path_encoder, _path_validator),
124+
"blockedReferrers": OptionSpec((str, list, tuple, set), _comma_list_encoder),
125+
"premiumOnly": OptionSpec((bool, int), _bool_encoder),
126+
"disabledRuleIds": OptionSpec((str, list, tuple, set), _comma_list_encoder),
127+
"pipelineCaching": OptionSpec((bool, int), _bool_encoder),
128+
"maxPipelinePoolSize": OptionSpec(int, lambda v: str(int(v))),
129+
"pipelineExpireTimeInSeconds": OptionSpec(int, lambda v: str(int(v))),
130+
"pipelinePrewarming": OptionSpec((bool, int), _bool_encoder),
131+
"trustXForwardForHeader": OptionSpec((bool, int), _bool_encoder),
132+
"suggestionsEnabled": OptionSpec((bool, int), _bool_encoder),
36133
}
37134

38135

@@ -53,23 +150,50 @@ def _is_lang_key(key: str) -> bool:
53150
return False
54151

55152
parts = key.split("-")
56-
return (len(parts) == 2 and len(parts[1]) > 0) or (
57-
len(parts) == 3 and len(parts[1]) > 0 and parts[2] == "dictPath"
153+
return (len(parts) == 2 and len(parts[1]) > 0) or ( # lang-<code>
154+
len(parts) == 3
155+
and len(parts[1]) > 0
156+
and parts[2] == "dictPath" # lang-<code>-dictPath
58157
)
59158

60159

61-
def _validate_config_keys(config: Dict[str, Any]) -> None:
160+
def _encode_config(config: Dict[str, Any]) -> Dict[str, str]:
    """
    Encode configuration dictionary values to their string representations.

    Language keys are handled first: the value of a ``lang-<code>`` key is
    converted with ``str()``, and the value of a ``lang-<code>-dictPath`` key
    is validated and encoded as a path. Every other key must be present in
    ``CONFIG_SCHEMA``; its value is type-checked, passed to the optional
    validator and then encoded with the encoder from its specification.

    ``bool`` values are only accepted for options whose specification lists
    ``bool`` explicitly. Because ``bool`` is a subclass of ``int``, a plain
    ``isinstance`` check would otherwise let ``True``/``False`` through for
    numeric options and write them out as ``1``/``0``.

    :param config: A dictionary containing configuration keys and values to be encoded.
    :type config: Dict[str, Any]
    :return: A dictionary with the same keys but with all values encoded as strings.
    :rtype: Dict[str, str]
    :raises ValueError: If a key in the config is not found in the CONFIG_SCHEMA and
                        is not a language key.
    :raises TypeError: If a value's type does not match the expected type(s) defined
                       in the CONFIG_SCHEMA specification.
    """
    encoded: Dict[str, str] = {}
    for key, value in config.items():
        if _is_lang_key(key):
            dashes = key.count("-")
            if dashes == 1:  # lang-<code>
                encoded[key] = str(value)
                continue
            if dashes == 2:  # lang-<code>-dictPath
                _path_validator(value)
                encoded[key] = _path_encoder(value)
                continue

        spec = CONFIG_SCHEMA.get(key)
        if spec is None:
            raise ValueError(f"unexpected key in config: {key}")

        # Normalize to a tuple so membership can be tested uniformly.
        allowed = spec.py_types if isinstance(spec.py_types, tuple) else (spec.py_types,)
        # bool passes isinstance(..., int); reject it unless explicitly allowed.
        bool_not_allowed = isinstance(value, bool) and bool not in allowed
        if bool_not_allowed or not isinstance(value, allowed):
            raise TypeError(f"invalid type for {key}: {type(value).__name__}")
        if spec.validator is not None:
            spec.validator(value)
        encoded[key] = spec.encoder(value)
    return encoded
196+
73197

74198
class LanguageToolConfig:
75199
"""
@@ -92,24 +216,8 @@ def __init__(self, config: Dict[str, Any]):
92216
"""
93217
if not config:
94218
raise ValueError("config cannot be empty")
95-
_validate_config_keys(config)
96-
97-
self.config = config
98-
99-
if "disabledRuleIds" in self.config:
100-
self.config["disabledRuleIds"] = ",".join(self.config["disabledRuleIds"])
101-
if "blockedReferrers" in self.config:
102-
self.config["blockedReferrers"] = ",".join(self.config["blockedReferrers"])
103-
for key in [
104-
"pipelineCaching",
105-
"premiumOnly",
106-
"pipelinePrewarming",
107-
"trustXForwardForHeader",
108-
"suggestionsEnabled",
109-
]:
110-
if key in self.config:
111-
self.config[key] = str(bool(self.config[key])).lower()
112219

220+
self.config = _encode_config(config)
113221
self.path = self._create_temp_file()
114222

115223
def _create_temp_file(self) -> str:

language_tool_python/download_lt.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@
1414
import requests
1515
import tqdm
1616

17+
from .exceptions import PathError
1718
from .utils import (
1819
LTP_JAR_DIR_PATH_ENV_VAR,
19-
PathError,
2020
find_existing_language_tool_downloads,
2121
get_language_tool_download_path,
2222
)

language_tool_python/exceptions.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
class LanguageToolError(Exception):
    """
    Generic error raised by the LanguageTool library.

    It can be used to signal any kind of failure encountered while using the
    library.
    """
9+
10+
11+
class ServerError(LanguageToolError):
    """
    Error raised when interacting with the LanguageTool server fails.

    Subclass of `LanguageToolError`, used for problems such as server startup
    failures.
    """
19+
20+
21+
class JavaError(LanguageToolError):
    """
    Error raised for problems with the Java backend of LanguageTool.

    Subclass of `LanguageToolError`, used for issues that occur when
    interacting with Java, such as Java not being found.
    """
29+
30+
31+
class PathError(LanguageToolError):
    """
    Error raised for problems with a file path used in LanguageTool.

    Raised when a path provided to LanguageTool is unusable, such as the
    LanguageTool JAR file not being found, or a download path not being a
    valid available file path.
    """
40+
41+
42+
class RateLimitError(LanguageToolError):
    """
    Error raised when the LanguageTool server applies rate limiting.

    Subclass of `LanguageToolError`, used for issues such as exceeding the
    allowed number of requests to the public API without a key.
    """

language_tool_python/server.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,16 @@
1616

1717
from .config_file import LanguageToolConfig
1818
from .download_lt import LTP_DOWNLOAD_VERSION, download_lt
19-
from .language_tag import LanguageTag
20-
from .match import Match
21-
from .utils import (
22-
FAILSAFE_LANGUAGE,
19+
from .exceptions import (
2320
LanguageToolError,
2421
PathError,
2522
RateLimitError,
2623
ServerError,
24+
)
25+
from .language_tag import LanguageTag
26+
from .match import Match
27+
from .utils import (
28+
FAILSAFE_LANGUAGE,
2729
correct,
2830
get_language_tool_directory,
2931
get_locale_language,

0 commit comments

Comments
 (0)