33import atexit
44import os
55import tempfile
6- from typing import Any , Dict
7-
8- # Allowed configuration keys for LanguageTool.
9- ALLOWED_CONFIG_KEYS = {
10- "maxTextLength" ,
11- "maxTextHardLength" ,
12- "maxCheckTimeMillis" ,
13- "maxErrorsPerWordRate" ,
14- "maxSpellingSuggestions" ,
15- "maxCheckThreads" ,
16- "cacheSize" ,
17- "cacheTTLSeconds" ,
18- "requestLimit" ,
19- "requestLimitInBytes" ,
20- "timeoutRequestLimit" ,
21- "requestLimitPeriodInSeconds" ,
22- "languageModel" ,
23- "fasttextModel" ,
24- "fasttextBinary" ,
25- "maxWorkQueueSize" ,
26- "rulesFile" ,
27- "blockedReferrers" ,
28- "premiumOnly" ,
29- "disabledRuleIds" ,
30- "pipelineCaching" ,
31- "maxPipelinePoolSize" ,
32- "pipelineExpireTimeInSeconds" ,
33- "pipelinePrewarming" ,
34- "trustXForwardForHeader" ,
35- "suggestionsEnabled" ,
6+ from dataclasses import dataclass
7+ from pathlib import Path
8+ from typing import Any , Callable , Dict , Iterable , Optional , Union
9+
10+ from .exceptions import PathError
11+
12+
@dataclass(frozen=True)
class OptionSpec:
    """
    Describe how one configuration option is type-checked and serialized.

    An instance groups together the Python type(s) a value may have, the
    function that renders the value as a string, and an optional hook that
    performs additional validation on the value.

    Attributes:
        py_types (Union[type, tuple[type, ...]]): A single type, or a tuple of types, accepted for the option.
        encoder (Callable[[Any], str]): Callable turning an option value into its string form.
        validator (Optional[Callable[[Any], None]]): Optional callable checking the value; defaults to ``None``.

    .. note::
        The dataclass is declared with ``frozen=True``, so attributes cannot
        be reassigned once an instance has been created.
    """

    # Accepted type, or tuple of accepted types.
    py_types: Union[type, tuple[type, ...]]
    # Value -> string conversion.
    encoder: Callable[[Any], str]
    # Optional extra check; None means no additional validation.
    validator: Optional[Callable[[Any], None]] = None
34+
35+
36+ def _bool_encoder (v : Any ) -> str :
37+ """
38+ Encode a value as a lowercase boolean string.
39+
40+ Converts any value to a boolean and returns its string representation
41+ in lowercase format ('true' or 'false').
42+
43+ :param v: The value to be converted to a boolean string.
44+ :type v: Any
45+ :return: A lowercase string representation of the boolean value ('true' or 'false').
46+ :rtype: str
47+ """
48+ return str (bool (v )).lower ()
49+
50+
51+ def _comma_list_encoder (v : Any ) -> str :
52+ """
53+ Encode a value as a comma-separated list string.
54+
55+ Converts a value into a string representation suitable for comma-separated
56+ list configuration options. If the input is already a string, it is returned
57+ as-is. If it's an iterable, its elements are converted to strings and joined
58+ with commas.
59+
60+ :param v: The value to encode. Can be a string or an iterable of values.
61+ :type v: Any
62+ :return: A comma-separated string representation of the input value.
63+ :rtype: str
64+ :raises TypeError: If the input is neither a string nor an iterable.
65+ """
66+ if isinstance (v , str ):
67+ return v
68+ if isinstance (v , Iterable ):
69+ return "," .join (str (x ) for x in v )
70+ raise TypeError ("expected string or iterable for comma-list option" )
71+
72+
73+ def _path_encoder (v : Any ) -> str :
74+ """
75+ Encode a path value to a string.
76+ Converts the input to a Path object, then to a string, and escapes all
77+ backslashes by doubling them. This is useful for windows file paths and
78+ other contexts where backslashes need to be escaped. (because they will
79+ be used by LT java binary)
80+
81+ :param v: The path value to encode. Can be any type that Path accepts
82+ (str, Path, etc.).
83+ :type v: Any
84+ :return: The path as a string with escaped backslashes (e.g., "C:\\ \\ Users\\ \\ file").
85+ :rtype: str
86+ """
87+ return str (Path (v )).replace ("\\ " , "\\ \\ " )
88+
89+
90+ def _path_validator (v : Any ) -> None :
91+ """
92+ Validate that a given path exists and is a file.
93+
94+ :param v: The path to validate, which will be converted to a Path object
95+ :type v: Any
96+ :raises PathError: If the path does not exist
97+ :raises PathError: If the path exists but is not a file
98+ """
99+ p = Path (v )
100+ if not p .exists ():
101+ raise PathError (f"path does not exist: { p } " )
102+ if not p .is_file ():
103+ raise PathError (f"path is not a file: { p } " )
104+
105+
def _int_encoder(v: Any) -> str:
    """
    Encode a value as a base-10 integer string.

    :param v: The value to convert with ``int``.
    :type v: Any
    :return: The decimal string form of ``int(v)``.
    :rtype: str
    """
    return str(int(v))


def _dir_validator(v: Any) -> None:
    """
    Validate that a given path exists and is a directory.

    :param v: The path to validate, which will be converted to a Path object
    :type v: Any
    :raises PathError: If the path does not exist
    :raises PathError: If the path exists but is not a directory
    """
    p = Path(v)
    if not p.exists():
        raise PathError(f"path does not exist: {p}")
    if not p.is_dir():
        raise PathError(f"path is not a directory: {p}")


# Schema of the supported LanguageTool options: accepted Python type(s),
# string encoder and optional validator for each key.
CONFIG_SCHEMA: Dict[str, OptionSpec] = {
    "maxTextLength": OptionSpec(int, _int_encoder),
    "maxTextHardLength": OptionSpec(int, _int_encoder),
    "maxCheckTimeMillis": OptionSpec(int, _int_encoder),
    "maxErrorsPerWordRate": OptionSpec((int, float), lambda v: str(float(v))),
    "maxSpellingSuggestions": OptionSpec(int, _int_encoder),
    "maxCheckThreads": OptionSpec(int, _int_encoder),
    "cacheSize": OptionSpec(int, _int_encoder),
    "cacheTTLSeconds": OptionSpec(int, _int_encoder),
    "requestLimit": OptionSpec(int, _int_encoder),
    "requestLimitInBytes": OptionSpec(int, _int_encoder),
    "timeoutRequestLimit": OptionSpec(int, _int_encoder),
    "requestLimitPeriodInSeconds": OptionSpec(int, _int_encoder),
    # LanguageTool expects ``languageModel`` to be a directory holding the
    # n-gram indexes, so it is validated as a directory rather than a file.
    "languageModel": OptionSpec((str, Path), _path_encoder, _dir_validator),
    "fasttextModel": OptionSpec((str, Path), _path_encoder, _path_validator),
    "fasttextBinary": OptionSpec((str, Path), _path_encoder, _path_validator),
    "maxWorkQueueSize": OptionSpec(int, _int_encoder),
    "rulesFile": OptionSpec((str, Path), _path_encoder, _path_validator),
    "blockedReferrers": OptionSpec((str, list, tuple, set), _comma_list_encoder),
    "premiumOnly": OptionSpec((bool, int), _bool_encoder),
    "disabledRuleIds": OptionSpec((str, list, tuple, set), _comma_list_encoder),
    "pipelineCaching": OptionSpec((bool, int), _bool_encoder),
    "maxPipelinePoolSize": OptionSpec(int, _int_encoder),
    "pipelineExpireTimeInSeconds": OptionSpec(int, _int_encoder),
    "pipelinePrewarming": OptionSpec((bool, int), _bool_encoder),
    "trustXForwardForHeader": OptionSpec((bool, int), _bool_encoder),
    "suggestionsEnabled": OptionSpec((bool, int), _bool_encoder),
}
37134
38135
@@ -53,23 +150,50 @@ def _is_lang_key(key: str) -> bool:
53150 return False
54151
55152 parts = key .split ("-" )
56- return (len (parts ) == 2 and len (parts [1 ]) > 0 ) or (
57- len (parts ) == 3 and len (parts [1 ]) > 0 and parts [2 ] == "dictPath"
153+ return (len (parts ) == 2 and len (parts [1 ]) > 0 ) or ( # lang-<code>
154+ len (parts ) == 3
155+ and len (parts [1 ]) > 0
156+ and parts [2 ] == "dictPath" # lang-<code>-dictPath
58157 )
59158
60159
def _encode_config(config: Dict[str, Any]) -> Dict[str, str]:
    """
    Turn a configuration mapping into a mapping of string-encoded values.

    Language keys are handled first: a ``lang-<code>`` value is converted
    with ``str``, while a ``lang-<code>-dictPath`` value is validated and
    encoded as a path. Every other key is looked up in ``CONFIG_SCHEMA``;
    its value is type-checked, optionally validated, and then encoded by the
    matching specification.

    :param config: A dictionary containing configuration keys and values to be encoded.
    :type config: Dict[str, Any]
    :return: A dictionary with the same keys whose values are all strings.
    :rtype: Dict[str, str]
    :raises ValueError: If a key is neither a language key nor present in
                        CONFIG_SCHEMA.
    :raises TypeError: If a value is not an instance of the type(s) declared
                       for its key in CONFIG_SCHEMA.
    """
    result: Dict[str, str] = {}
    for name, raw in config.items():
        if _is_lang_key(name):
            dashes = name.count("-")
            if dashes == 1:  # lang-<code>
                result[name] = str(raw)
                continue
            if dashes == 2:  # lang-<code>-dictPath
                _path_validator(raw)
                result[name] = _path_encoder(raw)
                continue

        option = CONFIG_SCHEMA.get(name)
        if option is None:
            raise ValueError(f"unexpected key in config: {name}")

        if not isinstance(raw, option.py_types):
            raise TypeError(f"invalid type for {name}: {type(raw).__name__}")

        check = option.validator
        if check is not None:
            check(raw)
        result[name] = option.encoder(raw)
    return result
196+
73197
74198class LanguageToolConfig :
75199 """
@@ -92,24 +216,8 @@ def __init__(self, config: Dict[str, Any]):
92216 """
93217 if not config :
94218 raise ValueError ("config cannot be empty" )
95- _validate_config_keys (config )
96-
97- self .config = config
98-
99- if "disabledRuleIds" in self .config :
100- self .config ["disabledRuleIds" ] = "," .join (self .config ["disabledRuleIds" ])
101- if "blockedReferrers" in self .config :
102- self .config ["blockedReferrers" ] = "," .join (self .config ["blockedReferrers" ])
103- for key in [
104- "pipelineCaching" ,
105- "premiumOnly" ,
106- "pipelinePrewarming" ,
107- "trustXForwardForHeader" ,
108- "suggestionsEnabled" ,
109- ]:
110- if key in self .config :
111- self .config [key ] = str (bool (self .config [key ])).lower ()
112219
220+ self .config = _encode_config (config )
113221 self .path = self ._create_temp_file ()
114222
115223 def _create_temp_file (self ) -> str :
0 commit comments