File tree Expand file tree Collapse file tree 1 file changed +18
-0
lines changed
Expand file tree Collapse file tree 1 file changed +18
-0
lines changed Original file line number Diff line number Diff line change @@ -57,8 +57,26 @@ def parse_url(url_str):
5757 return urllib .parse .urlparse (url_str ).geturl ()
5858
5959
def _4_bytes_encoded_positions(text: str) -> List[int]:
    """Return the positions of the 4-byte UTF-8 encoded characters in text.

    A character needs four bytes in UTF-8 exactly when its code point lies
    above U+FFFF. Python indexes such a character as a single position,
    whereas LanguageTool counts it as two index units (presumably UTF-16
    code units -- confirm against the LanguageTool API). The returned
    positions are expressed in that second counting: every 4-byte character
    seen earlier in the text shifts the following positions by one extra
    unit.

    Args:
        text: The text to scan.

    Returns:
        The positions of the 4-byte characters, in ascending order. The list
        is empty when the text contains none (including for an empty text).
    """
    positions = []
    char_index = 0
    for char in text:
        # Test the code point instead of encoding the character: the result
        # is the same for every encodable character, no temporary bytes
        # object is built, and a lone surrogate (which UTF-8 cannot encode)
        # is counted as an ordinary 1-unit character instead of raising
        # UnicodeEncodeError.
        if ord(char) > 0xFFFF:
            positions.append(char_index)
            # This character occupies two index units on the LanguageTool
            # side, so advance one extra step on top of the regular one.
            char_index += 1
        char_index += 1
    return positions
72+
73+
6074def correct (text : str , matches : List [Match ]) -> str :
6175 """Automatically apply suggestions to the text."""
76+ # Get the positions of 4-byte encoded characters in the text because without
77+ # carrying out this step, the offsets of the matches could be incorrect.
78+ for match in matches :
79+ match .offset -= sum (1 for i in _4_bytes_encoded_positions (text ) if i <= match .offset )
6280 ltext = list (text )
6381 matches = [match for match in matches if match .replacements ]
6482 errors = [ltext [match .offset :match .offset + match .errorLength ]
You can’t perform that action at this time.
0 commit comments