Skip to content

Commit ae06154

Browse files
Add some tests.
1 parent ba1e9b7 commit ae06154

File tree

1 file changed

+47
-0
lines changed

1 file changed

+47
-0
lines changed

Lib/test/test_unicodedata.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -848,5 +848,52 @@ class MyStr(str):
848848
self.assertIs(type(normalize(form, MyStr(input_str))), str)
849849

850850

851+
class GraphemeBreakTest(unittest.TestCase):
    """Check grapheme cluster segmentation against GraphemeBreakTest.txt.

    The data file is downloaded from unicode.org for the Unicode version
    reported by ``unicodedata.unidata_version``; the test is skipped when
    the file cannot be fetched.
    """

    @staticmethod
    def check_version(testfile):
        """Return True if the first line of *testfile* names our UCD version.

        Passed as the ``check`` callback to ``open_urlresource``.
        NOTE(review): presumably used there to validate a cached copy of
        the data file -- confirm against test.support.
        """
        hdr = testfile.readline()
        return unicodedata.unidata_version in hdr

    @requires_resource('network')
    def test_grapheme_break(self):
        """Download the reference data and run every test line in it."""
        TESTDATAFILE = "auxiliary/GraphemeBreakTest.txt"
        TESTDATAURL = (
            f"https://www.unicode.org/Public/"
            f"{unicodedata.unidata_version}/ucd/{TESTDATAFILE}"
        )

        # Hit the exception early
        try:
            testdata = open_urlresource(TESTDATAURL, encoding="utf-8",
                                        check=self.check_version)
        except PermissionError:
            self.skipTest(f"Permission error when downloading {TESTDATAURL} "
                          f"into the test data directory")
        except (OSError, HTTPException) as exc:
            self.skipTest(f"Failed to download {TESTDATAURL}: {exc}")

        with testdata:
            self.run_grapheme_break_tests(testdata, unicodedata)

    def run_grapheme_break_tests(self, testdata, ucd):
        """Run one subtest per data line of *testdata*.

        *testdata* is an iterable of text lines.  Everything after ``#`` on
        a line is a comment; lines that are empty once the comment is
        removed are ignored.  The remaining fields are hexadecimal code
        points separated by ``÷`` (break) or ``×`` (no break) markers.

        *ucd* is the object whose ``iter_graphemes()`` is under test; its
        output for the joined string must equal the clusters delimited by
        the ``÷`` markers.
        """
        for line in testdata:
            line, _, comment = line.partition('#')
            line = line.strip()
            if not line:
                continue
            comment = comment.strip()

            # '×' only separates code points inside a cluster, so it can be
            # treated as plain whitespace.
            fields = line.replace('×', ' ').split()
            # A code point must always have an open cluster to land in;
            # report a malformed line as a test failure, not an IndexError.
            self.assertEqual(fields[:1], ['÷'], line)

            chunks = []
            for field in fields:
                if field == '÷':
                    chunks.append('')
                else:
                    chunks[-1] += chr(int(field, 16))
            # The trailing '÷' opens a final cluster that must stay empty.
            self.assertEqual(chunks.pop(), '', line)
            with self.subTest(line):
                # Use the module handed in by the caller, not the global.
                result = list(ucd.iter_graphemes(''.join(chunks)))
                self.assertEqual(result, chunks, comment)
896+
897+
851898
# Run the test suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)