Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Doc/library/mailbox.rst
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,14 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF.
message. Failing to lock the mailbox runs the risk of losing messages or
corrupting the entire mailbox.

The :class:`!Mailbox` class supports the :keyword:`with` statement. When used
as a context manager, :class:`!Mailbox` calls :meth:`lock` when the context is entered,
returns the mailbox object as the context object, and at context end calls :meth:`close`,
thereby releasing the lock.

.. versionchanged:: next
Support for the :keyword:`with` statement was added.

:class:`!Mailbox` instances have the following methods:


Expand Down
7 changes: 7 additions & 0 deletions Lib/mailbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ def __init__(self, path, factory=None, create=True):
self._path = os.path.abspath(os.path.expanduser(path))
self._factory = factory

def __enter__(self):
    """Lock the mailbox and return it as the context object."""
    self.lock()
    return self

def __exit__(self, type, value, traceback):
    """Close the mailbox when the ``with`` block ends.

    The exception details are ignored and nothing is returned, so an
    exception raised inside the block is not suppressed.
    """
    self.close()

def add(self, message):
"""Add message and return assigned key."""
raise NotImplementedError('Method must be implemented by subclass')
Expand Down
15 changes: 15 additions & 0 deletions Lib/test/test_mailbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,11 @@ def _test_flush_or_close(self, method, should_call_close):
self.assertIn(self._box.get_string(key), contents)
oldbox.close()

def test_use_context_manager(self):
    # Entering a mailbox as a context manager must yield the mailbox itself.
    mailbox = self._box
    with mailbox as entered:
        self.assertIs(mailbox, entered)

def test_dump_message(self):
# Write message representations to disk
for input in (email.message_from_string(_sample_message),
Expand Down Expand Up @@ -1122,6 +1127,16 @@ def test_ownership_after_flush(self):
self.assertEqual(st.st_gid, other_gid)
self.assertEqual(st.st_mode, mode)

def test_context_manager_locks_and_closes(self):
    # The mailbox is locked while the context is active, and is unlocked
    # and closed once the context has been left.
    # (The private _locked and _file attributes are implementation
    # details; they are only used here to observe the state.)
    mailbox = self._box

    # Before entering: not locked yet.
    self.assertFalse(mailbox._locked)

    with mailbox as context_object:
        self.assertIs(mailbox, context_object)
        # Inside the context: locked, underlying file still open.
        self.assertTrue(mailbox._locked)
        self.assertFalse(mailbox._file.closed)

    # After leaving: lock released, underlying file closed.
    self.assertFalse(mailbox._locked)
    self.assertTrue(mailbox._file.closed)

class _TestMboxMMDF(_TestSingleFile):

Expand Down
24 changes: 24 additions & 0 deletions Lib/test/test_ucn.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,30 @@ def test_cjk_unified_ideographs(self):
self.checkletter("cjK UniFIeD idEogRAph-2aBcD", "\U0002abcd")
self.checkletter("CJk uNIfiEd IDeOGraPH-2AbCd", "\U0002abcd")

def test_tangut_ideographs(self):
    # (name, expected character) pairs; the last one checks that the
    # lookup is case-insensitive.
    cases = (
        ("TANGUT IDEOGRAPH-17000", "\U00017000"),
        ("TANGUT IDEOGRAPH-187FF", "\U000187ff"),
        ("TANGUT IDEOGRAPH-18D00", "\U00018D00"),
        ("TANGUT IDEOGRAPH-18D1E", "\U00018d1e"),
        ("tangut ideograph-18d1e", "\U00018d1e"),
    )
    for name, char in cases:
        self.checkletter(name, char)

def test_egyptian_hieroglyphs(self):
    # (name, expected character) pairs; the last one checks that the
    # lookup is case-insensitive.
    cases = (
        ("EGYPTIAN HIEROGLYPH-13460", "\U00013460"),
        ("EGYPTIAN HIEROGLYPH-143FA", "\U000143fa"),
        ("egyptian hieroglyph-143fa", "\U000143fa"),
    )
    for name, char in cases:
        self.checkletter(name, char)

def test_khitan_small_script_characters(self):
    # Each derived name is checked once (the original repeated the
    # upper-case 18CFF assertion verbatim); the final lower-case entry
    # checks that the lookup is case-insensitive.
    cases = (
        ("KHITAN SMALL SCRIPT CHARACTER-18B00", "\U00018b00"),
        ("KHITAN SMALL SCRIPT CHARACTER-18CD5", "\U00018cd5"),
        ("KHITAN SMALL SCRIPT CHARACTER-18CFF", "\U00018cff"),
        ("khitan small script character-18cff", "\U00018cff"),
    )
    for name, char in cases:
        self.checkletter(name, char)

def test_nushu_characters(self):
    # (name, expected character) pairs; the last one checks that the
    # lookup is case-insensitive.
    cases = (
        ("NUSHU CHARACTER-1B170", "\U0001b170"),
        ("NUSHU CHARACTER-1B2FB", "\U0001b2fb"),
        ("nushu character-1b2fb", "\U0001b2fb"),
    )
    for name, char in cases:
        self.checkletter(name, char)

def test_bmp_characters(self):
for code in range(0x10000):
char = chr(code)
Expand Down
107 changes: 106 additions & 1 deletion Lib/test/test_unicodedata.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,60 @@ def test_function_checksum(self):
result = h.hexdigest()
self.assertEqual(result, self.expectedchecksum)

def test_name(self):
    name = self.db.name

    # Characters without a name: the lookup raises ValueError when no
    # default is supplied ...
    for unnamed in ('\0', '\n', '\x1F', '\x7F', '\x9F',
                    '\uFFFE', '\uFFFF', '\U0010FFFF'):
        self.assertRaises(ValueError, name, unnamed)
    # ... and returns the default when one is supplied.
    self.assertEqual(name('\U0010FFFF', 42), 42)

    # (character, expected name, new_only) triples, checked in order.
    # new_only marks characters that have no name when self.old is true;
    # for those, name() is called with a None default and must return
    # None in that case.
    cases = [
        (' ', 'SPACE', False),
        ('1', 'DIGIT ONE', False),
        ('A', 'LATIN CAPITAL LETTER A', False),
        ('\xA0', 'NO-BREAK SPACE', False),
        ('\u0221', 'LATIN SMALL LETTER D WITH CURL', True),
        ('\u3400', 'CJK UNIFIED IDEOGRAPH-3400', False),
        ('\u9FA5', 'CJK UNIFIED IDEOGRAPH-9FA5', False),
        ('\uAC00', 'HANGUL SYLLABLE GA', False),
        ('\uD7A3', 'HANGUL SYLLABLE HIH', False),
        ('\uF900', 'CJK COMPATIBILITY IDEOGRAPH-F900', False),
        ('\uFA6A', 'CJK COMPATIBILITY IDEOGRAPH-FA6A', False),
        ('\uFBF9',
         'ARABIC LIGATURE UIGHUR KIRGHIZ YEH WITH HAMZA '
         'ABOVE WITH ALEF MAKSURA ISOLATED FORM',
         False),
        ('\U00013460', 'EGYPTIAN HIEROGLYPH-13460', True),
        ('\U000143FA', 'EGYPTIAN HIEROGLYPH-143FA', True),
        ('\U00017000', 'TANGUT IDEOGRAPH-17000', True),
        ('\U00018B00', 'KHITAN SMALL SCRIPT CHARACTER-18B00', True),
        ('\U00018CD5', 'KHITAN SMALL SCRIPT CHARACTER-18CD5', True),
        ('\U00018CFF', 'KHITAN SMALL SCRIPT CHARACTER-18CFF', True),
        ('\U00018D1E', 'TANGUT IDEOGRAPH-18D1E', True),
        ('\U0001B170', 'NUSHU CHARACTER-1B170', True),
        ('\U0001B2FB', 'NUSHU CHARACTER-1B2FB', True),
        ('\U0001FBA8',
         'BOX DRAWINGS LIGHT DIAGONAL UPPER CENTRE TO '
         'MIDDLE LEFT AND MIDDLE RIGHT TO LOWER CENTRE',
         True),
        ('\U0002A6D6', 'CJK UNIFIED IDEOGRAPH-2A6D6', False),
        ('\U0002FA1D', 'CJK COMPATIBILITY IDEOGRAPH-2FA1D', False),
        ('\U00033479', 'CJK UNIFIED IDEOGRAPH-33479', True),
    ]
    for char, expected, new_only in cases:
        if new_only:
            self.assertEqual(name(char, None),
                             None if self.old else expected)
        else:
            self.assertEqual(name(char), expected)

@requires_resource('cpu')
def test_name_inverse_lookup(self):
for char in iterallchars():
looked_name = self.db.name(char, None)
Expand All @@ -151,6 +205,17 @@ def test_lookup_nonexistant(self):
"HANDBUG",
"MODIFIER LETTER CYRILLIC SMALL QUESTION MARK",
"???",
"CJK UNIFIED IDEOGRAPH-03400",
"CJK UNIFIED IDEOGRAPH-020000",
"CJK UNIFIED IDEOGRAPH-33FF",
"CJK UNIFIED IDEOGRAPH-F900",
"CJK UNIFIED IDEOGRAPH-13460",
"CJK UNIFIED IDEOGRAPH-17000",
"CJK UNIFIED IDEOGRAPH-18B00",
"CJK UNIFIED IDEOGRAPH-1B170",
"CJK COMPATIBILITY IDEOGRAPH-3400",
"TANGUT IDEOGRAPH-3400",
"HANGUL SYLLABLE AC00",
]:
self.assertRaises(KeyError, self.db.lookup, nonexistent)

Expand Down Expand Up @@ -613,7 +678,47 @@ class UnicodeFunctionsTest(unittest.TestCase, BaseUnicodeFunctionsTest):
# (e.g. 'make distclean && make') to get the correct checksum.
expectedchecksum = ('83cc43a2fbb779185832b4c049217d80b05bf349'
if quicktest else
'65670ae03a324c5f9e826a4de3e25bae4d73c9b7')
'180bdc91143d8aa2eb9dd6726e66d37606205942')

@requires_resource('network')
def test_all_names(self):
    # Fetch DerivedName.txt and check name() against it; the data file
    # is closed again once the comparison has finished.
    derived_names = download_test_data_file("DerivedName.txt")
    with derived_names:
        self.run_name_tests(derived_names)

def run_name_tests(self, testdata):
    # Build {code point: expected name} from the lines of *testdata*,
    # then compare it with self.db.name() for every code point.
    expected_names = {}

    for raw_line in testdata:
        entry = raw_line.strip()
        # Skip blank lines and comments.
        if not entry or entry.startswith("#"):
            continue
        raw_cp, name = entry.split("; ")

        if ".." in raw_cp:
            # A range of code points: the name must be a pattern ending
            # in '*', which stands for the code point in hexadecimal.
            first, last = (int(bound, 16) for bound in raw_cp.split(".."))
            assert name[-1] == '*', (raw_cp, name)
            prefix = name[:-1]
            for cp in range(first, last + 1):
                expected_names[cp] = f"{prefix}{cp:04X}"
        elif name[-1] == '*':
            # A single code point whose name uses the same '*' pattern.
            cp = int(raw_cp, 16)
            expected_names[cp] = f"{name[:-1]}{cp:04X}"
        else:
            # A single code point with a literal name.
            assert '*' not in name, (raw_cp, name)
            expected_names[int(raw_cp, 16)] = name

    # Code points missing from the data must have no name (None).
    lookup = self.db.name
    for cp in range(sys.maxunicode + 1):
        self.assertEqual(lookup(chr(cp), None), expected_names.get(cp))

def test_isxidstart(self):
self.assertTrue(self.db.isxidstart('S'))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
:class:`mailbox.Mailbox` instances can now be used as a context manager.
The Mailbox is locked on context entry and unlocked and closed at context exit.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add support for Tangut ideograph names in :mod:`unicodedata`.
105 changes: 63 additions & 42 deletions Modules/unicodedata.c
Original file line number Diff line number Diff line change
Expand Up @@ -1052,22 +1052,18 @@
{ 0, 0, "H" }
};

/* These ranges need to match makeunicodedata.py:cjk_ranges. */
static int
is_unified_ideograph(Py_UCS4 code)
{
return
(0x3400 <= code && code <= 0x4DBF) || /* CJK Ideograph Extension A */
(0x4E00 <= code && code <= 0x9FFF) || /* CJK Ideograph */
(0x20000 <= code && code <= 0x2A6DF) || /* CJK Ideograph Extension B */
(0x2A700 <= code && code <= 0x2B73F) || /* CJK Ideograph Extension C */
(0x2B740 <= code && code <= 0x2B81D) || /* CJK Ideograph Extension D */
(0x2B820 <= code && code <= 0x2CEAD) || /* CJK Ideograph Extension E */
(0x2CEB0 <= code && code <= 0x2EBE0) || /* CJK Ideograph Extension F */
(0x2EBF0 <= code && code <= 0x2EE5D) || /* CJK Ideograph Extension I */
(0x30000 <= code && code <= 0x3134A) || /* CJK Ideograph Extension G */
(0x31350 <= code && code <= 0x323AF) || /* CJK Ideograph Extension H */
(0x323B0 <= code && code <= 0x33479); /* CJK Ideograph Extension J */
find_prefix_id(Py_UCS4 code)
{
for (int i = 0; i < (int)Py_ARRAY_LENGTH(derived_name_ranges); i++) {
if (code < derived_name_ranges[i].first) {
return -1;
}
if (code <= derived_name_ranges[i].last) {
return derived_name_ranges[i].prefixid;
}
}
return -1;
}

/* macros used to determine if the given code point is in the PUA range that
Expand Down Expand Up @@ -1345,7 +1341,9 @@
}
}

if (SBase <= code && code < SBase+SCount) {
int prefixid = find_prefix_id(code);
if (prefixid == 0) {
assert(SBase <= code && code < SBase+SCount);
/* Hangul syllable. */
int SIndex = code - SBase;
int L = SIndex / NCount;
Expand All @@ -1367,11 +1365,11 @@
return 1;
}

if (is_unified_ideograph(code)) {
if (buflen < 28)
/* Worst case: CJK UNIFIED IDEOGRAPH-20000 */
if (prefixid > 0) {
const char *prefix = derived_name_prefixes[prefixid];
if (snprintf(buffer, buflen, "%s%04X", prefix, code) >= buflen) {
return 0;
sprintf(buffer, "CJK UNIFIED IDEOGRAPH-%X", code);
}
return 1;
}

Expand Down Expand Up @@ -1428,6 +1426,35 @@
return 1;
}

/* Parse the hexadecimal code point that ends a derived character name.
 *
 * `name` points at the first hex digit and `namelen` is the number of
 * characters to parse. Digits A-F are accepted in either case.
 *
 * Returns the code point, or (Py_UCS4)-1 when the input is not 4 to 6
 * characters long, starts with '0', contains a non-hexadecimal character,
 * or denotes a value above 0x10FFFF.
 */
static Py_UCS4
parse_hex_code(const char *name, int namelen)
{
    const Py_UCS4 invalid = (Py_UCS4)-1;

    /* The length is checked first, so name[0] is only read when there is
       at least one character to look at. */
    if (namelen < 4 || namelen > 6 || name[0] == '0') {
        return invalid;
    }

    /* At most six hex digits are accumulated, so `value` cannot
       overflow an int. */
    int value = 0;
    for (int i = 0; i < namelen; i++) {
        Py_UCS1 ch = Py_TOUPPER(name[i]);
        int digit;
        if (ch >= '0' && ch <= '9') {
            digit = ch - '0';
        }
        else if (ch >= 'A' && ch <= 'F') {
            digit = ch - 'A' + 10;
        }
        else {
            return invalid;
        }
        value = value * 16 + digit;
    }

    if (value > 0x10ffff) {
        return invalid;
    }
    return value;
}

static int
_getcode(const char* name, int namelen, Py_UCS4* code)
Expand All @@ -1436,8 +1463,19 @@
* Named aliases are not resolved, they are returned as a code point in the
* PUA */

/* Check for hangul syllables. */
if (PyOS_strnicmp(name, "HANGUL SYLLABLE ", 16) == 0) {
int i = 0;
size_t prefixlen;
for (; i < (int)Py_ARRAY_LENGTH(derived_name_prefixes); i++) {
const char *prefix = derived_name_prefixes[i];
prefixlen = strlen(derived_name_prefixes[i]);
if (PyOS_strnicmp(name, prefix, prefixlen) == 0) {
break;
}
}

if (i == 0) {
/* Hangul syllables. */
assert(PyOS_strnicmp(name, "HANGUL SYLLABLE ", 16) == 0);
int len, L = -1, V = -1, T = -1;
const char *pos = name + 16;
find_syllable(pos, &len, &L, LCount, 0);
Expand All @@ -1454,28 +1492,11 @@
return 0;
}

/* Check for unified ideographs. */
if (PyOS_strnicmp(name, "CJK UNIFIED IDEOGRAPH-", 22) == 0) {
/* Four or five hexdigits must follow. */
unsigned int v;
v = 0;
name += 22;
namelen -= 22;
if (namelen != 4 && namelen != 5)
if (i < (int)Py_ARRAY_LENGTH(derived_name_prefixes)) {
Py_UCS4 v = parse_hex_code(name + prefixlen, namelen - prefixlen);

Check warning on line 1496 in Modules/unicodedata.c

View workflow job for this annotation

GitHub Actions / Windows (free-threading) / Build and test (x64)

'function': conversion from 'size_t' to 'int', possible loss of data [D:\a\cpython\cpython\PCbuild\unicodedata.vcxproj]

Check warning on line 1496 in Modules/unicodedata.c

View workflow job for this annotation

GitHub Actions / Windows (free-threading) / Build and test (arm64)

'function': conversion from 'size_t' to 'int', possible loss of data [C:\a\cpython\cpython\PCbuild\unicodedata.vcxproj]

Check warning on line 1496 in Modules/unicodedata.c

View workflow job for this annotation

GitHub Actions / Windows / Build and test (x64)

'function': conversion from 'size_t' to 'int', possible loss of data [D:\a\cpython\cpython\PCbuild\unicodedata.vcxproj]

Check warning on line 1496 in Modules/unicodedata.c

View workflow job for this annotation

GitHub Actions / Windows / Build and test (arm64)

'function': conversion from 'size_t' to 'int', possible loss of data [C:\a\cpython\cpython\PCbuild\unicodedata.vcxproj]

Check warning on line 1496 in Modules/unicodedata.c

View workflow job for this annotation

GitHub Actions / Windows / Build and test (x64)

'function': conversion from 'size_t' to 'int', possible loss of data [D:\a\cpython\cpython\PCbuild\unicodedata.vcxproj]

Check warning on line 1496 in Modules/unicodedata.c

View workflow job for this annotation

GitHub Actions / Windows / Build and test (arm64)

'function': conversion from 'size_t' to 'int', possible loss of data [C:\a\cpython\cpython\PCbuild\unicodedata.vcxproj]

Check warning on line 1496 in Modules/unicodedata.c

View workflow job for this annotation

GitHub Actions / Windows (free-threading) / Build and test (x64)

'function': conversion from 'size_t' to 'int', possible loss of data [D:\a\cpython\cpython\PCbuild\unicodedata.vcxproj]

Check warning on line 1496 in Modules/unicodedata.c

View workflow job for this annotation

GitHub Actions / Windows (free-threading) / Build and test (arm64)

'function': conversion from 'size_t' to 'int', possible loss of data [C:\a\cpython\cpython\PCbuild\unicodedata.vcxproj]
if (find_prefix_id(v) != i) {
return 0;
while (namelen--) {
v *= 16;
Py_UCS1 c = Py_TOUPPER(*name);
if (c >= '0' && c <= '9')
v += c - '0';
else if (c >= 'A' && c <= 'F')
v += c - 'A' + 10;
else
return 0;
name++;
}
if (!is_unified_ideograph(v))
return 0;
*code = v;
return 1;
}
Expand Down
Loading
Loading