Skip to content

Commit fad52cd

Browse files
Use a str.translate() table in _sanitize_charset_name; replace 'us-ascii' with 'ascii'
1 parent 95f2e65 commit fad52cd

File tree

2 files changed

+6
-7
lines changed

2 files changed

+6
-7
lines changed

Lib/email/_header_value_parser.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -796,14 +796,14 @@ def params(self):
796796
value = urllib.parse.unquote(value, encoding='latin-1')
797797
else:
798798
try:
799-
charset = utils._sanitize_charset_name(charset, 'us-ascii')
799+
charset = utils._sanitize_charset_name(charset, 'ascii')
800800
value = value.decode(charset, 'surrogateescape')
801801
except (LookupError, UnicodeEncodeError):
802802
# XXX: there should really be a custom defect for
803803
# unknown character set to make it easy to find,
804804
# because otherwise unknown charset is a silent
805805
# failure.
806-
value = value.decode('us-ascii', 'surrogateescape')
806+
value = value.decode('ascii', 'surrogateescape')
807807
if utils._has_surrogates(value):
808808
param.defects.append(errors.UndecodableBytesDefect())
809809
value_parts.append(value)

Lib/email/utils.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -446,17 +446,16 @@ def decode_params(params):
446446
new_params.append((name, '"%s"' % value))
447447
return new_params
448448

449+
_SANITIZE_TABLE = str.maketrans({i: None for i in range(128, 65536)})
450+
449451
def _sanitize_charset_name(charset, fallback_charset):
450452
if not charset:
451453
return charset
452-
sanitized = ''.join(
453-
c for c in charset
454-
if (ord(c) < 0xDC80 or ord(c) > 0xDCFF) and c.isascii()
455-
)
454+
sanitized = charset.translate(_SANITIZE_TABLE)
456455
return sanitized if sanitized else fallback_charset
457456

458457
def collapse_rfc2231_value(value, errors='replace',
459-
fallback_charset='us-ascii'):
458+
fallback_charset='ascii'):
460459
if not isinstance(value, tuple) or len(value) != 3:
461460
return unquote(value)
462461
# While value comes to us as a unicode string, we need it to be a bytes

0 commit comments

Comments
 (0)