Skip to content

Commit c540c9f

Browse files
committed
gh-121284: Fix email address header folding with parsed encoded-word
Email generators using email.policy.default may convert an RFC 2047 encoded-word to unencoded form during header refolding. In a structured header, this could allow 'specials' characters (such as commas) to appear outside a quoted-string, leading to invalid address headers and enabling spoofing. This change ensures that a parsed encoded-word containing specials is kept as an encoded-word when the header is refolded.
1 parent c4e8196 commit c540c9f

File tree

3 files changed

+38
-2
lines changed

3 files changed

+38
-2
lines changed

Lib/email/_header_value_parser.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2829,6 +2829,13 @@ def _refold_parse_tree(parse_tree, *, policy):
28292829
_fold_mime_parameters(part, lines, maxlen, encoding)
28302830
continue
28312831

2832+
allow_refolding_subparts = True
2833+
if part.token_type == 'encoded-word':
2834+
# A parsed encoded-word containing specials must remain encoded,
2835+
# to keep specials from sneaking into a structured header unquoted.
2836+
# (The encoded-word can be split for folding.)
2837+
allow_refolding_subparts = SPECIALSNL.isdisjoint(tstr)
2838+
28322839
if want_encoding and not wrap_as_ew_blocked:
28332840
if not part.as_ew_allowed:
28342841
want_encoding = False
@@ -2848,7 +2855,7 @@ def _refold_parse_tree(parse_tree, *, policy):
28482855
# want it on a line by itself even if it fits, or it
28492856
# doesn't fit on a line by itself. Either way, fall through
28502857
# to unpacking the subparts and wrapping them.
2851-
if not hasattr(part, 'encode'):
2858+
if allow_refolding_subparts and not hasattr(part, 'encode'):
28522859
# It's not a Terminal, do each piece individually.
28532860
parts = list(part) + parts
28542861
want_encoding = False
@@ -2902,7 +2909,7 @@ def _refold_parse_tree(parse_tree, *, policy):
29022909
leading_whitespace = ''.join(whitespace_accumulator)
29032910
last_ew = None
29042911
continue
2905-
if not hasattr(part, 'encode'):
2912+
if allow_refolding_subparts and not hasattr(part, 'encode'):
29062913
# It's not a terminal, try folding the subparts.
29072914
newparts = list(part)
29082915
if not part.as_ew_allowed:

Lib/test/test_email/test__header_value_parser.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3076,6 +3076,31 @@ def test_address_list_with_unicode_names_in_quotes(self):
30763076
'=?utf-8?q?H=C3=BCbsch?= Kaktus <beautiful@example.com>,\n'
30773077
' =?utf-8?q?bei=C3=9Ft_bei=C3=9Ft?= <biter@example.com>\n')
30783078

3079+
def test_address_list_with_specials_in_encoded_word(self):
3080+
# An encoded-word parsed from a structured header must remain
3081+
# encoded when it contains specials. Regression for gh-121284.
3082+
policy = self.policy.clone(max_line_length=40)
3083+
cases = [
3084+
# (to, folded)
3085+
('=?utf-8?q?A_v=C3=A9ry_long_name_with=2C_comma?= <to@example.com>',
3086+
'=?utf-8?q?A_v=C3=A9ry_long_name_with?=\n'
3087+
' =?utf-8?q?=2C_comma?= <to@example.com>\n'),
3088+
('=?utf-8?q?This_long_name_does_not_need_encoded=2Dword?= <to@example.com>',
3089+
'This long name does not need\n'
3090+
' encoded-word <to@example.com>\n'),
3091+
('"A véry long name with, comma" <to@example.com>',
3092+
# (This isn't the best fold point, but it's not invalid.)
3093+
'A =?utf-8?q?v=C3=A9ry_long_name_with?=\n'
3094+
' =?utf-8?q?=2C?= comma <to@example.com>\n'),
3095+
('"A véry long name containing a, comma" <to@example.com>',
3096+
'A =?utf-8?q?v=C3=A9ry?= long name\n'
3097+
' containing =?utf-8?q?a=2C?= comma\n'
3098+
' <to@example.com>\n'),
3099+
]
3100+
for (to, folded) in cases:
3101+
with self.subTest(to=to):
3102+
self._test(parser.get_address_list(to)[0], folded, policy=policy)
3103+
30793104
def test_address_list_with_list_separator_after_fold(self):
30803105
a = 'x' * 66 + '@example.com'
30813106
to = f'{a}, "Hübsch Kaktus" <beautiful@example.com>'
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fix a problem where email.policy.default header refolding could incorrectly
2+
convert an RFC 2047 encoded-word containing commas or other special
3+
characters to unencoded, unquoted text, enabling sender or recipient
4+
spoofing via a carefully crafted display-name.

0 commit comments

Comments
 (0)