Skip to content

Commit c249511

Browse files
medmunds and bitdancer committed
squash! gh-121284: Fix email address header folding with parsed encoded-word
[Better fix from @bitdancer.] Co-authored-by: R David Murray <rdmurray@bitdance.com>
1 parent e63ec20 commit c249511

File tree

2 files changed

+9
-16
lines changed

2 files changed

+9
-16
lines changed

Lib/email/_header_value_parser.py

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1053,7 +1053,7 @@ def get_fws(value):
10531053
fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws')
10541054
return fws, newvalue
10551055

1056-
def get_encoded_word(value):
1056+
def get_encoded_word(value, terminal_type='vtext'):
10571057
""" encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
10581058
10591059
"""
@@ -1092,7 +1092,7 @@ def get_encoded_word(value):
10921092
ew.append(token)
10931093
continue
10941094
chars, *remainder = _wsp_splitter(text, 1)
1095-
vtext = ValueTerminal(chars, 'vtext')
1095+
vtext = ValueTerminal(chars, terminal_type)
10961096
_validate_xtext(vtext)
10971097
ew.append(vtext)
10981098
text = ''.join(remainder)
@@ -1134,7 +1134,7 @@ def get_unstructured(value):
11341134
valid_ew = True
11351135
if value.startswith('=?'):
11361136
try:
1137-
token, value = get_encoded_word(value)
1137+
token, value = get_encoded_word(value, 'utext')
11381138
except _InvalidEwError:
11391139
valid_ew = False
11401140
except errors.HeaderParseError:
@@ -1163,7 +1163,7 @@ def get_unstructured(value):
11631163
# the parser to go in an infinite loop.
11641164
if valid_ew and rfc2047_matcher.search(tok):
11651165
tok, *remainder = value.partition('=?')
1166-
vtext = ValueTerminal(tok, 'vtext')
1166+
vtext = ValueTerminal(tok, 'utext')
11671167
_validate_xtext(vtext)
11681168
unstructured.append(vtext)
11691169
value = ''.join(remainder)
@@ -2813,7 +2813,7 @@ def _refold_parse_tree(parse_tree, *, policy):
28132813
continue
28142814
tstr = str(part)
28152815
if not want_encoding:
2816-
if part.token_type == 'ptext':
2816+
if part.token_type in ('ptext', 'vtext'):
28172817
# Encode if tstr contains special characters.
28182818
want_encoding = not SPECIALSNL.isdisjoint(tstr)
28192819
else:
@@ -2837,13 +2837,6 @@ def _refold_parse_tree(parse_tree, *, policy):
28372837
_fold_mime_parameters(part, lines, maxlen, encoding)
28382838
continue
28392839

2840-
allow_refolding_subparts = True
2841-
if part.token_type == 'encoded-word':
2842-
# A parsed encoded-word containing specials must remain encoded,
2843-
# to keep specials from sneaking into a structured header unquoted.
2844-
# (The encoded-word can be split for folding.)
2845-
allow_refolding_subparts = SPECIALSNL.isdisjoint(tstr)
2846-
28472840
if want_encoding and not wrap_as_ew_blocked:
28482841
if not part.as_ew_allowed:
28492842
want_encoding = False
@@ -2863,7 +2856,7 @@ def _refold_parse_tree(parse_tree, *, policy):
28632856
# want it on a line by itself even if it fits, or it
28642857
# doesn't fit on a line by itself. Either way, fall through
28652858
# to unpacking the subparts and wrapping them.
2866-
if allow_refolding_subparts and not hasattr(part, 'encode'):
2859+
if not hasattr(part, 'encode'):
28672860
# It's not a Terminal, do each piece individually.
28682861
parts = list(part) + parts
28692862
want_encoding = False
@@ -2917,7 +2910,7 @@ def _refold_parse_tree(parse_tree, *, policy):
29172910
leading_whitespace = ''.join(whitespace_accumulator)
29182911
last_ew = None
29192912
continue
2920-
if allow_refolding_subparts and not hasattr(part, 'encode'):
2913+
if not hasattr(part, 'encode'):
29212914
# It's not a terminal, try folding the subparts.
29222915
newparts = list(part)
29232916
if part.token_type == 'bare-quoted-string':

Lib/test/test_email/test__header_value_parser.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3083,8 +3083,8 @@ def test_address_list_with_specials_in_encoded_word(self):
30833083
cases = [
30843084
# (to, folded)
30853085
('=?utf-8?q?A_v=C3=A9ry_long_name_with=2C_comma?= <to@example.com>',
3086-
'=?utf-8?q?A_v=C3=A9ry_long_name_with?=\n'
3087-
' =?utf-8?q?=2C_comma?= <to@example.com>\n'),
3086+
'A =?utf-8?q?v=C3=A9ry_long_name_with?=\n'
3087+
' =?utf-8?q?=2C?= comma <to@example.com>\n'),
30883088
('=?utf-8?q?This_long_name_does_not_need_encoded=2Dword?= <to@example.com>',
30893089
'This long name does not need\n'
30903090
' encoded-word <to@example.com>\n'),

0 commit comments

Comments
 (0)