Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 44 additions & 32 deletions Lib/email/_header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2835,6 +2835,29 @@ def _steal_trailing_WSP_if_exists(lines):
lines.pop()
return wsp


def _steal_all_trailing_WSP_if_exists(lines):
lines_popped = False
wsp_lines = []
while lines and lines[-1]:
for i in range(len(lines[-1]), -1, -1):
if i <= 0 or lines[-1][i - 1] not in WSP:
break
wsp_line = lines[-1][i:]
if not wsp_line:
break
wsp_lines.insert(0, wsp_line)
lines[-1] = lines[-1][:i]
if not lines[-1]:
lines_popped = True
lines.pop()
else:
break
if lines_popped:
lines.append(' ' if lines else '')
return ''.join(wsp_lines)


def _refold_parse_tree(parse_tree, *, policy):
"""Return string of contents of parse_tree folded according to RFC rules.

Expand All @@ -2843,9 +2866,7 @@ def _refold_parse_tree(parse_tree, *, policy):
maxlen = policy.max_line_length or sys.maxsize
encoding = 'utf-8' if policy.utf8 else 'us-ascii'
lines = [''] # Folded lines to be output
leading_whitespace = '' # When we have whitespace between two encoded
# words, we may need to encode the whitespace
# at the beginning of the second word.
last_word_is_ew = False
last_ew = None # Points to the last encoded character if there's an ew on
# the line
last_charset = None
Expand Down Expand Up @@ -2882,6 +2903,7 @@ def _refold_parse_tree(parse_tree, *, policy):
if part.token_type == 'mime-parameters':
# Mime parameter folding (using RFC2231) is extra special.
_fold_mime_parameters(part, lines, maxlen, encoding)
last_word_is_ew = False
continue

if want_encoding and not wrap_as_ew_blocked:
Expand All @@ -2898,6 +2920,7 @@ def _refold_parse_tree(parse_tree, *, policy):
# XXX what if encoded_part has no leading FWS?
lines.append(newline)
lines[-1] += encoded_part
last_word_is_ew = False
continue
# Either this is not a major syntactic break, so we don't
# want it on a line by itself even if it fits, or it
Expand All @@ -2917,10 +2940,8 @@ def _refold_parse_tree(parse_tree, *, policy):
last_charset == 'utf-8' and charset != 'us-ascii')):
last_ew = None
last_ew = _fold_as_ew(tstr, lines, maxlen, last_ew,
part.ew_combine_allowed, charset, leading_whitespace)
# This whitespace has been added to the lines in _fold_as_ew()
# so clear it now.
leading_whitespace = ''
part.ew_combine_allowed, charset, last_word_is_ew)
last_word_is_ew = True
last_charset = charset
want_encoding = False
continue
Expand All @@ -2933,28 +2954,20 @@ def _refold_parse_tree(parse_tree, *, policy):

if len(tstr) <= maxlen - len(lines[-1]):
lines[-1] += tstr
if any(char not in WSP for char in tstr):
last_word_is_ew = False
continue

# This part is too long to fit. The RFC wants us to break at
# "major syntactic breaks", so unless we don't consider this
# to be one, check if it will fit on the next line by itself.
leading_whitespace = ''
if (part.syntactic_break and
len(tstr) + 1 <= maxlen):
newline = _steal_trailing_WSP_if_exists(lines)
if newline or part.startswith_fws():
# We're going to fold the data onto a new line here. Due to
# the way encoded strings handle continuation lines, we need to
# be prepared to encode any whitespace if the next line turns
# out to start with an encoded word.
lines.append(newline + tstr)

whitespace_accumulator = []
for char in lines[-1]:
if char not in WSP:
break
whitespace_accumulator.append(char)
leading_whitespace = ''.join(whitespace_accumulator)
if any(char not in WSP for char in lines[-1]):
last_word_is_ew = False
last_ew = None
continue
if not hasattr(part, 'encode'):
Expand Down Expand Up @@ -2994,10 +3007,12 @@ def _refold_parse_tree(parse_tree, *, policy):
else:
# We can't fold it onto the next line either...
lines[-1] += tstr
if any(char not in WSP for char in tstr):
last_word_is_ew = False

return policy.linesep.join(lines) + policy.linesep

def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, leading_whitespace):
def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, last_word_is_ew):
"""Fold string to_encode into lines as encoded word, combining if allowed.
Return the new value for last_ew, or None if ew_combine_allowed is False.

Expand All @@ -3012,14 +3027,21 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset,
to_encode = str(
get_unstructured(lines[-1][last_ew:] + to_encode))
lines[-1] = lines[-1][:last_ew]
elif to_encode[0] in WSP:
elif to_encode[0] in WSP and not last_word_is_ew:
# We're joining this to non-encoded text, so don't encode
# the leading blank.
leading_wsp = to_encode[0]
to_encode = to_encode[1:]
if (len(lines[-1]) == maxlen):
lines.append(_steal_trailing_WSP_if_exists(lines))
lines[-1] += leading_wsp
elif last_word_is_ew:
# If we are following up an encoded word with another encoded word,
# any white space between the two will be ignored when decoded.
# Therefore, we encode all to-be-displayed whitespace in the second
# encoded word.
leading_whitespace = _steal_all_trailing_WSP_if_exists(lines)
to_encode = leading_whitespace + to_encode

trailing_wsp = ''
if to_encode[-1] in WSP:
Expand All @@ -3040,20 +3062,11 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset,

while to_encode:
remaining_space = maxlen - len(lines[-1])
text_space = remaining_space - chrome_len - len(leading_whitespace)
text_space = remaining_space - chrome_len
if text_space <= 0:
lines.append(' ')
continue

# If we are at the start of a continuation line, prepend whitespace
# (we only want to do this when the line starts with an encoded word
# but if we're folding in this helper function, then we know that we
# are going to be writing out an encoded word.)
if len(lines) > 1 and len(lines[-1]) == 1 and leading_whitespace:
encoded_word = _ew.encode(leading_whitespace, charset=encode_as)
lines[-1] += encoded_word
leading_whitespace = ''

to_encode_word = to_encode[:text_space]
encoded_word = _ew.encode(to_encode_word, charset=encode_as)
excess = len(encoded_word) - remaining_space
Expand All @@ -3065,7 +3078,6 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset,
excess = len(encoded_word) - remaining_space
lines[-1] += encoded_word
to_encode = to_encode[len(to_encode_word):]
leading_whitespace = ''

if to_encode:
lines.append(' ')
Expand Down
20 changes: 20 additions & 0 deletions Lib/test/test_email/test_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,26 @@ def test_defaults_handle_spaces_at_start_of_continuation_line(self):
g.flatten(msg)
self.assertEqual(s.getvalue(), expected)

# gh-144156: fold between non-encoded and encoded words don't need to encoded
# the separating space
def test_defaults_handle_spaces_at_start_of_continuation_line_2(self):
source = ("Re: [SOS-1495488] Commande et livraison - Demande de retour - "
"bibijolie - 251210-AABBCC - Abo actualités digitales 20 semaines "
"d’abonnement à 24 heures, Bilan, Tribune de Genève et tous les titres Tamedia")
expected = (
b"Subject: "
b"Re: [SOS-1495488] Commande et livraison - Demande de retour -\n"
b" bibijolie - 251210-AABBCC - Abo =?utf-8?q?actualit=C3=A9s?= digitales 20\n"
b" semaines =?utf-8?q?d=E2=80=99abonnement_=C3=A0?= 24 heures, Bilan, Tribune de\n"
b" =?utf-8?q?Gen=C3=A8ve?= et tous les titres Tamedia\n\n"
)
msg = EmailMessage()
msg['Subject'] = source
s = io.BytesIO()
g = BytesGenerator(s)
g.flatten(msg)
self.assertEqual(s.getvalue(), expected)

def test_cte_type_7bit_handles_unknown_8bit(self):
source = ("Subject: Maintenant je vous présente mon "
"collègue\n\n").encode('utf-8')
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix folding of email headers violating `RFC 2047`_ with two consecutive encoded words without separating linear-white-space.

.. _RFC 2047: https://www.rfc-editor.org/rfc/rfc2047
Loading