From b5925e056e5e03f642926204872ac40ce8643dbf Mon Sep 17 00:00:00 2001 From: Robsdedude Date: Tue, 10 Feb 2026 22:15:06 +0100 Subject: [PATCH 01/12] Fix email header wrapping omitting white space --- Lib/email/_header_value_parser.py | 78 ++++++++++++++++----------- Lib/test/test_email/test_generator.py | 18 +++++++ 2 files changed, 64 insertions(+), 32 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 172f9ef9e5f096..08d60c8a50dd6c 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2835,6 +2835,30 @@ def _steal_trailing_WSP_if_exists(lines): lines.pop() return wsp +def _steal_all_trailing_WSP_if_exists(lines): + lines_popped = False + wsp_lines = [] + while lines and lines[-1]: + for i in range(len(lines[-1]), -1, -1): + if i <= 0: + break + if lines[-1][i - 1] not in WSP: + break + wsp_line = lines[-1][i:] + if not wsp_line: + break + wsp_lines.insert(0, wsp_line) + lines[-1] = lines[-1][:i] + if not lines[-1]: + lines_popped = True + lines.pop() + else: + break + + if lines_popped: + lines.append(' ') + return ''.join(wsp_lines) + def _refold_parse_tree(parse_tree, *, policy): """Return string of contents of parse_tree folded according to RFC rules. @@ -2843,9 +2867,7 @@ def _refold_parse_tree(parse_tree, *, policy): maxlen = policy.max_line_length or sys.maxsize encoding = 'utf-8' if policy.utf8 else 'us-ascii' lines = [''] # Folded lines to be output - leading_whitespace = '' # When we have whitespace between two encoded - # words, we may need to encode the whitespace - # at the beginning of the second word. + last_word_is_ew = False last_ew = None # Points to the last encoded character if there's an ew on # the line last_charset = None @@ -2882,6 +2904,7 @@ def _refold_parse_tree(parse_tree, *, policy): if part.token_type == 'mime-parameters': # Mime parameter folding (using RFC2231) is extra special. 
_fold_mime_parameters(part, lines, maxlen, encoding) + last_word_is_ew = False continue if want_encoding and not wrap_as_ew_blocked: @@ -2898,6 +2921,7 @@ def _refold_parse_tree(parse_tree, *, policy): # XXX what if encoded_part has no leading FWS? lines.append(newline) lines[-1] += encoded_part + last_word_is_ew = False continue # Either this is not a major syntactic break, so we don't # want it on a line by itself even if it fits, or it @@ -2917,10 +2941,8 @@ def _refold_parse_tree(parse_tree, *, policy): last_charset == 'utf-8' and charset != 'us-ascii')): last_ew = None last_ew = _fold_as_ew(tstr, lines, maxlen, last_ew, - part.ew_combine_allowed, charset, leading_whitespace) - # This whitespace has been added to the lines in _fold_as_ew() - # so clear it now. - leading_whitespace = '' + part.ew_combine_allowed, charset, last_word_is_ew) + last_word_is_ew = True last_charset = charset want_encoding = False continue @@ -2933,28 +2955,20 @@ def _refold_parse_tree(parse_tree, *, policy): if len(tstr) <= maxlen - len(lines[-1]): lines[-1] += tstr + if any(char not in WSP for char in tstr): + last_word_is_ew = False continue # This part is too long to fit. The RFC wants us to break at # "major syntactic breaks", so unless we don't consider this # to be one, check if it will fit on the next line by itself. - leading_whitespace = '' if (part.syntactic_break and len(tstr) + 1 <= maxlen): newline = _steal_trailing_WSP_if_exists(lines) if newline or part.startswith_fws(): - # We're going to fold the data onto a new line here. Due to - # the way encoded strings handle continuation lines, we need to - # be prepared to encode any whitespace if the next line turns - # out to start with an encoded word. 
lines.append(newline + tstr) - - whitespace_accumulator = [] - for char in lines[-1]: - if char not in WSP: - break - whitespace_accumulator.append(char) - leading_whitespace = ''.join(whitespace_accumulator) + if not all(char in WSP for char in lines[-1]): + last_word_is_ew = False last_ew = None continue if not hasattr(part, 'encode'): @@ -2994,10 +3008,12 @@ def _refold_parse_tree(parse_tree, *, policy): else: # We can't fold it onto the next line either... lines[-1] += tstr + if any(char not in WSP for char in tstr): + last_word_is_ew = False return policy.linesep.join(lines) + policy.linesep -def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, leading_whitespace): +def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, last_word_is_ew): """Fold string to_encode into lines as encoded word, combining if allowed. Return the new value for last_ew, or None if ew_combine_allowed is False. @@ -3012,7 +3028,7 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, to_encode = str( get_unstructured(lines[-1][last_ew:] + to_encode)) lines[-1] = lines[-1][:last_ew] - elif to_encode[0] in WSP: + elif to_encode[0] in WSP and not last_word_is_ew: # We're joining this to non-encoded text, so don't encode # the leading blank. leading_wsp = to_encode[0] @@ -3020,6 +3036,14 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, if (len(lines[-1]) == maxlen): lines.append(_steal_trailing_WSP_if_exists(lines)) lines[-1] += leading_wsp + elif last_word_is_ew: + # If we are following up an encoded word with another encoded word, + # any white space between the two will be ignored when decoded. + # Therefore, we encode all to-be-displayed whitespace in the second + # encoded word. 
+ leading_whitespace = _steal_all_trailing_WSP_if_exists(lines) + to_encode = leading_whitespace + to_encode + lines[-1] = ' ' trailing_wsp = '' if to_encode[-1] in WSP: @@ -3040,20 +3064,11 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, while to_encode: remaining_space = maxlen - len(lines[-1]) - text_space = remaining_space - chrome_len - len(leading_whitespace) + text_space = remaining_space - chrome_len if text_space <= 0: lines.append(' ') continue - # If we are at the start of a continuation line, prepend whitespace - # (we only want to do this when the line starts with an encoded word - # but if we're folding in this helper function, then we know that we - # are going to be writing out an encoded word.) - if len(lines) > 1 and len(lines[-1]) == 1 and leading_whitespace: - encoded_word = _ew.encode(leading_whitespace, charset=encode_as) - lines[-1] += encoded_word - leading_whitespace = '' - to_encode_word = to_encode[:text_space] encoded_word = _ew.encode(to_encode_word, charset=encode_as) excess = len(encoded_word) - remaining_space @@ -3065,7 +3080,6 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, excess = len(encoded_word) - remaining_space lines[-1] += encoded_word to_encode = to_encode[len(to_encode_word):] - leading_whitespace = '' if to_encode: lines.append(' ') diff --git a/Lib/test/test_email/test_generator.py b/Lib/test/test_email/test_generator.py index 3ca79edf6a65d9..bdbf081834384f 100644 --- a/Lib/test/test_email/test_generator.py +++ b/Lib/test/test_email/test_generator.py @@ -393,6 +393,24 @@ def test_defaults_handle_spaces_at_start_of_continuation_line(self): g.flatten(msg) self.assertEqual(s.getvalue(), expected) + # gh-144156 + # https://github.com/python/cpython/issues/144156 + def test_defaults_handle_spaces_at_start_of_continuation_line_2(self): + source = ("Re: [SOS-1495488] Commande et livraison - Demande de retour - " + "bibijolie - 251210-AABBCC - Abo actualités 
digitales 20 semaines " + "d’abonnement à 24 heures, Bilan, Tribune de Genève et tous les titres Tamedia") + expected = (b"Subject: " + b"Re: [SOS-1495488] Commande et livraison - Demande de retour -\n" + b" bibijolie - 251210-AABBCC - Abo =?utf-8?q?actualit=C3=A9s?= digitales 20\n" + b" semaines =?utf-8?q?d=E2=80=99abonnement_=C3=A0?= 24 heures, Bilan, Tribune de\n" + b" =?utf-8?q?Gen=C3=A8ve?= et tous les titres Tamedia\n\n") + msg = EmailMessage() + msg['Subject'] = source + s = io.BytesIO() + g = BytesGenerator(s) + g.flatten(msg) + self.assertEqual(s.getvalue(), expected) + def test_cte_type_7bit_handles_unknown_8bit(self): source = ("Subject: Maintenant je vous présente mon " "collègue\n\n").encode('utf-8') From 1a7824e87bd98e7ce294f03f9ae31df0ee29146b Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Tue, 10 Feb 2026 22:05:52 +0000 Subject: [PATCH 02/12] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2026-02-10-22-05-51.gh-issue-144156.UbrC7F.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-02-10-22-05-51.gh-issue-144156.UbrC7F.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-02-10-22-05-51.gh-issue-144156.UbrC7F.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-10-22-05-51.gh-issue-144156.UbrC7F.rst new file mode 100644 index 00000000000000..bc5e788fda675c --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-10-22-05-51.gh-issue-144156.UbrC7F.rst @@ -0,0 +1,3 @@ +Fix folding of email headers violating `RFC 2047`_ with two consecutive encoded words without separating linear-white-space. + +.. _RFC 2047: https://www.rfc-editor.org/rfc/rfc2047 From fb23a6eb37042378ce2ef025fb5c46ea7464c3d0 Mon Sep 17 00:00:00 2001 From: Robsdedude Date: Tue, 10 Feb 2026 23:23:24 +0100 Subject: [PATCH 03/12] fixup! 
Fix email header wrapping omitting white space --- Lib/email/_header_value_parser.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 08d60c8a50dd6c..1c9e1405802576 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -3043,7 +3043,6 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, # encoded word. leading_whitespace = _steal_all_trailing_WSP_if_exists(lines) to_encode = leading_whitespace + to_encode - lines[-1] = ' ' trailing_wsp = '' if to_encode[-1] in WSP: From 050491c170b5a165fee4808b39b896718fbb9411 Mon Sep 17 00:00:00 2001 From: Robsdedude Date: Wed, 11 Feb 2026 13:38:02 +0100 Subject: [PATCH 04/12] fixup! Fix email header wrapping omitting white space --- Lib/email/_header_value_parser.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 1c9e1405802576..fc46f9ac1795e8 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2840,9 +2840,7 @@ def _steal_all_trailing_WSP_if_exists(lines): wsp_lines = [] while lines and lines[-1]: for i in range(len(lines[-1]), -1, -1): - if i <= 0: - break - if lines[-1][i - 1] not in WSP: + if i <= 0 or lines[-1][i - 1] not in WSP: break wsp_line = lines[-1][i:] if not wsp_line: @@ -2854,9 +2852,8 @@ def _steal_all_trailing_WSP_if_exists(lines): lines.pop() else: break - if lines_popped: - lines.append(' ') + lines.append(' ' if lines else '') return ''.join(wsp_lines) def _refold_parse_tree(parse_tree, *, policy): @@ -2967,7 +2964,7 @@ def _refold_parse_tree(parse_tree, *, policy): newline = _steal_trailing_WSP_if_exists(lines) if newline or part.startswith_fws(): lines.append(newline + tstr) - if not all(char in WSP for char in lines[-1]): + if any(char not in WSP for char in lines[-1]): last_word_is_ew = False last_ew = None continue From 
ed6f197ab5483f62632582cc8b2057ae7563a1f6 Mon Sep 17 00:00:00 2001 From: Robsdedude Date: Wed, 11 Feb 2026 17:20:13 +0100 Subject: [PATCH 05/12] Formatting --- Lib/email/_header_value_parser.py | 2 ++ Lib/test/test_email/test_generator.py | 16 +++++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index fc46f9ac1795e8..c5d400555b8422 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2835,6 +2835,7 @@ def _steal_trailing_WSP_if_exists(lines): lines.pop() return wsp + def _steal_all_trailing_WSP_if_exists(lines): lines_popped = False wsp_lines = [] @@ -2856,6 +2857,7 @@ def _steal_all_trailing_WSP_if_exists(lines): lines.append(' ' if lines else '') return ''.join(wsp_lines) + def _refold_parse_tree(parse_tree, *, policy): """Return string of contents of parse_tree folded according to RFC rules. diff --git a/Lib/test/test_email/test_generator.py b/Lib/test/test_email/test_generator.py index bdbf081834384f..1c1f9b657bf0f8 100644 --- a/Lib/test/test_email/test_generator.py +++ b/Lib/test/test_email/test_generator.py @@ -393,17 +393,19 @@ def test_defaults_handle_spaces_at_start_of_continuation_line(self): g.flatten(msg) self.assertEqual(s.getvalue(), expected) - # gh-144156 - # https://github.com/python/cpython/issues/144156 + # gh-144156: a fold between non-encoded and encoded words doesn't need to + # encode the separating space def test_defaults_handle_spaces_at_start_of_continuation_line_2(self): source = ("Re: [SOS-1495488] Commande et livraison - Demande de retour - " "bibijolie - 251210-AABBCC - Abo actualités digitales 20 semaines " "d’abonnement à 24 heures, Bilan, Tribune de Genève et tous les titres Tamedia") - expected = (b"Subject: " - b"Re: [SOS-1495488] Commande et livraison - Demande de retour -\n" - b" bibijolie - 251210-AABBCC - Abo =?utf-8?q?actualit=C3=A9s?= digitales 20\n" - b" semaines 
=?utf-8?q?d=E2=80=99abonnement_=C3=A0?= 24 heures, Bilan, Tribune de\n" - b" =?utf-8?q?Gen=C3=A8ve?= et tous les titres Tamedia\n\n") + expected = ( + b"Subject: " + b"Re: [SOS-1495488] Commande et livraison - Demande de retour -\n" + b" bibijolie - 251210-AABBCC - Abo =?utf-8?q?actualit=C3=A9s?= digitales 20\n" + b" semaines =?utf-8?q?d=E2=80=99abonnement_=C3=A0?= 24 heures, Bilan, Tribune de\n" + b" =?utf-8?q?Gen=C3=A8ve?= et tous les titres Tamedia\n\n" + ) msg = EmailMessage() msg['Subject'] = source s = io.BytesIO() From 91b4b3b89538fa51efc3f76045b021e0e2684083 Mon Sep 17 00:00:00 2001 From: Robsdedude Date: Thu, 12 Feb 2026 21:44:40 +0000 Subject: [PATCH 06/12] Extend news fragment Co-authored-by: R. David Murray --- .../2026-02-10-22-05-51.gh-issue-144156.UbrC7F.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-02-10-22-05-51.gh-issue-144156.UbrC7F.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-10-22-05-51.gh-issue-144156.UbrC7F.rst index bc5e788fda675c..be8103b7aac408 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2026-02-10-22-05-51.gh-issue-144156.UbrC7F.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-10-22-05-51.gh-issue-144156.UbrC7F.rst @@ -1,3 +1,3 @@ Fix folding of email headers violating `RFC 2047`_ with two consecutive encoded words without separating linear-white-space. -.. _RFC 2047: https://www.rfc-editor.org/rfc/rfc2047 +Fix the folding of headers by the :mod:`email` library when :rfc:`2047` encoded words are used. Now whitespace is correctly preserved and also correctly added between adjacent encoded words. The latter property was broken by the fix for gh-92081, which mostly fixed previous failures to preserve whitespace. 
From d5f5001f96cfceb7d529ee07506ac1a227c1dfcc Mon Sep 17 00:00:00 2001 From: Robsdedude Date: Thu, 12 Feb 2026 22:39:35 +0100 Subject: [PATCH 07/12] Formatting --- Lib/email/_header_value_parser.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index c5d400555b8422..f934a51c96a835 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2939,8 +2939,15 @@ def _refold_parse_tree(parse_tree, *, policy): (last_charset == 'unknown-8bit' or last_charset == 'utf-8' and charset != 'us-ascii')): last_ew = None - last_ew = _fold_as_ew(tstr, lines, maxlen, last_ew, - part.ew_combine_allowed, charset, last_word_is_ew) + last_ew = _fold_as_ew( + tstr, + lines, + maxlen, + last_ew, + part.ew_combine_allowed, + charset, + last_word_is_ew, + ) last_word_is_ew = True last_charset = charset want_encoding = False From 15e6618cb0129003a72dcc803b0738b1027f12db Mon Sep 17 00:00:00 2001 From: Robsdedude Date: Thu, 12 Feb 2026 22:47:39 +0100 Subject: [PATCH 08/12] Fix comment --- Lib/email/_header_value_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index f934a51c96a835..1251949fb9a05d 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2867,8 +2867,8 @@ def _refold_parse_tree(parse_tree, *, policy): encoding = 'utf-8' if policy.utf8 else 'us-ascii' lines = [''] # Folded lines to be output last_word_is_ew = False - last_ew = None # Points to the last encoded character if there's an ew on - # the line + last_ew = None # if there is an encoded word in the last line of lines, + # points to the encoded word's first character last_charset = None wrap_as_ew_blocked = 0 want_encoding = False # This is set to True if we need to encode this part From 902ec59bd567b256ff974f5bbb982026612637f3 Mon Sep 17 00:00:00 2001 From: 
Robsdedude Date: Thu, 12 Feb 2026 23:04:40 +0100 Subject: [PATCH 09/12] Faster and more readable last_word_is_ew tracking --- Lib/email/_header_value_parser.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 1251949fb9a05d..474a0d716e224d 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -80,7 +80,8 @@ # Useful constants and functions # -WSP = set(' \t') +_WSP = ' \t' +WSP = set(_WSP) CFWS_LEADER = WSP | set('(') SPECIALS = set(r'()<>@,:;.\"[]') ATOM_ENDS = SPECIALS | WSP @@ -2858,6 +2859,12 @@ def _steal_all_trailing_WSP_if_exists(lines): return ''.join(wsp_lines) +def _last_word_is_sill_ew(_last_word_is_ew, added_str): + # If the last word is an encoded word, and the added string is all WSP, + # then (and only then) is the last word is still an encoded word. + return _last_word_is_ew and not bool(added_str.strip(_WSP)) + + def _refold_parse_tree(parse_tree, *, policy): """Return string of contents of parse_tree folded according to RFC rules. @@ -2961,8 +2968,7 @@ def _refold_parse_tree(parse_tree, *, policy): if len(tstr) <= maxlen - len(lines[-1]): lines[-1] += tstr - if any(char not in WSP for char in tstr): - last_word_is_ew = False + last_word_is_ew = _last_word_is_sill_ew(last_word_is_ew, tstr) continue # This part is too long to fit. The RFC wants us to break at @@ -2973,8 +2979,9 @@ def _refold_parse_tree(parse_tree, *, policy): newline = _steal_trailing_WSP_if_exists(lines) if newline or part.startswith_fws(): lines.append(newline + tstr) - if any(char not in WSP for char in lines[-1]): - last_word_is_ew = False + last_word_is_ew = _last_word_is_sill_ew( + last_word_is_ew, lines[-1] + ) last_ew = None continue if not hasattr(part, 'encode'): @@ -3014,8 +3021,7 @@ def _refold_parse_tree(parse_tree, *, policy): else: # We can't fold it onto the next line either... 
lines[-1] += tstr - if any(char not in WSP for char in tstr): - last_word_is_ew = False + last_word_is_ew = _last_word_is_sill_ew(last_word_is_ew, tstr) return policy.linesep.join(lines) + policy.linesep From a1cfe3cfb2dd4c0dfcd9c9e608e22da4c0b3a9bc Mon Sep 17 00:00:00 2001 From: Robsdedude Date: Fri, 13 Feb 2026 18:20:40 +0100 Subject: [PATCH 10/12] Fix broken application of news fragment suggestion --- .../2026-02-10-22-05-51.gh-issue-144156.UbrC7F.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-02-10-22-05-51.gh-issue-144156.UbrC7F.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-10-22-05-51.gh-issue-144156.UbrC7F.rst index be8103b7aac408..c4a065528512e1 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2026-02-10-22-05-51.gh-issue-144156.UbrC7F.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-10-22-05-51.gh-issue-144156.UbrC7F.rst @@ -1,3 +1 @@ -Fix folding of email headers violating `RFC 2047`_ with two consecutive encoded words without separating linear-white-space. - Fix the folding of headers by the :mod:`email` library when :rfc:`2047` encoded words are used. Now whitespace is correctly preserved and also correctly added between adjacent encoded words. The latter property was broken by the fix for gh-92081, which mostly fixed previous failures to preserve whitespace. From 45022be603f33d8d59c945f0347ab932ce72ce1f Mon Sep 17 00:00:00 2001 From: Robsdedude Date: Fri, 13 Feb 2026 18:37:12 +0100 Subject: [PATCH 11/12] Simplify code Make use of the fact that folder must never produce a line of only WSP. 
--- Lib/email/_header_value_parser.py | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 474a0d716e224d..df3df50ff3162a 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2837,28 +2837,6 @@ def _steal_trailing_WSP_if_exists(lines): return wsp -def _steal_all_trailing_WSP_if_exists(lines): - lines_popped = False - wsp_lines = [] - while lines and lines[-1]: - for i in range(len(lines[-1]), -1, -1): - if i <= 0 or lines[-1][i - 1] not in WSP: - break - wsp_line = lines[-1][i:] - if not wsp_line: - break - wsp_lines.insert(0, wsp_line) - lines[-1] = lines[-1][:i] - if not lines[-1]: - lines_popped = True - lines.pop() - else: - break - if lines_popped: - lines.append(' ' if lines else '') - return ''.join(wsp_lines) - - def _last_word_is_sill_ew(_last_word_is_ew, added_str): # If the last word is an encoded word, and the added string is all WSP, # then (and only then) is the last word is still an encoded word. @@ -3053,7 +3031,10 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, # any white space between the two will be ignored when decoded. # Therefore, we encode all to-be-displayed whitespace in the second # encoded word. 
- leading_whitespace = _steal_all_trailing_WSP_if_exists(lines) + len_without_wsp = len(lines[-1].rstrip(_WSP)) + leading_whitespace = lines[-1][len_without_wsp:] + lines[-1] = (lines[-1][:len_without_wsp] + + (' ' if leading_whitespace else '')) to_encode = leading_whitespace + to_encode trailing_wsp = '' From ccf399d1a325df948d633724d43641aec77b89dd Mon Sep 17 00:00:00 2001 From: Robsdedude Date: Fri, 13 Feb 2026 18:39:32 +0100 Subject: [PATCH 12/12] Fix related WSP folding bug --- Lib/email/_header_value_parser.py | 4 +++- Lib/test/test_email/test_generator.py | 24 ++++++++++++++++++++++ Lib/test/test_email/test_headerregistry.py | 4 ++-- Lib/test/test_email/test_policy.py | 2 +- 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index df3df50ff3162a..57cc4f845e7273 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -3058,7 +3058,9 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, remaining_space = maxlen - len(lines[-1]) text_space = remaining_space - chrome_len if text_space <= 0: - lines.append(' ') + newline = _steal_trailing_WSP_if_exists(lines) + lines.append(newline or ' ') + new_last_ew = len(lines[-1]) continue to_encode_word = to_encode[:text_space] diff --git a/Lib/test/test_email/test_generator.py b/Lib/test/test_email/test_generator.py index 1c1f9b657bf0f8..c2d7d09d591e86 100644 --- a/Lib/test/test_email/test_generator.py +++ b/Lib/test/test_email/test_generator.py @@ -413,6 +413,30 @@ def test_defaults_handle_spaces_at_start_of_continuation_line_2(self): g.flatten(msg) self.assertEqual(s.getvalue(), expected) + def test_ew_folding_round_trip_1(self): + print() + source = "aaaaaaaaa фффффффф " + msg = EmailMessage() + msg['Subject'] = source + s = io.BytesIO() + g = BytesGenerator(s, maxheaderlen=30) + g.flatten(msg) + flat = s.getvalue() + reparsed = message_from_bytes(flat, 
policy=policy.default)['Subject'] + self.assertMultiLineEqual(reparsed, source) + + def test_ew_folding_round_trip_2(self): + print() + source = "aaa aaaaaaa aaa ффф фффф " + msg = EmailMessage() + msg['Subject'] = source + s = io.BytesIO() + g = BytesGenerator(s, maxheaderlen=30) + g.flatten(msg) + flat = s.getvalue() + reparsed = message_from_bytes(flat, policy=policy.default)['Subject'] + self.assertMultiLineEqual(reparsed, source) + def test_cte_type_7bit_handles_unknown_8bit(self): source = ("Subject: Maintenant je vous présente mon " "collègue\n\n").encode('utf-8') diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py index 95c6afbee41ef5..c9c63951597244 100644 --- a/Lib/test/test_email/test_headerregistry.py +++ b/Lib/test/test_email/test_headerregistry.py @@ -1711,7 +1711,7 @@ def test_fold_unstructured_with_overlong_word(self): 'singlewordthatwontfit') self.assertEqual( h.fold(policy=policy.default.clone(max_line_length=20)), - 'Subject: \n' + 'Subject:\n' ' =?utf-8?q?thisisa?=\n' ' =?utf-8?q?verylon?=\n' ' =?utf-8?q?glineco?=\n' @@ -1727,7 +1727,7 @@ def test_fold_unstructured_with_two_overlong_words(self): 'singlewordthatwontfit plusanotherverylongwordthatwontfit') self.assertEqual( h.fold(policy=policy.default.clone(max_line_length=20)), - 'Subject: \n' + 'Subject:\n' ' =?utf-8?q?thisisa?=\n' ' =?utf-8?q?verylon?=\n' ' =?utf-8?q?glineco?=\n' diff --git a/Lib/test/test_email/test_policy.py b/Lib/test/test_email/test_policy.py index 71ec0febb0fd86..90e8e5580295f9 100644 --- a/Lib/test/test_email/test_policy.py +++ b/Lib/test/test_email/test_policy.py @@ -273,7 +273,7 @@ def test_non_ascii_chars_do_not_cause_inf_loop(self): actual = policy.fold('Subject', 'ą' * 12) self.assertEqual( actual, - 'Subject: \n' + + 'Subject:\n' + 12 * ' =?utf-8?q?=C4=85?=\n') def test_short_maxlen_error(self):