8080# Useful constants and functions
8181#
8282
83- WSP = set (' \t ' )
83+ _WSP = ' \t '
84+ WSP = set (_WSP )
8485CFWS_LEADER = WSP | set ('(' )
8586SPECIALS = set (r'()<>@,:;.\"[]' )
8687ATOM_ENDS = SPECIALS | WSP
@@ -2831,6 +2832,7 @@ def _steal_trailing_WSP_if_exists(lines):
28312832 lines .pop ()
28322833 return wsp
28332834
2835+
28342836def _refold_parse_tree (parse_tree , * , policy ):
28352837 """Return string of contents of parse_tree folded according to RFC rules.
28362838
@@ -2839,11 +2841,9 @@ def _refold_parse_tree(parse_tree, *, policy):
28392841 maxlen = policy .max_line_length or sys .maxsize
28402842 encoding = 'utf-8' if policy .utf8 else 'us-ascii'
28412843 lines = ['' ] # Folded lines to be output
2842- leading_whitespace = '' # When we have whitespace between two encoded
2843- # words, we may need to encode the whitespace
2844- # at the beginning of the second word.
2845- last_ew = None # Points to the last encoded character if there's an ew on
2846- # the line
2844+ last_word_is_ew = False
2845+ last_ew = None # if there is an encoded word in the last line of lines,
2846+ # points to the encoded word's first character
28472847 last_charset = None
28482848 wrap_as_ew_blocked = 0
28492849 want_encoding = False # This is set to True if we need to encode this part
@@ -2878,6 +2878,7 @@ def _refold_parse_tree(parse_tree, *, policy):
28782878 if part .token_type == 'mime-parameters' :
28792879 # Mime parameter folding (using RFC2231) is extra special.
28802880 _fold_mime_parameters (part , lines , maxlen , encoding )
2881+ last_word_is_ew = False
28812882 continue
28822883
28832884 if want_encoding and not wrap_as_ew_blocked :
@@ -2894,6 +2895,7 @@ def _refold_parse_tree(parse_tree, *, policy):
28942895 # XXX what if encoded_part has no leading FWS?
28952896 lines .append (newline )
28962897 lines [- 1 ] += encoded_part
2898+ last_word_is_ew = False
28972899 continue
28982900 # Either this is not a major syntactic break, so we don't
28992901 # want it on a line by itself even if it fits, or it
@@ -2912,11 +2914,16 @@ def _refold_parse_tree(parse_tree, *, policy):
29122914 (last_charset == 'unknown-8bit' or
29132915 last_charset == 'utf-8' and charset != 'us-ascii' )):
29142916 last_ew = None
2915- last_ew = _fold_as_ew (tstr , lines , maxlen , last_ew ,
2916- part .ew_combine_allowed , charset , leading_whitespace )
2917- # This whitespace has been added to the lines in _fold_as_ew()
2918- # so clear it now.
2919- leading_whitespace = ''
2917+ last_ew = _fold_as_ew (
2918+ tstr ,
2919+ lines ,
2920+ maxlen ,
2921+ last_ew ,
2922+ part .ew_combine_allowed ,
2923+ charset ,
2924+ last_word_is_ew ,
2925+ )
2926+ last_word_is_ew = True
29202927 last_charset = charset
29212928 want_encoding = False
29222929 continue
@@ -2929,28 +2936,19 @@ def _refold_parse_tree(parse_tree, *, policy):
29292936
29302937 if len (tstr ) <= maxlen - len (lines [- 1 ]):
29312938 lines [- 1 ] += tstr
2939+ last_word_is_ew = last_word_is_ew and not bool (tstr .strip (_WSP ))
29322940 continue
29332941
29342942 # This part is too long to fit. The RFC wants us to break at
29352943 # "major syntactic breaks", so unless we don't consider this
29362944 # to be one, check if it will fit on the next line by itself.
2937- leading_whitespace = ''
29382945 if (part .syntactic_break and
29392946 len (tstr ) + 1 <= maxlen ):
29402947 newline = _steal_trailing_WSP_if_exists (lines )
29412948 if newline or part .startswith_fws ():
2942- # We're going to fold the data onto a new line here. Due to
2943- # the way encoded strings handle continuation lines, we need to
2944- # be prepared to encode any whitespace if the next line turns
2945- # out to start with an encoded word.
29462949 lines .append (newline + tstr )
2947-
2948- whitespace_accumulator = []
2949- for char in lines [- 1 ]:
2950- if char not in WSP :
2951- break
2952- whitespace_accumulator .append (char )
2953- leading_whitespace = '' .join (whitespace_accumulator )
2950+ last_word_is_ew = (last_word_is_ew
2951+ and not bool (lines [- 1 ].strip (_WSP )))
29542952 last_ew = None
29552953 continue
29562954 if not hasattr (part , 'encode' ):
@@ -2990,10 +2988,11 @@ def _refold_parse_tree(parse_tree, *, policy):
29902988 else :
29912989 # We can't fold it onto the next line either...
29922990 lines [- 1 ] += tstr
2991+ last_word_is_ew = last_word_is_ew and not bool (tstr .strip (_WSP ))
29932992
29942993 return policy .linesep .join (lines ) + policy .linesep
29952994
2996- def _fold_as_ew (to_encode , lines , maxlen , last_ew , ew_combine_allowed , charset , leading_whitespace ):
2995+ def _fold_as_ew (to_encode , lines , maxlen , last_ew , ew_combine_allowed , charset , last_word_is_ew ):
29972996 """Fold string to_encode into lines as encoded word, combining if allowed.
29982997 Return the new value for last_ew, or None if ew_combine_allowed is False.
29992998
@@ -3008,6 +3007,16 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset,
30083007 to_encode = str (
30093008 get_unstructured (lines [- 1 ][last_ew :] + to_encode ))
30103009 lines [- 1 ] = lines [- 1 ][:last_ew ]
3010+ elif last_word_is_ew :
3011+ # If we are following up an encoded word with another encoded word,
3012+ # any whitespace between the two will be ignored when decoded.
3013+ # Therefore, we encode all to-be-displayed whitespace in the second
3014+ # encoded word.
3015+ len_without_wsp = len (lines [- 1 ].rstrip (_WSP ))
3016+ leading_whitespace = lines [- 1 ][len_without_wsp :]
3017+ lines [- 1 ] = (lines [- 1 ][:len_without_wsp ]
3018+ + (' ' if leading_whitespace else '' ))
3019+ to_encode = leading_whitespace + to_encode
30113020 elif to_encode [0 ] in WSP :
30123021 # We're joining this to non-encoded text, so don't encode
30133022 # the leading blank.
@@ -3036,20 +3045,13 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset,
30363045
30373046 while to_encode :
30383047 remaining_space = maxlen - len (lines [- 1 ])
3039- text_space = remaining_space - chrome_len - len ( leading_whitespace )
3048+ text_space = remaining_space - chrome_len
30403049 if text_space <= 0 :
3041- lines .append (' ' )
3050+ newline = _steal_trailing_WSP_if_exists (lines )
3051+ lines .append (newline or ' ' )
3052+ new_last_ew = len (lines [- 1 ])
30423053 continue
30433054
3044- # If we are at the start of a continuation line, prepend whitespace
3045- # (we only want to do this when the line starts with an encoded word
3046- # but if we're folding in this helper function, then we know that we
3047- # are going to be writing out an encoded word.)
3048- if len (lines ) > 1 and len (lines [- 1 ]) == 1 and leading_whitespace :
3049- encoded_word = _ew .encode (leading_whitespace , charset = encode_as )
3050- lines [- 1 ] += encoded_word
3051- leading_whitespace = ''
3052-
30533055 to_encode_word = to_encode [:text_space ]
30543056 encoded_word = _ew .encode (to_encode_word , charset = encode_as )
30553057 excess = len (encoded_word ) - remaining_space
@@ -3061,7 +3063,6 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset,
30613063 excess = len (encoded_word ) - remaining_space
30623064 lines [- 1 ] += encoded_word
30633065 to_encode = to_encode [len (to_encode_word ):]
3064- leading_whitespace = ''
30653066
30663067 if to_encode :
30673068 lines .append (' ' )
0 commit comments