@@ -249,6 +249,33 @@ def test_refresh_control(self):
249249 self .assertIs (win .is_wintouched (), syncok )
250250 self .assertIs (stdscr .is_wintouched (), syncok )
251251
252+ # Many tests below use a common set of non-ASCII cases, each applied only
253+ # when the window encoding can represent it -- so the whole suite is meant to
254+ # be run under several locales (e.g. ISO-8859-1, ISO-8859-15, KOI8-U):
255+ # 'A'/'a' ASCII
256+ # 'é' common to the Latin encodings
257+ # '¤'/'€'/'є' byte 0xA4 in ISO-8859-1 / ISO-8859-15 / KOI8-U
258+ # Precomposed characters are used so a round-trip does not depend on the
258+ # Unicode normalization form.
259+
def _encodable(self, s):
    """Return True if *s* can be encoded in the window encoding.

    Wide characters are only supported in a locale that can encode them,
    so tests use this to decide whether a character applies at all.
    """
    try:
        s.encode(self.stdscr.encoding)
    except UnicodeEncodeError:
        # The window encoding has no representation for some character.
        representable = False
    else:
        representable = True
    return representable
267+
def _read_char(self, y, x):
    """Return the character stored in cell (y, x) as a str.

    Used to check what an output call wrote.  inch() is unusable for
    this: on a wide build it returns the low 8 bits of the character's
    code point rather than its locale-encoded byte, mangling anything
    outside Latin-1.
    """
    win = self.stdscr
    if not hasattr(win, 'in_wch'):
        # Without in_wch(), instr() re-encodes the cell to the window
        # encoding; decode that single cell back to text.
        return win.instr(y, x, 1).decode(win.encoding)
    # in_wch() reads the wide cell directly.
    return str(win.in_wch(y, x))
278+
252279 def test_output_character (self ):
253280 stdscr = self .stdscr
254281 encoding = stdscr .encoding
@@ -258,32 +285,98 @@ def test_output_character(self):
258285 stdscr .addch ('A' )
259286 stdscr .addch (b'A' )
260287 stdscr .addch (65 )
261- c = '\u20ac '
262- try :
263- stdscr .addch (c )
264- except UnicodeEncodeError :
265- self .assertRaises (UnicodeEncodeError , c .encode , encoding )
266- except OverflowError :
267- encoded = c .encode (encoding )
268- self .assertNotEqual (len (encoded ), 1 , repr (encoded ))
288+ # See _encodable for the character set. Each is either written (mapped
289+ # to a single byte), or raises UnicodeEncodeError (not in the encoding)
290+ # or OverflowError (a multibyte sequence, e.g. in UTF-8).
291+ for c in ('A' , '\u00e9 ' , '\u00a4 ' , '\u20ac ' , '\u0454 ' ):
292+ try :
293+ stdscr .addch (c )
294+ except UnicodeEncodeError :
295+ self .assertRaises (UnicodeEncodeError , c .encode , encoding )
296+ except OverflowError :
297+ encoded = c .encode (encoding )
298+ self .assertNotEqual (len (encoded ), 1 , repr (encoded ))
269299 stdscr .addch ('A' , curses .A_BOLD )
270300 stdscr .addch (1 , 2 , 'A' )
271301 stdscr .addch (2 , 3 , 'A' , curses .A_BOLD )
272302 self .assertIs (stdscr .is_wintouched (), True )
273303
304+ # The same characters supplied as an int chtype (a byte > 127). The
305+ # cell is read back with _read_char(), not inch(): on a wide build the
306+ # int is stored through the locale as a wide character that inch()
307+ # cannot represent for a character outside Latin-1.
308+ for c in ('é' , '¤' , '€' , 'є' ):
309+ try :
310+ b = c .encode (encoding )
311+ except UnicodeEncodeError :
312+ continue
313+ if len (b ) != 1 :
314+ continue
315+ # A wide build stores a character outside Latin-1 as a wide cell,
316+ # not as its encoded byte, so it cannot round-trip here.
317+ if ord (c ) > 0xff and hasattr (stdscr , 'get_wch' ):
318+ continue
319+ v = b [0 ]
320+ with self .subTest (c = c ):
321+ stdscr .addch (0 , 0 , v )
322+ self .assertEqual (self ._read_char (0 , 0 ), c )
323+ stdscr .addch (0 , 1 , v , curses .A_BOLD )
324+ self .assertEqual (self ._read_char (0 , 1 ), c )
325+ self .assertTrue (stdscr .inch (0 , 1 ) & curses .A_BOLD )
326+ stdscr .move (2 , 0 )
327+ stdscr .echochar (v )
328+ self .assertEqual (self ._read_char (2 , 0 ), c )
329+ # insch() round-trips a byte only where its code point equals
330+ # the byte value (Latin-1): on a wide build ncurses winsch
331+ # stores a printable byte directly as a code point instead of
332+ # decoding it through the locale.
333+ if ord (c ) < 0x100 :
334+ stdscr .insch (1 , 0 , v )
335+ self .assertEqual (self ._read_char (1 , 0 ), c )
336+
337+ # The same characters supplied as a str. Unlike the int path above, a
338+ # str is stored as a wide-character cell on a wide build, so every
339+ # encodable character round-trips, insch() included. A multibyte
340+ # character does not fit a cell on a narrow build and is skipped.
341+ wide = hasattr (stdscr , 'in_wch' )
342+ for c in ('é' , '¤' , '€' , 'є' ):
343+ if not self ._encodable (c ):
344+ continue
345+ if not wide and len (c .encode (encoding )) != 1 :
346+ continue
347+ # A wide build stores a character outside Latin-1 as a wide cell,
348+ # not as its encoded byte, so it cannot round-trip here.
349+ if ord (c ) > 0xff and hasattr (stdscr , 'get_wch' ):
350+ continue
351+ with self .subTest (c = c ):
352+ stdscr .addch (0 , 0 , c )
353+ self .assertEqual (self ._read_char (0 , 0 ), c )
354+ stdscr .addch (0 , 1 , c , curses .A_BOLD )
355+ self .assertEqual (self ._read_char (0 , 1 ), c )
356+ self .assertTrue (stdscr .inch (0 , 1 ) & curses .A_BOLD )
357+ stdscr .insch (1 , 0 , c )
358+ self .assertEqual (self ._read_char (1 , 0 ), c )
359+ stdscr .move (2 , 0 )
360+ stdscr .echochar (c )
361+ self .assertEqual (self ._read_char (2 , 0 ), c )
362+
274363 # echochar()
275364 stdscr .refresh ()
276365 stdscr .move (0 , 0 )
277366 stdscr .echochar ('A' )
278367 stdscr .echochar (b'A' )
279368 stdscr .echochar (65 )
280- with self .assertRaises ((UnicodeEncodeError , OverflowError )):
281- # Unicode is not fully supported yet, but at least it does
282- # not crash.
283- # It is supposed to fail because either the character is
284- # not encodable with the current encoding, or it is encoded to
285- # a multibyte sequence.
286- stdscr .echochar ('\u0114 ' )
369+ # See _encodable for the character set; as in the addch() loop above.
370+ for c in ('A' , '\u00e9 ' , '\u00a4 ' , '\u20ac ' , '\u0454 ' ):
371+ try :
372+ stdscr .echochar (c )
373+ except UnicodeEncodeError :
374+ # The character is not encodable with the current encoding.
375+ self .assertRaises (UnicodeEncodeError , c .encode , encoding )
376+ except OverflowError :
377+ # The character is encoded to a multibyte sequence.
378+ encoded = c .encode (encoding )
379+ self .assertNotEqual (len (encoded ), 1 , repr (encoded ))
287380 stdscr .echochar ('A' , curses .A_BOLD )
288381 self .assertIs (stdscr .is_wintouched (), False )
289382
@@ -293,14 +386,18 @@ def test_output_string(self):
293386 # addstr()/insstr()
294387 for func in [stdscr .addstr , stdscr .insstr ]:
295388 with self .subTest (func .__qualname__ ):
296- stdscr .move (0 , 0 )
297389 func ('abcd' )
298390 func (b'abcd' )
299- s = 'àßçđ'
300- try :
301- func (s )
302- except UnicodeEncodeError :
303- self .assertRaises (UnicodeEncodeError , s .encode , encoding )
391+ # Common and encoding-distinctive strings (see _encodable for the
392+ # 0xA4 set); 'àßçđ' is UTF-8-only. Each is written if the
393+ # encoding allows, else raises UnicodeEncodeError.
394+ for s in ('soupçon' , 'àßçđ' , 'soupçon ¤' , 'soupçon €' , 'дякую' ):
395+ stdscr .move (0 , 0 )
396+ try :
397+ func (s )
398+ except UnicodeEncodeError :
399+ self .assertRaises (UnicodeEncodeError , s .encode , encoding )
400+ stdscr .move (0 , 0 )
304401 func ('abcd' , curses .A_BOLD )
305402 func (1 , 2 , 'abcd' )
306403 func (2 , 3 , 'abcd' , curses .A_BOLD )
@@ -311,11 +408,14 @@ def test_output_string(self):
311408 stdscr .move (0 , 0 )
312409 func ('1234' , 3 )
313410 func (b'1234' , 3 )
314- s = '\u0661 \u0662 \u0663 \u0664 '
315- try :
316- func (s , 3 )
317- except UnicodeEncodeError :
318- self .assertRaises (UnicodeEncodeError , s .encode , encoding )
411+ # As above (see _encodable); Arabic-Indic digits are UTF-8-only.
412+ for s in ('caf\u00e9 ' , '\u0661 \u0662 \u0663 \u0664 ' , 'caf\u00e9 \u00a4 ' , 'caf\u00e9 \u20ac ' , '\u0434 \u044f \u043a \u0443 \u044e ' ):
413+ stdscr .move (0 , 0 )
414+ try :
415+ func (s , 3 )
416+ except UnicodeEncodeError :
417+ self .assertRaises (UnicodeEncodeError , s .encode , encoding )
418+ stdscr .move (0 , 0 )
319419 func ('1234' , 5 )
320420 func ('1234' , 3 , curses .A_BOLD )
321421 func (1 , 2 , '1234' , 3 )
@@ -405,6 +505,24 @@ def test_read_from_window(self):
405505 self .assertEqual (stdscr .instr (0 , 2 , 4 ), b'BCD ' )
406506 self .assertRaises (ValueError , stdscr .instr , - 2 )
407507 self .assertRaises (ValueError , stdscr .instr , 0 , 2 , - 2 )
508+ # A non-ASCII character of an 8-bit locale reads back as its encoded
509+ # byte (see _encodable for the set). instr() returns the locale bytes
510+ # for any single-byte character; inch() packs the text into a chtype, so
511+ # on a wide build it only round-trips a Latin-1 codepoint (byte ==
512+ # codepoint).
513+ encoding = stdscr .encoding
514+ for ch in ('A' , 'é' , '¤' , '€' , 'є' ):
515+ try :
516+ b = ch .encode (encoding )
517+ except UnicodeEncodeError :
518+ continue
519+ if len (b ) != 1 :
520+ continue
521+ with self .subTest (ch = ch ):
522+ stdscr .addstr (2 , 0 , ch )
523+ self .assertEqual (stdscr .instr (2 , 0 , 1 ), b )
524+ if ord (ch ) < 0x100 :
525+ self .assertEqual (stdscr .inch (2 , 0 ) & curses .A_CHARTEXT , b [0 ])
408526
409527 def test_coordinate_errors (self ):
410528 # Addressing a cell outside the window raises curses.error.
@@ -441,6 +559,10 @@ def test_getch(self):
441559 self .assertEqual (win .getch (), b'm' [0 ])
442560 self .assertEqual (win .getch (), b'\n ' [0 ])
443561
562+ # A key value > 127 is delivered unchanged (it is not locale text).
563+ curses .ungetch (0xE9 )
564+ self .assertEqual (win .getch (), 0xE9 )
565+
444566 def test_getstr (self ):
445567 win = curses .newwin (5 , 12 , 5 , 2 )
446568 curses .echo ()
@@ -613,6 +735,33 @@ def test_background(self):
613735 self .assertEqual (win .inch (0 , 0 ), b'L' [0 ] | curses .A_REVERSE )
614736 self .assertEqual (win .inch (0 , 5 ), b'#' [0 ] | curses .A_REVERSE )
615737
738+ # A non-ASCII background character of an 8-bit locale reads back as its
739+ # encoded byte. See _encodable for the character set.
740+ win .bkgd (' ' )
741+ encoding = win .encoding
742+ for ch in ('é' , '¤' , '€' , 'є' ):
743+ try :
744+ b = ch .encode (encoding )
745+ except UnicodeEncodeError :
746+ continue
747+ if len (b ) != 1 :
748+ continue
749+ # A wide build stores a character outside Latin-1 as a wide cell,
750+ # not as its encoded byte, so it cannot round-trip here.
751+ if ord (ch ) > 0xff and hasattr (win , 'get_wch' ):
752+ continue
753+ with self .subTest (ch = ch ):
754+ win .bkgd (ch )
755+ self .assertEqual (win .getbkgd (), b [0 ])
756+ if ord (ch ) < 0x100 :
757+ # The same byte given as an int. A wide build stores it
758+ # through the locale, so only a Latin-1 byte round-trips.
759+ win .bkgd (' ' )
760+ win .bkgdset (b [0 ])
761+ self .assertEqual (win .getbkgd (), b [0 ])
762+ win .bkgd (b [0 ])
763+ self .assertEqual (win .getbkgd (), b [0 ])
764+
616765 def test_overlay (self ):
617766 srcwin = curses .newwin (5 , 18 , 3 , 4 )
618767 lorem_ipsum (srcwin )
@@ -705,6 +854,16 @@ def test_borders_and_lines(self):
705854 win .border (65 , 66 )
706855 win .border (65 )
707856 win .border ()
857+ # With no arguments, border() fills the edges with ACS line and corner
858+ # characters.
859+ chartext = curses .A_CHARTEXT
860+ maxy , maxx = win .getmaxyx ()
861+ self .assertEqual (win .inch (0 , 0 ) & chartext , curses .ACS_ULCORNER & chartext )
862+ self .assertEqual (win .inch (0 , maxx - 1 ) & chartext , curses .ACS_URCORNER & chartext )
863+ self .assertEqual (win .inch (maxy - 1 , 0 ) & chartext , curses .ACS_LLCORNER & chartext )
864+ self .assertEqual (win .inch (maxy - 1 , maxx - 1 ) & chartext , curses .ACS_LRCORNER & chartext )
865+ self .assertEqual (win .inch (0 , 1 ) & chartext , curses .ACS_HLINE & chartext )
866+ self .assertEqual (win .inch (1 , 0 ) & chartext , curses .ACS_VLINE & chartext )
708867
709868 win .box (':' , '~' )
710869 self .assertEqual (win .instr (0 , 1 , 8 ), b'~~~~~~~~' )
@@ -715,6 +874,11 @@ def test_borders_and_lines(self):
715874 self .assertRaises (TypeError , win .box , 65 , 66 , 67 )
716875 self .assertRaises (TypeError , win .box , 65 )
717876 win .box ()
877+ # With no arguments, box() likewise draws ACS corners and lines.
878+ self .assertEqual (win .inch (0 , 0 ) & chartext , curses .ACS_ULCORNER & chartext )
879+ self .assertEqual (win .inch (0 , maxx - 1 ) & chartext , curses .ACS_URCORNER & chartext )
880+ self .assertEqual (win .inch (0 , 1 ) & chartext , curses .ACS_HLINE & chartext )
881+ self .assertEqual (win .inch (1 , 0 ) & chartext , curses .ACS_VLINE & chartext )
718882
719883 win .move (1 , 2 )
720884 win .hline ('-' , 5 )
@@ -736,6 +900,43 @@ def test_borders_and_lines(self):
736900 self .assertEqual (win .inch (2 , 1 ), b';' [0 ] | curses .A_STANDOUT )
737901 self .assertEqual (win .inch (3 , 1 ), b'a' [0 ])
738902
903+ # A border or line character of an 8-bit locale round-trips as its
904+ # encoded byte. See _encodable for the character set.
905+ encoding = win .encoding
906+ for ch in ('é' , '¤' , '€' , 'є' ):
907+ try :
908+ b = ch .encode (encoding )
909+ except UnicodeEncodeError :
910+ continue
911+ if len (b ) != 1 :
912+ continue
913+ # A wide build stores a character outside Latin-1 as a wide cell,
914+ # not as its encoded byte, so it cannot round-trip here.
915+ if ord (ch ) > 0xff and hasattr (win , 'get_wch' ):
916+ continue
917+ with self .subTest (ch = ch ):
918+ win .erase ()
919+ win .hline (2 , 0 , ch , 5 )
920+ self .assertEqual (win .instr (2 , 0 , 5 ), b * 5 )
921+ win .vline (0 , 0 , ch , 3 )
922+ self .assertEqual (win .instr (0 , 0 , 1 ), b )
923+ self .assertEqual (win .instr (1 , 0 , 1 ), b )
924+ win .border (ch , ch , ch , ch , ch , ch , ch , ch )
925+ self .assertEqual (win .instr (0 , 0 ), b * maxx )
926+ if ord (ch ) < 0x100 :
927+ # The same byte given as an int. A wide build stores it
928+ # through the locale, so only a Latin-1 byte round-trips.
929+ v = b [0 ]
930+ win .erase ()
931+ win .hline (2 , 0 , v , 5 )
932+ self .assertEqual (win .instr (2 , 0 , 5 ), b * 5 )
933+ win .vline (0 , 0 , v , 3 )
934+ self .assertEqual (win .instr (1 , 0 , 1 ), b )
935+ win .border (v , v , v , v , v , v , v , v )
936+ self .assertEqual (win .instr (0 , 0 ), b * maxx )
937+ win .box (v , v )
938+ self .assertEqual (win .instr (0 , 1 , 1 ), b )
939+
739940 def test_unctrl (self ):
740941 # TODO: wunctrl()
741942 self .assertEqual (curses .unctrl (b'A' ), b'A' )
@@ -744,6 +945,19 @@ def test_unctrl(self):
744945 self .assertEqual (curses .unctrl (b'\n ' ), b'^J' )
745946 self .assertEqual (curses .unctrl ('\n ' ), b'^J' )
746947 self .assertEqual (curses .unctrl (10 ), b'^J' )
948+ # A printable non-ASCII byte of an 8-bit locale is returned unchanged.
949+ # See _encodable for the character set.
950+ encoding = self .stdscr .encoding
951+ for ch in ('é' , '¤' , '€' , 'є' ):
952+ try :
953+ b = ch .encode (encoding )
954+ except UnicodeEncodeError :
955+ continue
956+ if len (b ) != 1 :
957+ continue
958+ with self .subTest (ch = ch ):
959+ self .assertEqual (curses .unctrl (ch ), b )
960+ self .assertEqual (curses .unctrl (b [0 ]), b ) # the byte as an int
747961 self .assertRaises (TypeError , curses .unctrl , b'' )
748962 self .assertRaises (TypeError , curses .unctrl , b'AB' )
749963 self .assertRaises (TypeError , curses .unctrl , '' )
@@ -1455,7 +1669,8 @@ def test_issue6243(self):
14551669 def test_unget_wch (self ):
14561670 stdscr = self .stdscr
14571671 encoding = stdscr .encoding
1458- for ch in ('a' , '\xe9 ' , '\u20ac ' , '\U0010FFFF ' ):
1672+ # See _encodable for the character set, plus a non-BMP character.
1673+ for ch in ('a' , '\xe9 ' , '\xa4 ' , '\u20ac ' , '\u0454 ' , '\U0010FFFF ' ):
14591674 try :
14601675 ch .encode (encoding )
14611676 except UnicodeEncodeError :
0 commit comments