@@ -128,6 +128,60 @@ def test_function_checksum(self):
128128 result = h .hexdigest ()
129129 self .assertEqual (result , self .expectedchecksum )
130130
131+ def test_name (self ):
132+ name = self .db .name
133+ self .assertRaises (ValueError , name , '\0 ' )
134+ self .assertRaises (ValueError , name , '\n ' )
135+ self .assertRaises (ValueError , name , '\x1F ' )
136+ self .assertRaises (ValueError , name , '\x7F ' )
137+ self .assertRaises (ValueError , name , '\x9F ' )
138+ self .assertRaises (ValueError , name , '\uFFFE ' )
139+ self .assertRaises (ValueError , name , '\uFFFF ' )
140+ self .assertRaises (ValueError , name , '\U0010FFFF ' )
141+ self .assertEqual (name ('\U0010FFFF ' , 42 ), 42 )
142+
143+ self .assertEqual (name (' ' ), 'SPACE' )
144+ self .assertEqual (name ('1' ), 'DIGIT ONE' )
145+ self .assertEqual (name ('A' ), 'LATIN CAPITAL LETTER A' )
146+ self .assertEqual (name ('\xA0 ' ), 'NO-BREAK SPACE' )
147+ self .assertEqual (name ('\u0221 ' , None ), None if self .old else
148+ 'LATIN SMALL LETTER D WITH CURL' )
149+ self .assertEqual (name ('\u3400 ' ), 'CJK UNIFIED IDEOGRAPH-3400' )
150+ self .assertEqual (name ('\u9FA5 ' ), 'CJK UNIFIED IDEOGRAPH-9FA5' )
151+ self .assertEqual (name ('\uAC00 ' ), 'HANGUL SYLLABLE GA' )
152+ self .assertEqual (name ('\uD7A3 ' ), 'HANGUL SYLLABLE HIH' )
153+ self .assertEqual (name ('\uF900 ' ), 'CJK COMPATIBILITY IDEOGRAPH-F900' )
154+ self .assertEqual (name ('\uFA6A ' ), 'CJK COMPATIBILITY IDEOGRAPH-FA6A' )
155+ self .assertEqual (name ('\uFBF9 ' ),
156+ 'ARABIC LIGATURE UIGHUR KIRGHIZ YEH WITH HAMZA '
157+ 'ABOVE WITH ALEF MAKSURA ISOLATED FORM' )
158+ self .assertEqual (name ('\U00013460 ' , None ), None if self .old else
159+ 'EGYPTIAN HIEROGLYPH-13460' )
160+ self .assertEqual (name ('\U000143FA ' , None ), None if self .old else
161+ 'EGYPTIAN HIEROGLYPH-143FA' )
162+ self .assertEqual (name ('\U00017000 ' , None ), None if self .old else
163+ 'TANGUT IDEOGRAPH-17000' )
164+ self .assertEqual (name ('\U00018B00 ' , None ), None if self .old else
165+ 'KHITAN SMALL SCRIPT CHARACTER-18B00' )
166+ self .assertEqual (name ('\U00018CD5 ' , None ), None if self .old else
167+ 'KHITAN SMALL SCRIPT CHARACTER-18CD5' )
168+ self .assertEqual (name ('\U00018CFF ' , None ), None if self .old else
169+ 'KHITAN SMALL SCRIPT CHARACTER-18CFF' )
170+ self .assertEqual (name ('\U00018D1E ' , None ), None if self .old else
171+ 'TANGUT IDEOGRAPH-18D1E' )
172+ self .assertEqual (name ('\U0001B170 ' , None ), None if self .old else
173+ 'NUSHU CHARACTER-1B170' )
174+ self .assertEqual (name ('\U0001B2FB ' , None ), None if self .old else
175+ 'NUSHU CHARACTER-1B2FB' )
176+ self .assertEqual (name ('\U0001FBA8 ' , None ), None if self .old else
177+ 'BOX DRAWINGS LIGHT DIAGONAL UPPER CENTRE TO '
178+ 'MIDDLE LEFT AND MIDDLE RIGHT TO LOWER CENTRE' )
179+ self .assertEqual (name ('\U0002A6D6 ' ), 'CJK UNIFIED IDEOGRAPH-2A6D6' )
180+ self .assertEqual (name ('\U0002FA1D ' ), 'CJK COMPATIBILITY IDEOGRAPH-2FA1D' )
181+ self .assertEqual (name ('\U00033479 ' , None ), None if self .old else
182+ 'CJK UNIFIED IDEOGRAPH-33479' )
183+
184+ @requires_resource ('cpu' )
131185 def test_name_inverse_lookup (self ):
132186 for char in iterallchars ():
133187 looked_name = self .db .name (char , None )
@@ -151,6 +205,17 @@ def test_lookup_nonexistant(self):
151205 "HANDBUG" ,
152206 "MODIFIER LETTER CYRILLIC SMALL QUESTION MARK" ,
153207 "???" ,
208+ "CJK UNIFIED IDEOGRAPH-03400" ,
209+ "CJK UNIFIED IDEOGRAPH-020000" ,
210+ "CJK UNIFIED IDEOGRAPH-33FF" ,
211+ "CJK UNIFIED IDEOGRAPH-F900" ,
212+ "CJK UNIFIED IDEOGRAPH-13460" ,
213+ "CJK UNIFIED IDEOGRAPH-17000" ,
214+ "CJK UNIFIED IDEOGRAPH-18B00" ,
215+ "CJK UNIFIED IDEOGRAPH-1B170" ,
216+ "CJK COMPATIBILITY IDEOGRAPH-3400" ,
217+ "TANGUT IDEOGRAPH-3400" ,
218+ "HANGUL SYLLABLE AC00" ,
154219 ]:
155220 self .assertRaises (KeyError , self .db .lookup , nonexistent )
156221
@@ -613,7 +678,47 @@ class UnicodeFunctionsTest(unittest.TestCase, BaseUnicodeFunctionsTest):
613678 # (e.g. 'make distclean && make') to get the correct checksum.
614679 expectedchecksum = ('83cc43a2fbb779185832b4c049217d80b05bf349'
615680 if quicktest else
616- '65670ae03a324c5f9e826a4de3e25bae4d73c9b7' )
681+ '180bdc91143d8aa2eb9dd6726e66d37606205942' )
682+
@requires_resource('network')
def test_all_names(self):
    # Obtain the DerivedName.txt test data and feed it to
    # run_name_tests() while the data object is held open by `with`.
    # NOTE(review): download_test_data_file presumably fetches the file
    # over the network, hence the 'network' resource -- confirm.
    derived_names = download_test_data_file("DerivedName.txt")
    with derived_names:
        self.run_name_tests(derived_names)
690+
def run_name_tests(self, testdata):
    """Compare self.db.name() with the names listed in *testdata*.

    *testdata* is an iterable of text lines.  Blank lines and lines
    starting with '#' are skipped.  Every other line is split on "; "
    into a code point field and a name:

    * "CP1..CP2; PREFIX*" names each code point in the inclusive range
      PREFIX followed by the code point in uppercase hex (min. 4 digits);
    * "CP; PREFIX*" does the same for a single code point;
    * "CP; NAME" uses NAME verbatim.

    Afterwards every code point from 0 to sys.maxunicode is checked;
    code points missing from the data are expected to have no name.
    """
    names_ref = {}

    # Parse data
    for line in testdata:
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        raw_cp, name = line.split("; ")
        if ".." in raw_cp:
            # A range: the name must be a pattern ending in '*'.
            cp1, cp2 = (int(part, 16) for part in raw_cp.split(".."))
            self.assertTrue(name.endswith('*'), line)
            prefix = name[:-1]
            for cp in range(cp1, cp2 + 1):
                names_ref[cp] = f"{prefix}{cp:04X}"
        elif name.endswith('*'):
            # A single code point whose name is derived from its value.
            cp = int(raw_cp, 16)
            names_ref[cp] = f"{name[:-1]}{cp:04X}"
        else:
            # A literal name; '*' is only valid as a trailing marker.
            self.assertNotIn('*', name, line)
            names_ref[int(raw_cp, 16)] = name

    for cp in range(sys.maxunicode + 1):
        # The message identifies the offending code point on failure.
        self.assertEqual(self.db.name(chr(cp), None), names_ref.get(cp),
                         f"U+{cp:04X}")
617722
618723 def test_isxidstart (self ):
619724 self .assertTrue (self .db .isxidstart ('S' ))
0 commit comments