Skip to content

Commit 6a664e5

Browse files
Review
1 parent a51525d commit 6a664e5

File tree

1 file changed

+26
-26
lines changed

1 file changed

+26
-26
lines changed

Tools/unicode/makeunicodedata.py

Lines changed: 26 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -198,11 +198,11 @@ def makeunicodedata(unicode, trace):
198198
eastasianwidth = EASTASIANWIDTH_NAMES.index(unicode.widths[char] or 'N')
199199
graphemebreak = GRAPHEME_CLUSTER_NAMES.index(unicode.grapheme_breaks[char] or 'Other')
200200
extpict = unicode.ext_picts[char]
201+
bidirectional = BIDIRECTIONAL_NAMES.index(unicode.bidi_classes[char])
201202
if record:
202203
# extract database properties
203204
category = CATEGORY_NAMES.index(record.general_category)
204205
combining = int(record.canonical_combining_class)
205-
bidirectional = BIDIRECTIONAL_NAMES.index(unicode.bidi_classes[char])
206206
mirrored = record.bidi_mirrored == "Y"
207207
normalizationquickcheck = record.quick_check
208208
incb = INDIC_CONJUNCT_BREAK_NAMES.index(record.incb)
@@ -211,7 +211,6 @@ def makeunicodedata(unicode, trace):
211211
normalizationquickcheck, graphemebreak, incb, extpict,
212212
)
213213
else:
214-
bidirectional = BIDIRECTIONAL_NAMES.index(unicode.bidi_classes[char])
215214
if eastasianwidth or graphemebreak or extpict or bidirectional:
216215
item = (0, 0, bidirectional, 0, eastasianwidth,
217216
0, graphemebreak, 0, extpict)
@@ -815,48 +814,51 @@ def merge_old_version(version, new, old):
815814
elif k == 2:
816815
category_changes[i] = CATEGORY_NAMES.index(value)
817816
elif k == 4:
817+
# bidi_class changes handled via bidi_classes
818+
pass
819+
elif k == 5:
818820
# We assume that all normalization changes are in 1:1 mappings
819821
assert " " not in value
820822
normalization_changes.append((i, value))
821-
elif k == 5:
823+
elif k == 6:
822824
# we only support changes where the old value is a single digit
823825
assert value in "0123456789"
824826
decimal_changes[i] = int(value)
825-
elif k == 7:
827+
elif k == 8:
826828
# Since 0 encodes "no change", the old value is better not 0
827829
if not value:
828830
numeric_changes[i] = -1
829831
else:
830832
numeric_changes[i] = float(value)
831833
assert numeric_changes[i] not in (0, -1)
832-
elif k == 8:
834+
elif k == 9:
833835
if value == 'Y':
834836
mirrored_changes[i] = '1'
835837
else:
836838
mirrored_changes[i] = '0'
837-
elif k == 10:
839+
elif k == 11:
838840
# change to ISO comment, ignore
839841
pass
840-
elif k == 11:
842+
elif k == 12:
841843
# change to simple uppercase mapping; ignore
842844
pass
843-
elif k == 12:
845+
elif k == 13:
844846
# change to simple lowercase mapping; ignore
845847
pass
846-
elif k == 13:
848+
elif k == 14:
847849
# change to simple titlecase mapping; ignore
848850
pass
849-
elif k == 14:
851+
elif k == 15:
850852
# change to east asian width
851853
east_asian_width_changes[i] = EASTASIANWIDTH_NAMES.index(value)
852-
elif k == 15:
854+
elif k == 16:
853855
# derived property changes; not yet
854856
pass
855-
elif k == 16:
857+
elif k == 17:
856858
# normalization quickchecks are not performed
857859
# for older versions
858860
pass
859-
elif k == 17:
861+
elif k == 18:
860862
# The Indic_Conjunct_Break property did not exist for
861863
# older versions
862864
pass
@@ -943,7 +945,7 @@ class UcdRecord:
943945
name: str
944946
general_category: str
945947
canonical_combining_class: str
946-
# UnicodeData.bidi_classes
948+
bidi_class: str
947949
decomposition_type: str
948950
decomposition_mapping: str
949951
numeric_type: str
@@ -975,7 +977,7 @@ class UcdRecord:
975977

976978

977979
def from_row(row: List[str]) -> UcdRecord:
978-
return UcdRecord(*row[:4], *row[5:], None, set(), 0, "None")
980+
return UcdRecord(*row, None, set(), 0, "None")
979981

980982

981983
# --------------------------------------------------------------------
@@ -990,17 +992,14 @@ class UnicodeData:
990992
def __init__(self, version, ideograph_check=True):
991993
self.changed = []
992994
table = [None] * 0x110000
993-
bidi_classes = [None] * 0x110000
994995
for s in UcdFile(UNICODE_DATA, version):
995996
char = int(s[0], 16)
996-
bidi_classes[char] = s[4]
997997
table[char] = from_row(s)
998998

999999
self.derived_name_ranges = []
10001000

10011001
# expand first-last ranges
10021002
field = None
1003-
bidi_val = None
10041003
for i in range(0, 0x110000):
10051004
# The file UnicodeData.txt has its own distinct way of
10061005
# expressing ranges. See:
@@ -1009,8 +1008,7 @@ def __init__(self, version, ideograph_check=True):
10091008
if s:
10101009
if s.name[-6:] == "First>":
10111010
s.name = ""
1012-
field = dataclasses.astuple(s)[:14]
1013-
bidi_val = bidi_classes[i]
1011+
field = dataclasses.astuple(s)[:15]
10141012
elif s.name[-5:] == "Last>":
10151013
for j, (rangename, _) in enumerate(derived_name_range_names):
10161014
if s.name.startswith("<" + rangename):
@@ -1020,8 +1018,7 @@ def __init__(self, version, ideograph_check=True):
10201018
s.name = ""
10211019
field = None
10221020
elif field:
1023-
bidi_classes[i] = bidi_val
1024-
table[i] = UcdRecord('%X' % i, *field[1:], None, set(), 0, "None")
1021+
table[i] = from_row(('%X' % i,) + field[1:])
10251022

10261023
# public attributes
10271024
self.filename = UNICODE_DATA % ''
@@ -1079,18 +1076,21 @@ def __init__(self, version, ideograph_check=True):
10791076

10801077
# Read DerivedBidiClass.txt for bidi classes
10811078
# see https://www.unicode.org/reports/tr44/#Missing_Conventions
1079+
bidi_classes = [None] * 0x110000
1080+
for i in range(0, 0x110000):
1081+
if table[i] is not None:
1082+
bidi_classes[i] = table[i].bidi_class
10821083
if version != '3.2.0':
10831084
missing_re = re.compile(
1084-
r'# @missing: ([\dA-F]+)\.\.([\dA-F]+); (\w+)'
1085+
r'# @missing: ([\dA-F]+\.\.[\dA-F]+); (\w+)'
10851086
)
10861087
with open_data(DERIVED_BIDI_CLASS, version) as f:
10871088
for l in f:
10881089
m = missing_re.match(l)
10891090
if not m:
10901091
continue
1091-
start, end = int(m[1], 16), int(m[2], 16)
1092-
name = BIDI_LONG_NAMES[m[3]]
1093-
for i in range(start, end + 1):
1092+
name = BIDI_LONG_NAMES[m[2]]
1093+
for i in expand_range(m[1]):
10941094
bidi_classes[i] = name
10951095
for char, (bidi,) in UcdFile(DERIVED_BIDI_CLASS, version).expanded():
10961096
bidi_classes[char] = bidi

0 commit comments

Comments
 (0)