Skip to content

Commit a580421

Browse files
committed
More cleanups for unicode.py
1 parent 89feb6d commit a580421

File tree

1 file changed

+23
-25
lines changed

1 file changed

+23
-25
lines changed

src/libcore/unicode/unicode.py

Lines changed: 23 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,14 @@
2828
# we don't use enum.Enum because of Python 2.7 compatibility
2929
class UnicodeFiles(object):
3030
# ReadMe does not contain any unicode data, we
31-
# use it to extract versions.
31+
# only use it to extract versions.
3232
README = "ReadMe.txt"
3333

3434
DERIVED_CORE_PROPERTIES = "DerivedCoreProperties.txt"
3535
DERIVED_NORMALIZATION_PROPS = "DerivedNormalizationProps.txt"
36-
SPECIAL_CASING = "SpecialCasing.txt"
37-
SCRIPTS = "Scripts.txt"
3836
PROPS = "PropList.txt"
37+
SCRIPTS = "Scripts.txt"
38+
SPECIAL_CASING = "SpecialCasing.txt"
3939
UNICODE_DATA = "UnicodeData.txt"
4040

4141

@@ -66,15 +66,15 @@ class UnicodeFiles(object):
6666
# Mapping taken from Table 12 from:
6767
# http://www.unicode.org/reports/tr44/#General_Category_Values
6868
EXPANDED_CATEGORIES = {
69-
'Lu': ['LC', 'L'], 'Ll': ['LC', 'L'], 'Lt': ['LC', 'L'],
70-
'Lm': ['L'], 'Lo': ['L'],
71-
'Mn': ['M'], 'Mc': ['M'], 'Me': ['M'],
72-
'Nd': ['N'], 'Nl': ['N'], 'No': ['N'],
73-
'Pc': ['P'], 'Pd': ['P'], 'Ps': ['P'], 'Pe': ['P'],
74-
'Pi': ['P'], 'Pf': ['P'], 'Po': ['P'],
75-
'Sm': ['S'], 'Sc': ['S'], 'Sk': ['S'], 'So': ['S'],
76-
'Zs': ['Z'], 'Zl': ['Z'], 'Zp': ['Z'],
77-
'Cc': ['C'], 'Cf': ['C'], 'Cs': ['C'], 'Co': ['C'], 'Cn': ['C'],
69+
"Lu": ["LC", "L"], "Ll": ["LC", "L"], "Lt": ["LC", "L"],
70+
"Lm": ["L"], "Lo": ["L"],
71+
"Mn": ["M"], "Mc": ["M"], "Me": ["M"],
72+
"Nd": ["N"], "Nl": ["N"], "No": ["N"],
73+
"Pc": ["P"], "Pd": ["P"], "Ps": ["P"], "Pe": ["P"],
74+
"Pi": ["P"], "Pf": ["P"], "Po": ["P"],
75+
"Sm": ["S"], "Sc": ["S"], "Sk": ["S"], "So": ["S"],
76+
"Zs": ["Z"], "Zl": ["Z"], "Zp": ["Z"],
77+
"Cc": ["C"], "Cf": ["C"], "Cs": ["C"], "Co": ["C"], "Cn": ["C"],
7878
}
7979

8080
# these are the surrogate codepoints, which are not valid rust characters
@@ -115,7 +115,7 @@ def fetch_files(version=None):
115115
readme_content = subprocess.check_output(("curl", readme_url))
116116

117117
unicode_version = parse_unicode_version(
118-
str(readme_content, "utf8")
118+
readme_content.decode("utf8")
119119
)
120120

121121
download_dir = os.path.join(FETCH_DIR, unicode_version.as_str)
@@ -415,7 +415,7 @@ def compute_trie(rawdata, chunksize):
415415
child_data = []
416416
for i in range(len(rawdata) // chunksize):
417417
data = rawdata[i * chunksize: (i + 1) * chunksize]
418-
child = '|'.join(map(str, data))
418+
child = "|".join(map(str, data))
419419
if child not in childmap:
420420
childmap[child] = len(childmap)
421421
child_data.extend(data)
@@ -444,34 +444,34 @@ def emit_bool_trie(f, name, t_data, is_pub=True):
444444
pub_string = "pub "
445445
f.write(" %sconst %s: &super::BoolTrie = &super::BoolTrie {\n" % (pub_string, name))
446446
f.write(" r1: [\n")
447-
data = ','.join('0x%016x' % chunk for chunk in chunks[0:0x800 // chunk_size])
447+
data = ",".join("0x%016x" % chunk for chunk in chunks[0:0x800 // chunk_size])
448448
format_table_content(f, data, 12)
449449
f.write("\n ],\n")
450450

451451
# 0x800..0x10000 trie
452452
(r2, r3) = compute_trie(chunks[0x800 // chunk_size : 0x10000 // chunk_size], 64 // chunk_size)
453453
f.write(" r2: [\n")
454-
data = ','.join(str(node) for node in r2)
454+
data = ",".join(str(node) for node in r2)
455455
format_table_content(f, data, 12)
456456
f.write("\n ],\n")
457457
f.write(" r3: &[\n")
458-
data = ','.join('0x%016x' % chunk for chunk in r3)
458+
data = ",".join("0x%016x" % chunk for chunk in r3)
459459
format_table_content(f, data, 12)
460460
f.write("\n ],\n")
461461

462462
# 0x10000..0x110000 trie
463463
(mid, r6) = compute_trie(chunks[0x10000 // chunk_size : 0x110000 // chunk_size], 64 // chunk_size)
464464
(r4, r5) = compute_trie(mid, 64)
465465
f.write(" r4: [\n")
466-
data = ','.join(str(node) for node in r4)
466+
data = ",".join(str(node) for node in r4)
467467
format_table_content(f, data, 12)
468468
f.write("\n ],\n")
469469
f.write(" r5: &[\n")
470-
data = ','.join(str(node) for node in r5)
470+
data = ",".join(str(node) for node in r5)
471471
format_table_content(f, data, 12)
472472
f.write("\n ],\n")
473473
f.write(" r6: &[\n")
474-
data = ','.join('0x%016x' % chunk for chunk in r6)
474+
data = ",".join("0x%016x" % chunk for chunk in r6)
475475
format_table_content(f, data, 12)
476476
f.write("\n ],\n")
477477

@@ -497,12 +497,12 @@ def emit_small_bool_trie(f, name, t_data, is_pub=True):
497497
(r1, r2) = compute_trie(chunks, 1)
498498

499499
f.write(" r1: &[\n")
500-
data = ','.join(str(node) for node in r1)
500+
data = ",".join(str(node) for node in r1)
501501
format_table_content(f, data, 12)
502502
f.write("\n ],\n")
503503

504504
f.write(" r2: &[\n")
505-
data = ','.join('0x%016x' % node for node in r2)
505+
data = ",".join("0x%016x" % node for node in r2)
506506
format_table_content(f, data, 12)
507507
f.write("\n ],\n")
508508

@@ -599,11 +599,9 @@ def main():
599599
print("Using Unicode version: {}".format(unicode_version.as_str))
600600

601601
tables_rs_path = os.path.join(THIS_DIR, "tables.rs")
602-
if os.path.exists(tables_rs_path):
603-
os.remove(tables_rs_path)
604602

603+
# will overwrite the file if it exists
605604
with open(tables_rs_path, "w") as rf:
606-
# write the file's preamble
607605
rf.write(PREAMBLE)
608606

609607
unicode_version_notice = textwrap.dedent("""

0 commit comments

Comments
 (0)