28
28
# we don't use enum.Enum because of Python 2.7 compatibility
29
29
class UnicodeFiles (object ):
30
30
# ReadMe does not contain any Unicode data; we
31
- # use it to extract versions.
31
+ # only use it to extract versions.
32
32
README = "ReadMe.txt"
33
33
34
34
DERIVED_CORE_PROPERTIES = "DerivedCoreProperties.txt"
35
35
DERIVED_NORMALIZATION_PROPS = "DerivedNormalizationProps.txt"
36
- SPECIAL_CASING = "SpecialCasing.txt"
37
- SCRIPTS = "Scripts.txt"
38
36
PROPS = "PropList.txt"
37
+ SCRIPTS = "Scripts.txt"
38
+ SPECIAL_CASING = "SpecialCasing.txt"
39
39
UNICODE_DATA = "UnicodeData.txt"
40
40
41
41
@@ -66,15 +66,15 @@ class UnicodeFiles(object):
66
66
# Mapping taken from Table 12 from:
67
67
# http://www.unicode.org/reports/tr44/#General_Category_Values
68
68
EXPANDED_CATEGORIES = {
69
- 'Lu' : ['LC' , 'L' ], 'Ll' : ['LC' , 'L' ], 'Lt' : ['LC' , 'L' ],
70
- 'Lm' : ['L' ], 'Lo' : ['L' ],
71
- 'Mn' : ['M' ], 'Mc' : ['M' ], 'Me' : ['M' ],
72
- 'Nd' : ['N' ], 'Nl' : ['N' ], 'No' : ['N' ],
73
- 'Pc' : ['P' ], 'Pd' : ['P' ], 'Ps' : ['P' ], 'Pe' : ['P' ],
74
- 'Pi' : ['P' ], 'Pf' : ['P' ], 'Po' : ['P' ],
75
- 'Sm' : ['S' ], 'Sc' : ['S' ], 'Sk' : ['S' ], 'So' : ['S' ],
76
- 'Zs' : ['Z' ], 'Zl' : ['Z' ], 'Zp' : ['Z' ],
77
- 'Cc' : ['C' ], 'Cf' : ['C' ], 'Cs' : ['C' ], 'Co' : ['C' ], 'Cn' : ['C' ],
69
+ "Lu" : ["LC" , "L" ], "Ll" : ["LC" , "L" ], "Lt" : ["LC" , "L" ],
70
+ "Lm" : ["L" ], "Lo" : ["L" ],
71
+ "Mn" : ["M" ], "Mc" : ["M" ], "Me" : ["M" ],
72
+ "Nd" : ["N" ], "Nl" : ["N" ], "No" : ["N" ],
73
+ "Pc" : ["P" ], "Pd" : ["P" ], "Ps" : ["P" ], "Pe" : ["P" ],
74
+ "Pi" : ["P" ], "Pf" : ["P" ], "Po" : ["P" ],
75
+ "Sm" : ["S" ], "Sc" : ["S" ], "Sk" : ["S" ], "So" : ["S" ],
76
+ "Zs" : ["Z" ], "Zl" : ["Z" ], "Zp" : ["Z" ],
77
+ "Cc" : ["C" ], "Cf" : ["C" ], "Cs" : ["C" ], "Co" : ["C" ], "Cn" : ["C" ],
78
78
}
79
79
80
80
# these are the surrogate codepoints, which are not valid Rust characters
@@ -115,7 +115,7 @@ def fetch_files(version=None):
115
115
readme_content = subprocess .check_output (("curl" , readme_url ))
116
116
117
117
unicode_version = parse_unicode_version (
118
- str ( readme_content , "utf8" )
118
+ readme_content . decode ( "utf8" )
119
119
)
120
120
121
121
download_dir = os .path .join (FETCH_DIR , unicode_version .as_str )
@@ -415,7 +415,7 @@ def compute_trie(rawdata, chunksize):
415
415
child_data = []
416
416
for i in range (len (rawdata ) // chunksize ):
417
417
data = rawdata [i * chunksize : (i + 1 ) * chunksize ]
418
- child = '|' .join (map (str , data ))
418
+ child = "|" .join (map (str , data ))
419
419
if child not in childmap :
420
420
childmap [child ] = len (childmap )
421
421
child_data .extend (data )
@@ -444,34 +444,34 @@ def emit_bool_trie(f, name, t_data, is_pub=True):
444
444
pub_string = "pub "
445
445
f .write (" %sconst %s: &super::BoolTrie = &super::BoolTrie {\n " % (pub_string , name ))
446
446
f .write (" r1: [\n " )
447
- data = ',' .join (' 0x%016x' % chunk for chunk in chunks [0 :0x800 // chunk_size ])
447
+ data = "," .join (" 0x%016x" % chunk for chunk in chunks [0 :0x800 // chunk_size ])
448
448
format_table_content (f , data , 12 )
449
449
f .write ("\n ],\n " )
450
450
451
451
# 0x800..0x10000 trie
452
452
(r2 , r3 ) = compute_trie (chunks [0x800 // chunk_size : 0x10000 // chunk_size ], 64 // chunk_size )
453
453
f .write (" r2: [\n " )
454
- data = ',' .join (str (node ) for node in r2 )
454
+ data = "," .join (str (node ) for node in r2 )
455
455
format_table_content (f , data , 12 )
456
456
f .write ("\n ],\n " )
457
457
f .write (" r3: &[\n " )
458
- data = ',' .join (' 0x%016x' % chunk for chunk in r3 )
458
+ data = "," .join (" 0x%016x" % chunk for chunk in r3 )
459
459
format_table_content (f , data , 12 )
460
460
f .write ("\n ],\n " )
461
461
462
462
# 0x10000..0x110000 trie
463
463
(mid , r6 ) = compute_trie (chunks [0x10000 // chunk_size : 0x110000 // chunk_size ], 64 // chunk_size )
464
464
(r4 , r5 ) = compute_trie (mid , 64 )
465
465
f .write (" r4: [\n " )
466
- data = ',' .join (str (node ) for node in r4 )
466
+ data = "," .join (str (node ) for node in r4 )
467
467
format_table_content (f , data , 12 )
468
468
f .write ("\n ],\n " )
469
469
f .write (" r5: &[\n " )
470
- data = ',' .join (str (node ) for node in r5 )
470
+ data = "," .join (str (node ) for node in r5 )
471
471
format_table_content (f , data , 12 )
472
472
f .write ("\n ],\n " )
473
473
f .write (" r6: &[\n " )
474
- data = ',' .join (' 0x%016x' % chunk for chunk in r6 )
474
+ data = "," .join (" 0x%016x" % chunk for chunk in r6 )
475
475
format_table_content (f , data , 12 )
476
476
f .write ("\n ],\n " )
477
477
@@ -497,12 +497,12 @@ def emit_small_bool_trie(f, name, t_data, is_pub=True):
497
497
(r1 , r2 ) = compute_trie (chunks , 1 )
498
498
499
499
f .write (" r1: &[\n " )
500
- data = ',' .join (str (node ) for node in r1 )
500
+ data = "," .join (str (node ) for node in r1 )
501
501
format_table_content (f , data , 12 )
502
502
f .write ("\n ],\n " )
503
503
504
504
f .write (" r2: &[\n " )
505
- data = ',' .join (' 0x%016x' % node for node in r2 )
505
+ data = "," .join (" 0x%016x" % node for node in r2 )
506
506
format_table_content (f , data , 12 )
507
507
f .write ("\n ],\n " )
508
508
@@ -599,11 +599,9 @@ def main():
599
599
print ("Using Unicode version: {}" .format (unicode_version .as_str ))
600
600
601
601
tables_rs_path = os .path .join (THIS_DIR , "tables.rs" )
602
- if os .path .exists (tables_rs_path ):
603
- os .remove (tables_rs_path )
604
602
603
+ # opening in "w" mode will overwrite the file if it exists
605
604
with open (tables_rs_path , "w" ) as rf :
606
- # write the file's preamble
607
605
rf .write (PREAMBLE )
608
606
609
607
unicode_version_notice = textwrap .dedent ("""
0 commit comments