Skip to content

Commit 4adc062

Browse files
committed
---
yaml --- r: 146904 b: refs/heads/try2 c: dfe38db h: refs/heads/master v: v3
1 parent baed18e commit 4adc062

File tree

6 files changed

+690
-30
lines changed

6 files changed

+690
-30
lines changed

[refs]

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ refs/heads/snap-stage3: 78a7676898d9f80ab540c6df5d4c9ce35bb50463
55
refs/heads/try: 519addf6277dbafccbb4159db4b710c37eaa2ec5
66
refs/tags/release-0.1: 1f5c5126e96c79d22cb7862f75304136e204f105
77
refs/heads/ndm: f3868061cd7988080c30d6d5bf352a5a5fe2460b
8-
refs/heads/try2: c234614950c8349607f08fab754057dfe047ca0c
8+
refs/heads/try2: dfe38dbca4b62132d7512f767bca6ebe6ddfe931
99
refs/heads/dist-snap: ba4081a5a8573875fed17545846f6f6902c8ba8d
1010
refs/tags/release-0.2: c870d2dffb391e14efb05aa27898f1f6333a9596
1111
refs/tags/release-0.3: b5f0d0f648d9a6153664837026ba1be43d3e2503

branches/try2/src/etc/unicode.py

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ def load_unicode_data(f):
4141
continue
4242
[code, name, gencat, combine, bidi,
4343
decomp, deci, digit, num, mirror,
44-
old, iso, upcase, lowcsae, titlecase ] = fields
44+
old, iso, upcase, lowcase, titlecase ] = fields
4545

4646
code = int(code, 16)
4747

@@ -89,11 +89,9 @@ def load_unicode_data(f):
8989

9090
return (canon_decomp, compat_decomp, gencats, combines)
9191

92-
93-
def load_derived_core_properties(f):
92+
def load_properties(f, interestingprops):
9493
fetch(f)
95-
derivedprops = {}
96-
interestingprops = ["XID_Start", "XID_Continue", "Alphabetic"]
94+
props = {}
9795
re1 = re.compile("^([0-9A-F]+) +; (\w+)")
9896
re2 = re.compile("^([0-9A-F]+)\.\.([0-9A-F]+) +; (\w+)")
9997

@@ -118,10 +116,10 @@ def load_derived_core_properties(f):
118116
continue
119117
d_lo = int(d_lo, 16)
120118
d_hi = int(d_hi, 16)
121-
if prop not in derivedprops:
122-
derivedprops[prop] = []
123-
derivedprops[prop].append((d_lo, d_hi))
124-
return derivedprops
119+
if prop not in props:
120+
props[prop] = []
121+
props[prop].append((d_lo, d_hi))
122+
return props
125123

126124
def escape_char(c):
127125
if c <= 0xff:
@@ -376,5 +374,9 @@ def emit_decomp_module(f, canon, compat, combine):
376374

377375
emit_decomp_module(rf, canon_decomp, compat_decomp, combines)
378376

379-
derived = load_derived_core_properties("DerivedCoreProperties.txt")
377+
derived = load_properties("DerivedCoreProperties.txt",
378+
["XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase"])
380379
emit_property_module(rf, "derived_property", derived)
380+
381+
props = load_properties("PropList.txt", ["White_Space"])
382+
emit_property_module(rf, "property", props)

branches/try2/src/libstd/char.rs

Lines changed: 8 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ use cast::transmute;
1414
use option::{None, Option, Some};
1515
use iter::{Iterator, range_step};
1616
use str::StrSlice;
17-
use unicode::{derived_property, general_category, decompose};
17+
use unicode::{derived_property, property, general_category, decompose};
1818
use to_str::ToStr;
1919
use str;
2020

@@ -89,30 +89,28 @@ pub fn is_XID_continue(c: char) -> bool { derived_property::XID_Continue(c) }
8989

9090
///
9191
/// Indicates whether a character is in lower case, defined
92-
/// in terms of the Unicode General Category 'Ll'
92+
/// in terms of the Unicode Derived Core Property 'Lowercase'.
9393
///
9494
#[inline]
95-
pub fn is_lowercase(c: char) -> bool { general_category::Ll(c) }
95+
pub fn is_lowercase(c: char) -> bool { derived_property::Lowercase(c) }
9696

9797
///
9898
/// Indicates whether a character is in upper case, defined
99-
/// in terms of the Unicode General Category 'Lu'.
99+
/// in terms of the Unicode Derived Core Property 'Uppercase'.
100100
///
101101
#[inline]
102-
pub fn is_uppercase(c: char) -> bool { general_category::Lu(c) }
102+
pub fn is_uppercase(c: char) -> bool { derived_property::Uppercase(c) }
103103

104104
///
105105
/// Indicates whether a character is whitespace. Whitespace is defined in
106-
/// terms of the Unicode General Categories 'Zs', 'Zl', 'Zp'
107-
/// additional 'Cc'-category control codes in the range [0x09, 0x0d]
106+
/// terms of the Unicode Property 'White_Space'.
108107
///
109108
#[inline]
110109
pub fn is_whitespace(c: char) -> bool {
110+
// As an optimization ASCII whitespace characters are checked separately
111111
c == ' '
112112
|| ('\x09' <= c && c <= '\x0d')
113-
|| general_category::Zs(c)
114-
|| general_category::Zl(c)
115-
|| general_category::Zp(c)
113+
|| property::White_Space(c)
116114
}
117115

118116
///

0 commit comments

Comments
 (0)