Skip to content

Commit 72858d8

Browse files
committed
---
yaml --- r: 154074 b: refs/heads/try2 c: 157459b h: refs/heads/master v: v3
1 parent 3dd06ea commit 72858d8

File tree

38 files changed

+1045
-188
lines changed

38 files changed

+1045
-188
lines changed

[refs]

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ refs/heads/snap-stage3: 78a7676898d9f80ab540c6df5d4c9ce35bb50463
55
refs/heads/try: 519addf6277dbafccbb4159db4b710c37eaa2ec5
66
refs/tags/release-0.1: 1f5c5126e96c79d22cb7862f75304136e204f105
77
refs/heads/ndm: f3868061cd7988080c30d6d5bf352a5a5fe2460b
8-
refs/heads/try2: bf2d98e1900d854b88192a439993161c665881fe
8+
refs/heads/try2: 157459be59da13081737c92c085de3d3fe5bbbea
99
refs/heads/dist-snap: ba4081a5a8573875fed17545846f6f6902c8ba8d
1010
refs/tags/release-0.2: c870d2dffb391e14efb05aa27898f1f6333a9596
1111
refs/tags/release-0.3: b5f0d0f648d9a6153664837026ba1be43d3e2503

branches/try2/.gitignore

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,12 @@
6464
/nd/
6565
/rt/
6666
/rustllvm/
67+
/src/libunicode/DerivedCoreProperties.txt
68+
/src/libunicode/EastAsianWidth.txt
69+
/src/libunicode/HangulSyllableType.txt
70+
/src/libunicode/PropList.txt
71+
/src/libunicode/Scripts.txt
72+
/src/libunicode/UnicodeData.txt
6773
/stage0/
6874
/stage1/
6975
/stage2/

branches/try2/src/doc/guide.md

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1626,10 +1626,6 @@ Check out the generated `Cargo.toml`:
16261626
name = "guessing_game"
16271627
version = "0.1.0"
16281628
authors = ["Your Name <you@example.com>"]
1629-
1630-
[[bin]]
1631-
1632-
name = "guessing_game"
16331629
```
16341630

16351631
Cargo gets this information from your environment. If it's not correct, go ahead
@@ -2569,9 +2565,9 @@ It gives an error:
25692565

25702566
```{notrust,ignore}
25712567
Compiling modules v0.1.0 (file:/home/you/projects/modules)
2572-
src/modules.rs:2:5: 2:23 error: function `print_hello` is private
2573-
src/modules.rs:2 hello::print_hello();
2574-
^~~~~~~~~~~~~~~~~~
2568+
src/main.rs:2:5: 2:23 error: function `print_hello` is private
2569+
src/main.rs:2 hello::print_hello();
2570+
^~~~~~~~~~~~~~~~~~
25752571
```
25762572

25772573
To make it public, we use the `pub` keyword:

branches/try2/src/etc/unicode.py

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -464,13 +464,26 @@ def emit_charwidth_module(f, width_table):
464464
pfun=lambda x: "(%s,%s,%s,%s)" % (escape_char(x[0]), escape_char(x[1]), x[2], x[3]))
465465
f.write("}\n\n")
466466

467-
def emit_norm_module(f, canon, compat, combine):
467+
def emit_norm_module(f, canon, compat, combine, norm_props):
468468
canon_keys = canon.keys()
469469
canon_keys.sort()
470470

471471
compat_keys = compat.keys()
472472
compat_keys.sort()
473473

474+
canon_comp = {}
475+
comp_exclusions = norm_props["Full_Composition_Exclusion"]
476+
for char in canon_keys:
477+
if True in map(lambda (lo, hi): lo <= char <= hi, comp_exclusions):
478+
continue
479+
decomp = canon[char]
480+
if len(decomp) == 2:
481+
if not canon_comp.has_key(decomp[0]):
482+
canon_comp[decomp[0]] = []
483+
canon_comp[decomp[0]].append( (decomp[1], char) )
484+
canon_comp_keys = canon_comp.keys()
485+
canon_comp_keys.sort()
486+
474487
f.write("pub mod normalization {\n")
475488

476489
def mkdata_fun(table):
@@ -494,6 +507,22 @@ def f(char):
494507
emit_table(f, "compatibility_table", compat_keys, "&'static [(char, &'static [char])]",
495508
pfun=mkdata_fun(compat))
496509

510+
def comp_pfun(char):
511+
data = "(%s,&[" % escape_char(char)
512+
canon_comp[char].sort(lambda x, y: x[0] - y[0])
513+
first = True
514+
for pair in canon_comp[char]:
515+
if not first:
516+
data += ","
517+
first = False
518+
data += "(%s,%s)" % (escape_char(pair[0]), escape_char(pair[1]))
519+
data += "])"
520+
return data
521+
522+
f.write(" // Canonical compositions\n")
523+
emit_table(f, "composition_table", canon_comp_keys,
524+
"&'static [(char, &'static [(char, char)])]", pfun=comp_pfun)
525+
497526
f.write("""
498527
fn bsearch_range_value_table(c: char, r: &'static [(char, char, u8)]) -> u8 {
499528
use core::option::{Some, None};
@@ -579,6 +608,8 @@ def optimize_width_table(wtable):
579608
scripts = load_properties("Scripts.txt", [])
580609
props = load_properties("PropList.txt",
581610
["White_Space", "Join_Control", "Noncharacter_Code_Point"])
611+
norm_props = load_properties("DerivedNormalizationProps.txt",
612+
["Full_Composition_Exclusion"])
582613

583614
# grapheme cluster category from DerivedCoreProperties
584615
# the rest are defined below
@@ -612,7 +643,7 @@ def optimize_width_table(wtable):
612643
emit_regex_module(rf, allcats, perl_words)
613644

614645
# normalizations and conversions module
615-
emit_norm_module(rf, canon_decomp, compat_decomp, combines)
646+
emit_norm_module(rf, canon_decomp, compat_decomp, combines, norm_props)
616647
emit_conversions_module(rf, lowerupper, upperlower)
617648

618649
### character width module

branches/try2/src/liballoc/heap.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,9 @@ mod imp {
136136
use libc::{c_char, c_int, c_void, size_t};
137137

138138
#[link(name = "jemalloc", kind = "static")]
139+
#[cfg(not(test))]
140+
extern {}
141+
139142
extern {
140143
fn je_mallocx(size: size_t, flags: c_int) -> *mut c_void;
141144
fn je_rallocx(ptr: *mut c_void, size: size_t,

branches/try2/src/libcollections/str.rs

Lines changed: 198 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,9 @@ use core::cmp;
7777
use core::iter::AdditiveIterator;
7878
use core::mem;
7979

80-
use {Collection, MutableSeq};
80+
use {Collection, Deque, MutableSeq};
8181
use hash;
82+
use ringbuf::RingBuf;
8283
use string::String;
8384
use unicode;
8485
use vec::Vec;
@@ -302,6 +303,106 @@ impl<'a> Iterator<char> for Decompositions<'a> {
302303
}
303304
}
304305

306+
#[deriving(Clone)]
307+
enum RecompositionState {
308+
Composing,
309+
Purging,
310+
Finished
311+
}
312+
313+
/// External iterator for a string's recomposition's characters.
314+
/// Use with the `std::iter` module.
315+
#[deriving(Clone)]
316+
pub struct Recompositions<'a> {
317+
iter: Decompositions<'a>,
318+
state: RecompositionState,
319+
buffer: RingBuf<char>,
320+
composee: Option<char>,
321+
last_ccc: Option<u8>
322+
}
323+
324+
impl<'a> Iterator<char> for Recompositions<'a> {
325+
#[inline]
326+
fn next(&mut self) -> Option<char> {
327+
loop {
328+
match self.state {
329+
Composing => {
330+
for ch in self.iter {
331+
let ch_class = unicode::char::canonical_combining_class(ch);
332+
if self.composee.is_none() {
333+
if ch_class != 0 {
334+
return Some(ch);
335+
}
336+
self.composee = Some(ch);
337+
continue;
338+
}
339+
let k = self.composee.clone().unwrap();
340+
341+
match self.last_ccc {
342+
None => {
343+
match unicode::char::compose(k, ch) {
344+
Some(r) => {
345+
self.composee = Some(r);
346+
continue;
347+
}
348+
None => {
349+
if ch_class == 0 {
350+
self.composee = Some(ch);
351+
return Some(k);
352+
}
353+
self.buffer.push(ch);
354+
self.last_ccc = Some(ch_class);
355+
}
356+
}
357+
}
358+
Some(l_class) => {
359+
if l_class >= ch_class {
360+
// `ch` is blocked from `composee`
361+
if ch_class == 0 {
362+
self.composee = Some(ch);
363+
self.last_ccc = None;
364+
self.state = Purging;
365+
return Some(k);
366+
}
367+
self.buffer.push(ch);
368+
self.last_ccc = Some(ch_class);
369+
continue;
370+
}
371+
match unicode::char::compose(k, ch) {
372+
Some(r) => {
373+
self.composee = Some(r);
374+
continue;
375+
}
376+
None => {
377+
self.buffer.push(ch);
378+
self.last_ccc = Some(ch_class);
379+
}
380+
}
381+
}
382+
}
383+
}
384+
self.state = Finished;
385+
if self.composee.is_some() {
386+
return self.composee.take();
387+
}
388+
}
389+
Purging => {
390+
match self.buffer.pop_front() {
391+
None => self.state = Composing,
392+
s => return s
393+
}
394+
}
395+
Finished => {
396+
match self.buffer.pop_front() {
397+
None => return self.composee.take(),
398+
s => return s
399+
}
400+
}
401+
}
402+
}
403+
}
404+
}
405+
305406
/// Replace all occurrences of one string with another
306407
///
307408
/// # Arguments
@@ -744,6 +845,32 @@ pub trait StrAllocating: Str {
744845
kind: Compatible
745846
}
746847
}
848+
849+
/// An Iterator over the string in Unicode Normalization Form C
850+
/// (canonical decomposition followed by canonical composition).
851+
#[inline]
852+
fn nfc_chars<'a>(&'a self) -> Recompositions<'a> {
853+
Recompositions {
854+
iter: self.nfd_chars(),
855+
state: Composing,
856+
buffer: RingBuf::new(),
857+
composee: None,
858+
last_ccc: None
859+
}
860+
}
861+
862+
/// An Iterator over the string in Unicode Normalization Form KC
863+
/// (compatibility decomposition followed by canonical composition).
864+
#[inline]
865+
fn nfkc_chars<'a>(&'a self) -> Recompositions<'a> {
866+
Recompositions {
867+
iter: self.nfkd_chars(),
868+
state: Composing,
869+
buffer: RingBuf::new(),
870+
composee: None,
871+
last_ccc: None
872+
}
873+
}
747874
}
748875

749876
impl<'a> StrAllocating for &'a str {
@@ -1754,39 +1881,80 @@ mod tests {
17541881

17551882
#[test]
17561883
fn test_nfd_chars() {
1757-
assert_eq!("abc".nfd_chars().collect::<String>(), String::from_str("abc"));
1758-
assert_eq!("\u1e0b\u01c4".nfd_chars().collect::<String>(),
1759-
String::from_str("d\u0307\u01c4"));
1760-
assert_eq!("\u2026".nfd_chars().collect::<String>(), String::from_str("\u2026"));
1761-
assert_eq!("\u2126".nfd_chars().collect::<String>(), String::from_str("\u03a9"));
1762-
assert_eq!("\u1e0b\u0323".nfd_chars().collect::<String>(),
1763-
String::from_str("d\u0323\u0307"));
1764-
assert_eq!("\u1e0d\u0307".nfd_chars().collect::<String>(),
1765-
String::from_str("d\u0323\u0307"));
1766-
assert_eq!("a\u0301".nfd_chars().collect::<String>(), String::from_str("a\u0301"));
1767-
assert_eq!("\u0301a".nfd_chars().collect::<String>(), String::from_str("\u0301a"));
1768-
assert_eq!("\ud4db".nfd_chars().collect::<String>(),
1769-
String::from_str("\u1111\u1171\u11b6"));
1770-
assert_eq!("\uac1c".nfd_chars().collect::<String>(), String::from_str("\u1100\u1162"));
1884+
macro_rules! t {
1885+
($input: expr, $expected: expr) => {
1886+
assert_eq!($input.nfd_chars().collect::<String>(), $expected.into_string());
1887+
}
1888+
}
1889+
t!("abc", "abc");
1890+
t!("\u1e0b\u01c4", "d\u0307\u01c4");
1891+
t!("\u2026", "\u2026");
1892+
t!("\u2126", "\u03a9");
1893+
t!("\u1e0b\u0323", "d\u0323\u0307");
1894+
t!("\u1e0d\u0307", "d\u0323\u0307");
1895+
t!("a\u0301", "a\u0301");
1896+
t!("\u0301a", "\u0301a");
1897+
t!("\ud4db", "\u1111\u1171\u11b6");
1898+
t!("\uac1c", "\u1100\u1162");
17711899
}
17721900

17731901
#[test]
17741902
fn test_nfkd_chars() {
1775-
assert_eq!("abc".nfkd_chars().collect::<String>(), String::from_str("abc"));
1776-
assert_eq!("\u1e0b\u01c4".nfkd_chars().collect::<String>(),
1777-
String::from_str("d\u0307DZ\u030c"));
1778-
assert_eq!("\u2026".nfkd_chars().collect::<String>(), String::from_str("..."));
1779-
assert_eq!("\u2126".nfkd_chars().collect::<String>(), String::from_str("\u03a9"));
1780-
assert_eq!("\u1e0b\u0323".nfkd_chars().collect::<String>(),
1781-
String::from_str("d\u0323\u0307"));
1782-
assert_eq!("\u1e0d\u0307".nfkd_chars().collect::<String>(),
1783-
String::from_str("d\u0323\u0307"));
1784-
assert_eq!("a\u0301".nfkd_chars().collect::<String>(), String::from_str("a\u0301"));
1785-
assert_eq!("\u0301a".nfkd_chars().collect::<String>(),
1786-
String::from_str("\u0301a"));
1787-
assert_eq!("\ud4db".nfkd_chars().collect::<String>(),
1788-
String::from_str("\u1111\u1171\u11b6"));
1789-
assert_eq!("\uac1c".nfkd_chars().collect::<String>(), String::from_str("\u1100\u1162"));
1903+
macro_rules! t {
1904+
($input: expr, $expected: expr) => {
1905+
assert_eq!($input.nfkd_chars().collect::<String>(), $expected.into_string());
1906+
}
1907+
}
1908+
t!("abc", "abc");
1909+
t!("\u1e0b\u01c4", "d\u0307DZ\u030c");
1910+
t!("\u2026", "...");
1911+
t!("\u2126", "\u03a9");
1912+
t!("\u1e0b\u0323", "d\u0323\u0307");
1913+
t!("\u1e0d\u0307", "d\u0323\u0307");
1914+
t!("a\u0301", "a\u0301");
1915+
t!("\u0301a", "\u0301a");
1916+
t!("\ud4db", "\u1111\u1171\u11b6");
1917+
t!("\uac1c", "\u1100\u1162");
1918+
}
1919+
1920+
#[test]
1921+
fn test_nfc_chars() {
1922+
macro_rules! t {
1923+
($input: expr, $expected: expr) => {
1924+
assert_eq!($input.nfc_chars().collect::<String>(), $expected.into_string());
1925+
}
1926+
}
1927+
t!("abc", "abc");
1928+
t!("\u1e0b\u01c4", "\u1e0b\u01c4");
1929+
t!("\u2026", "\u2026");
1930+
t!("\u2126", "\u03a9");
1931+
t!("\u1e0b\u0323", "\u1e0d\u0307");
1932+
t!("\u1e0d\u0307", "\u1e0d\u0307");
1933+
t!("a\u0301", "\xe1");
1934+
t!("\u0301a", "\u0301a");
1935+
t!("\ud4db", "\ud4db");
1936+
t!("\uac1c", "\uac1c");
1937+
t!("a\u0300\u0305\u0315\u05aeb", "\xe0\u05ae\u0305\u0315b");
1938+
}
1939+
1940+
#[test]
1941+
fn test_nfkc_chars() {
1942+
macro_rules! t {
1943+
($input: expr, $expected: expr) => {
1944+
assert_eq!($input.nfkc_chars().collect::<String>(), $expected.into_string());
1945+
}
1946+
}
1947+
t!("abc", "abc");
1948+
t!("\u1e0b\u01c4", "\u1e0bD\u017d");
1949+
t!("\u2026", "...");
1950+
t!("\u2126", "\u03a9");
1951+
t!("\u1e0b\u0323", "\u1e0d\u0307");
1952+
t!("\u1e0d\u0307", "\u1e0d\u0307");
1953+
t!("a\u0301", "\xe1");
1954+
t!("\u0301a", "\u0301a");
1955+
t!("\ud4db", "\ud4db");
1956+
t!("\uac1c", "\uac1c");
1957+
t!("a\u0300\u0305\u0315\u05aeb", "\xe0\u05ae\u0305\u0315b");
17901958
}
17911959

17921960
#[test]

branches/try2/src/libcore/fmt/mod.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,9 @@ impl<'a> Arguments<'a> {
127127
/// to prevent modification.
128128
///
129129
/// The `format_args!` macro will safely create an instance of this structure
130-
/// and pass it to a user-supplied function. The macro validates the format
131-
/// string at compile-time so usage of the `write` and `format` functions can
132-
/// be safely performed.
130+
/// and pass it to a function or closure, passed as the first argument. The
131+
/// macro validates the format string at compile-time so usage of the `write`
132+
/// and `format` functions can be safely performed.
133133
pub struct Arguments<'a> {
134134
fmt: &'a [rt::Piece<'a>],
135135
args: &'a [Argument<'a>],

0 commit comments

Comments
 (0)