Skip to content

Commit ccb1f86

Browse files
committed
---
yaml --- r: 144252 b: refs/heads/try2 c: 4e3dbf9 h: refs/heads/master v: v3
1 parent 94160fb commit ccb1f86

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+4198
-744
lines changed

[refs]

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ refs/heads/snap-stage3: 78a7676898d9f80ab540c6df5d4c9ce35bb50463
55
refs/heads/try: 519addf6277dbafccbb4159db4b710c37eaa2ec5
66
refs/tags/release-0.1: 1f5c5126e96c79d22cb7862f75304136e204f105
77
refs/heads/ndm: f3868061cd7988080c30d6d5bf352a5a5fe2460b
8-
refs/heads/try2: ffb6404c5ade3af113738c3cb72fd853e82a379e
8+
refs/heads/try2: 4e3dbf959a5124481d1e3ec9b0b30f48ac6dd4f0
99
refs/heads/dist-snap: ba4081a5a8573875fed17545846f6f6902c8ba8d
1010
refs/tags/release-0.2: c870d2dffb391e14efb05aa27898f1f6333a9596
1111
refs/tags/release-0.3: b5f0d0f648d9a6153664837026ba1be43d3e2503

branches/try2/src/etc/unicode.py

Lines changed: 150 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,15 @@ def fetch(f):
2626
def load_unicode_data(f):
2727
fetch(f)
2828
gencats = {}
29+
combines = []
2930
canon_decomp = {}
3031
compat_decomp = {}
3132
curr_cat = ""
33+
curr_combine = ""
3234
c_lo = 0
3335
c_hi = 0
36+
com_lo = 0
37+
com_hi = 0
3438
for line in fileinput.input(f):
3539
fields = line.split(";")
3640
if len(fields) != 15:
@@ -69,7 +73,21 @@ def load_unicode_data(f):
6973
c_lo = code
7074
c_hi = code
7175

72-
return (canon_decomp, compat_decomp, gencats)
76+
if curr_combine == "":
77+
curr_combine = combine
78+
com_lo = code
79+
com_hi = code
80+
81+
if curr_combine == combine:
82+
com_hi = code
83+
else:
84+
if curr_combine != "0":
85+
combines.append((com_lo, com_hi, curr_combine))
86+
curr_combine = combine
87+
com_lo = code
88+
com_hi = code
89+
90+
return (canon_decomp, compat_decomp, gencats, combines)
7391

7492

7593
def load_derived_core_properties(f):
@@ -178,50 +196,149 @@ def emit_property_module_old(f, mod, tbl):
178196
f.write(" }\n\n")
179197
f.write("}\n")
180198

181-
def emit_decomp_module(f, canon, compat):
199+
# Write `content` to `f` as comma-separated pieces wrapped over several lines.
# `content` is split on "," and the pieces are re-joined with ", ". A piece is
# appended to the current line while len(line) + len(piece) stays below 98;
# otherwise the current line is written followed by ",\n" and a new line is
# started from `indent` copies of " " plus that piece.
# NOTE(review): this view has lost the original indentation, so the nesting of
# the statements below cannot be read off here -- confirm against the real file.
def format_table_content(f, content, indent):
200+
line = " "*indent
201+
# `first` suppresses the ", " separator before the very first piece.
first = True
202+
for chunk in content.split(","):
203+
# Only the piece itself is measured; the ", " separator and the trailing
# "," added when a line is flushed are not part of this length check.
if len(line) + len(chunk) < 98:
204+
if first:
205+
line += chunk
206+
else:
207+
line += ", " + chunk
208+
first = False
209+
else:
210+
# Line is full: emit it with a trailing comma and restart with this piece.
f.write(line + ",\n")
211+
line = " "*indent + chunk
212+
# Presumably runs after the loop: writes the remaining text with no newline.
f.write(line)
213+
214+
def emit_decomp_module(f, canon, compat, combine):
182215
canon_keys = canon.keys()
183216
canon_keys.sort()
184217

185218
compat_keys = compat.keys()
186219
compat_keys.sort()
187-
f.write("mod decompose {\n\n");
188-
f.write(" export canonical, compatibility;\n\n")
189-
f.write(" fn canonical(c: char, i: block(char)) "
190-
+ "{ d(c, i, false); }\n\n")
191-
f.write(" fn compatibility(c: char, i: block(char)) "
192-
+"{ d(c, i, true); }\n\n")
193-
f.write(" fn d(c: char, i: block(char), k: bool) {\n")
220+
f.write("pub mod decompose {\n");
221+
f.write(" use option::Option;\n");
222+
f.write(" use option::{Some, None};\n");
223+
f.write(" use vec::ImmutableVector;\n");
224+
f.write("""
225+
fn bsearch_table(c: char, r: &'static [(char, &'static [char])]) -> Option<&'static [char]> {
226+
use cmp::{Equal, Less, Greater};
227+
match r.bsearch(|&(val, _)| {
228+
if c == val { Equal }
229+
else if val < c { Less }
230+
else { Greater }
231+
}) {
232+
Some(idx) => {
233+
let (_, result) = r[idx];
234+
Some(result)
235+
}
236+
None => None
237+
}
238+
}\n
239+
""")
194240

195-
f.write(" if c <= '\\x7f' { i(c); ret; }\n")
241+
f.write("""
242+
fn bsearch_range_value_table(c: char, r: &'static [(char, char, u8)]) -> u8 {
243+
use cmp::{Equal, Less, Greater};
244+
match r.bsearch(|&(lo, hi, _)| {
245+
if lo <= c && c <= hi { Equal }
246+
else if hi < c { Less }
247+
else { Greater }
248+
}) {
249+
Some(idx) => {
250+
let (_, _, result) = r[idx];
251+
result
252+
}
253+
None => 0
254+
}
255+
}\n\n
256+
""")
196257

197-
# First check the canonical decompositions
198-
f.write(" // Canonical decomposition\n")
199-
f.write(" alt c {\n")
258+
f.write(" // Canonical decompositions\n")
259+
f.write(" static canonical_table : &'static [(char, &'static [char])] = &[\n")
260+
data = ""
261+
first = True
200262
for char in canon_keys:
201-
f.write(" %s {\n" % escape_char(char))
263+
if not first:
264+
data += ","
265+
first = False
266+
data += "(%s,&[" % escape_char(char)
267+
first2 = True
202268
for d in canon[char]:
203-
f.write(" d(%s, i, k);\n"
204-
% escape_char(d))
205-
f.write(" }\n")
269+
if not first2:
270+
data += ","
271+
first2 = False
272+
data += escape_char(d)
273+
data += "])"
274+
format_table_content(f, data, 8)
275+
f.write("\n ];\n\n")
276+
277+
f.write(" // Compatibility decompositions\n")
278+
f.write(" static compatibility_table : &'static [(char, &'static [char])] = &[\n")
279+
data = ""
280+
first = True
281+
for char in compat_keys:
282+
if not first:
283+
data += ","
284+
first = False
285+
data += "(%s,&[" % escape_char(char)
286+
first2 = True
287+
for d in compat[char]:
288+
if not first2:
289+
data += ","
290+
first2 = False
291+
data += escape_char(d)
292+
data += "])"
293+
format_table_content(f, data, 8)
294+
f.write("\n ];\n\n")
295+
296+
f.write(" static combining_class_table : &'static [(char, char, u8)] = &[\n")
297+
ix = 0
298+
for pair in combine:
299+
f.write(ch_prefix(ix))
300+
f.write("(%s, %s, %s)" % (escape_char(pair[0]), escape_char(pair[1]), pair[2]))
301+
ix += 1
302+
f.write("\n ];\n")
303+
304+
f.write(" pub fn canonical(c: char, i: &fn(char)) "
305+
+ "{ d(c, i, false); }\n\n")
306+
f.write(" pub fn compatibility(c: char, i: &fn(char)) "
307+
+"{ d(c, i, true); }\n\n")
308+
f.write(" pub fn canonical_combining_class(c: char) -> u8 {\n"
309+
+ " bsearch_range_value_table(c, combining_class_table)\n"
310+
+ " }\n\n")
311+
f.write(" fn d(c: char, i: &fn(char), k: bool) {\n")
312+
f.write(" use iterator::Iterator;\n");
206313

207-
f.write(" _ { }\n")
208-
f.write(" }\n\n")
314+
f.write(" if c <= '\\x7f' { i(c); return; }\n")
315+
316+
# First check the canonical decompositions
317+
f.write("""
318+
match bsearch_table(c, canonical_table) {
319+
Some(canon) => {
320+
for x in canon.iter() {
321+
d(*x, |b| i(b), k);
322+
}
323+
return;
324+
}
325+
None => ()
326+
}\n\n""")
209327

210328
# Bottom out if we're not doing compat.
211-
f.write(" if !k { i(c); ret; }\n\n ")
329+
f.write(" if !k { i(c); return; }\n")
212330

213331
# Then check the compatibility decompositions
214-
f.write(" // Compatibility decomposition\n")
215-
f.write(" alt c {\n")
216-
for char in compat_keys:
217-
f.write(" %s {\n" % escape_char(char))
218-
for d in compat[char]:
219-
f.write(" d(%s, i, k);\n"
220-
% escape_char(d))
221-
f.write(" }\n")
222-
223-
f.write(" _ { }\n")
224-
f.write(" }\n\n")
332+
f.write("""
333+
match bsearch_table(c, compatibility_table) {
334+
Some(compat) => {
335+
for x in compat.iter() {
336+
d(*x, |b| i(b), k);
337+
}
338+
return;
339+
}
340+
None => ()
341+
}\n\n""")
225342

226343
# Finally bottom out.
227344
f.write(" i(c);\n")
@@ -234,7 +351,7 @@ def emit_decomp_module(f, canon, compat):
234351
os.remove(i);
235352
rf = open(r, "w")
236353

237-
(canon_decomp, compat_decomp, gencats) = load_unicode_data("UnicodeData.txt")
354+
(canon_decomp, compat_decomp, gencats, combines) = load_unicode_data("UnicodeData.txt")
238355

239356
# Preamble
240357
rf.write('''// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
@@ -256,7 +373,7 @@ def emit_decomp_module(f, canon, compat):
256373

257374
emit_property_module(rf, "general_category", gencats)
258375

259-
#emit_decomp_module(rf, canon_decomp, compat_decomp)
376+
emit_decomp_module(rf, canon_decomp, compat_decomp, combines)
260377

261378
derived = load_derived_core_properties("DerivedCoreProperties.txt")
262379
emit_property_module(rf, "derived_property", derived)

branches/try2/src/libextra/fileinput.rs

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ reset once it has been finished, so attempting to iterate on `[None,
2727
None]` will only take input once unless `io::stdin().seek(0, SeekSet)`
2828
is called between.
2929
30-
The `pathify` function handles converting a list of file paths as
30+
The `make_path_option_vec` function handles converting a list of file paths as
3131
strings to the appropriate format, including the (optional) conversion
3232
of `"-"` to `stdin`.
3333
@@ -42,7 +42,7 @@ to handle any `FileInput` structs. E.g. a simple `cat` program
4242
4343
or a program that numbers lines after concatenating two files
4444
45-
for input_vec_state(pathify([~"a.txt", ~"b.txt"])) |line, state| {
45+
for input_vec_state(make_path_option_vec([~"a.txt", ~"b.txt"])) |line, state| {
4646
io::println(fmt!("%u: %s", state.line_num,
4747
line));
4848
}
@@ -145,8 +145,14 @@ struct FileInput_ {
145145
previous_was_newline: bool
146146
}
147147

148-
// XXX: remove this when Reader has &mut self. Should be removable via
149-
// "self.fi." -> "self." and renaming FileInput_. Documentation above
148+
149+
// FIXME #5723: remove this when Reader has &mut self.
150+
// Removing it would mean giving read_byte in the Reader impl for
151+
// FileInput &mut self, which in turn means giving most of the
152+
// io::Reader trait methods &mut self. That can't be done right now
153+
// because of io::with_bytes_reader and #5723.
154+
// Should be removable via
155+
// "self.fi." -> "self." and renaming FileInput_. Documentation above
150156
// will likely have to be updated to use `let mut in = ...`.
151157
pub struct FileInput {
152158
fi: @mut FileInput_
@@ -194,7 +200,7 @@ impl FileInput {
194200
*/
195201
pub fn from_args() -> FileInput {
196202
let args = os::args();
197-
let pathed = pathify(args.tail(), true);
203+
let pathed = make_path_option_vec(args.tail(), true);
198204
FileInput::from_vec(pathed)
199205
}
200206

@@ -351,8 +357,7 @@ Convert a list of strings to an appropriate form for a `FileInput`
351357
instance. `stdin_hyphen` controls whether `-` represents `stdin` or
352358
a literal `-`.
353359
*/
354-
// XXX: stupid, unclear name
355-
pub fn pathify(vec: &[~str], stdin_hyphen : bool) -> ~[Option<Path>] {
360+
pub fn make_path_option_vec(vec: &[~str], stdin_hyphen : bool) -> ~[Option<Path>] {
356361
vec.iter().map(|str| {
357362
if stdin_hyphen && "-" == *str {
358363
None
@@ -410,7 +415,7 @@ pub fn input_vec_state(files: ~[Option<Path>],
410415
#[cfg(test)]
411416
mod test {
412417

413-
use super::{FileInput, pathify, input_vec, input_vec_state};
418+
use super::{FileInput, make_path_option_vec, input_vec, input_vec_state};
414419

415420
use std::io;
416421
use std::uint;
@@ -426,22 +431,22 @@ mod test {
426431
}
427432

428433
#[test]
429-
fn test_pathify() {
434+
fn test_make_path_option_vec() {
430435
let strs = [~"some/path",
431436
~"some/other/path"];
432437
let paths = ~[Some(Path("some/path")),
433438
Some(Path("some/other/path"))];
434439

435-
assert_eq!(pathify(strs, true), paths.clone());
436-
assert_eq!(pathify(strs, false), paths);
440+
assert_eq!(make_path_option_vec(strs, true), paths.clone());
441+
assert_eq!(make_path_option_vec(strs, false), paths);
437442

438-
assert_eq!(pathify([~"-"], true), ~[None]);
439-
assert_eq!(pathify([~"-"], false), ~[Some(Path("-"))]);
443+
assert_eq!(make_path_option_vec([~"-"], true), ~[None]);
444+
assert_eq!(make_path_option_vec([~"-"], false), ~[Some(Path("-"))]);
440445
}
441446
442447
#[test]
443448
fn test_fileinput_read_byte() {
444-
let filenames = pathify(vec::from_fn(
449+
let filenames = make_path_option_vec(vec::from_fn(
445450
3,
446451
|i| fmt!("tmp/lib-fileinput-test-fileinput-read-byte-%u.tmp", i)), true);
447452
@@ -471,7 +476,7 @@ mod test {
471476
472477
#[test]
473478
fn test_fileinput_read() {
474-
let filenames = pathify(vec::from_fn(
479+
let filenames = make_path_option_vec(vec::from_fn(
475480
3,
476481
|i| fmt!("tmp/lib-fileinput-test-fileinput-read-%u.tmp", i)), true);
477482
@@ -492,7 +497,7 @@ mod test {
492497
#[test]
493498
fn test_input_vec() {
494499
let mut all_lines = ~[];
495-
let filenames = pathify(vec::from_fn(
500+
let filenames = make_path_option_vec(vec::from_fn(
496501
3,
497502
|i| fmt!("tmp/lib-fileinput-test-input-vec-%u.tmp", i)), true);
498503

@@ -514,7 +519,7 @@ mod test {
514519

515520
#[test]
516521
fn test_input_vec_state() {
517-
let filenames = pathify(vec::from_fn(
522+
let filenames = make_path_option_vec(vec::from_fn(
518523
3,
519524
|i| fmt!("tmp/lib-fileinput-test-input-vec-state-%u.tmp", i)),true);
520525

@@ -536,7 +541,7 @@ mod test {
536541

537542
#[test]
538543
fn test_empty_files() {
539-
let filenames = pathify(vec::from_fn(
544+
let filenames = make_path_option_vec(vec::from_fn(
540545
3,
541546
|i| fmt!("tmp/lib-fileinput-test-empty-files-%u.tmp", i)),true);
542547

@@ -583,7 +588,7 @@ mod test {
583588
584589
#[test]
585590
fn test_next_file() {
586-
let filenames = pathify(vec::from_fn(
591+
let filenames = make_path_option_vec(vec::from_fn(
587592
3,
588593
|i| fmt!("tmp/lib-fileinput-test-next-file-%u.tmp", i)),true);
589594
@@ -614,7 +619,7 @@ mod test {
614619
#[test]
615620
#[should_fail]
616621
fn test_input_vec_missing_file() {
617-
do input_vec(pathify([~"this/file/doesnt/exist"], true)) |line| {
622+
do input_vec(make_path_option_vec([~"this/file/doesnt/exist"], true)) |line| {
618623
println(line);
619624
true
620625
};

0 commit comments

Comments
 (0)