Skip to content

Commit 1deebee

Browse files
committed
Fix #3961: use char range methods instead of byte offsets to detect whitespace.
1 parent 541c657 commit 1deebee

File tree

2 files changed

+132
-14
lines changed

2 files changed

+132
-14
lines changed

src/libsyntax/parse/comments.rs

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -198,26 +198,35 @@ fn read_line_comments(rdr: @mut StringReader, code_to_the_left: bool,
198198
}
199199
}
200200

201-
// FIXME #3961: This is not the right way to convert string byte
202-
// offsets to characters.
203-
fn all_whitespace(s: &str, begin: uint, end: uint) -> bool {
204-
let mut i: uint = begin;
205-
while i != end {
206-
if !is_whitespace(s[i] as char) { return false; } i += 1u;
201+
// Returns None if the first col chars of s contain a non-whitespace char.
202+
// Otherwise returns Some(k) where k is the byte offset just past that leading
203+
// whitespace. Note k may be outside bounds of s (i.e. k == s.len()).
204+
fn all_whitespace(s: &str, col: CharPos) -> Option<uint> {
205+
let len = s.len();
206+
let mut col = col.to_uint();
207+
let mut cursor: uint = 0;
208+
while col > 0 && cursor < len {
209+
let r: str::CharRange = str::char_range_at(s, cursor);
210+
if !r.ch.is_whitespace() {
211+
return None;
212+
}
213+
cursor = r.next;
214+
col -= 1;
207215
}
208-
return true;
216+
return Some(cursor);
209217
}
210218

211219
fn trim_whitespace_prefix_and_push_line(lines: &mut ~[~str],
212220
s: ~str, col: CharPos) {
213221
let len = s.len();
214-
// FIXME #3961: Doing bytewise comparison and slicing with CharPos
215-
let col = col.to_uint();
216-
let s1 = if all_whitespace(s, 0, uint::min(len, col)) {
217-
if col < len {
218-
s.slice(col, len).to_owned()
219-
} else { ~"" }
220-
} else { s };
222+
let s1 = match all_whitespace(s, col) {
223+
Some(col) => {
224+
if col < len {
225+
s.slice(col, len).to_owned()
226+
} else { ~"" }
227+
}
228+
None => s,
229+
};
221230
debug!("pushing line: %s", s1);
222231
lines.push(s1);
223232
}

src/test/pretty/block-comment-wchar.rs

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
// Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
// This is meant as a test case for Issue 3961.
12+
//
13+
// Test via: rustc --pretty normal src/test/pretty/block-comment-wchar.rs
14+
15+
fn f() {
16+
fn nested() {
17+
/*
18+
Spaced2
19+
*/
20+
/*
21+
Spaced10
22+
*/
23+
/*
24+
Tabbed8+2
25+
*/
26+
/*
27+
CR8+2
28+
*/
29+
}
30+
/*
31+
Spaced2: (prefixed so start of space aligns with comment)
32+
*/
33+
/*
34+
Tabbed2: (more indented b/c *start* of space will align with comment)
35+
*/
36+
/*
37+
Spaced6: (Alignment removed and realigning spaces inserted)
38+
*/
39+
/*
40+
Tabbed4+2: (Alignment removed and realigning spaces inserted)
41+
*/
42+
43+
/*
44+
VT4+2: (should align)
45+
*/
46+
/*
47+
FF4+2: (should align)
48+
*/
49+
/*
50+
CR4+2: (should align)
51+
*/
52+
/*
53+
// (NEL deliberately omitted)
54+
*/
55+
/*
56+
     Ogham Space Mark 4+2: (should align)
57+
*/
58+
/*
59+
᠎᠎᠎᠎ Mongolian Vowel Separator 4+2: (should align)
60+
*/
61+
/*
62+
     Four-per-em space 4+2: (should align)
63+
*/
64+
65+
/*
66+
᠎ Mongolian Vowel Sep count 1: (should align)
67+
᠎ Mongolian Vowel Sep count 2: (should align)
68+
᠎᠎ Mongolian Vowel Sep count 3: (should align)
69+
᠎ Mongolian Vowel Sep count 4: (should align)
70+
᠎ ᠎ Mongolian Vowel Sep count 5: (should align)
71+
᠎᠎ Mongolian Vowel Sep count 6: (should align)
72+
᠎᠎᠎ Mongolian Vowel Sep count 7: (should align)
73+
᠎ Mongolian Vowel Sep count 8: (should align)
74+
᠎ ᠎ Mongolian Vowel Sep count 9: (should align)
75+
᠎ ᠎ Mongolian Vowel Sep count A: (should align)
76+
᠎ ᠎᠎ Mongolian Vowel Sep count B: (should align)
77+
᠎᠎ Mongolian Vowel Sep count C: (should align)
78+
᠎᠎ ᠎ Mongolian Vowel Sep count D: (should align)
79+
᠎᠎᠎ Mongolian Vowel Sep count E: (should align)
80+
᠎᠎᠎᠎ Mongolian Vowel Sep count F: (should align)
81+
*/
82+
83+
/* */ /*
84+
Hello from offset 6
85+
Space 6+2: compare A
86+
᠎᠎᠎᠎᠎᠎ Mongolian Vowel Separator 6+2: compare B
87+
*/
88+
/*᠎*/ /*
89+
Hello from another offset 6 with wchars establishing column offset
90+
Space 6+2: compare C
91+
᠎᠎᠎᠎᠎᠎ Mongolian Vowel Separator 6+2: compare D
92+
*/
93+
}
94+
95+
fn main() {
96+
// Taken from http://en.wikipedia.org/wiki/Whitespace_character
97+
let chars = [ '\x0A', '\x0B', '\x0C', '\x0D', '\x20',
98+
// '\x85', // for some reason Rust thinks NEL isn't whitespace
99+
'\xA0', '\u1680', '\u180E',
100+
'\u2000', '\u2001', '\u2002', '\u2003',
101+
'\u2004', '\u2005', '\u2006', '\u2007',
102+
'\u2008', '\u2009', '\u200A',
103+
'\u2028', '\u2029', '\u202F', '\u205F',
104+
'\u3000'
105+
];
106+
for vec::each(chars) |c| {
107+
io::println(fmt!("%? %?", c, c.is_whitespace()));
108+
}
109+
}

0 commit comments

Comments
 (0)