Skip to content

Commit e2b4fce

Browse files
committed
---
yaml --- r: 13160 b: refs/heads/master c: 57f399b h: refs/heads/master v: v3
1 parent c1f7f31 commit e2b4fce

File tree

6 files changed

+137
-34
lines changed

6 files changed

+137
-34
lines changed

[refs]

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
---
2-
refs/heads/master: c2ce2741a773b94d3a8f7293cb598322bc61f89d
2+
refs/heads/master: 57f399bd638c211005e8565609e928db33ebf864
33
refs/heads/snap-stage1: e33de59e47c5076a89eadeb38f4934f58a3618a6
44
refs/heads/snap-stage3: 4a81779abd786ff22d71434c6d9a5917ea4cdfff
55
refs/heads/try: 2898dcc5d97da9427ac367542382b6239d9c0bbf

trunk/src/libcore/char.rs

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ export is_alphabetic,
3838
is_lowercase, is_uppercase,
3939
is_whitespace, is_alphanumeric,
4040
is_ascii, is_digit,
41-
to_digit, cmp;
41+
to_digit, cmp,
42+
escape_default, escape_unicode;
4243

4344
import is_alphabetic = unicode::derived_property::Alphabetic;
4445
import is_XID_start = unicode::derived_property::XID_Start;
@@ -122,6 +123,53 @@ pure fn to_digit(c: char, radix: uint) -> option<uint> {
122123
else { none }
123124
}
124125

126+
#[doc = "
127+
Return the hexadecimal unicode escape of a char.
128+
129+
The rules are as follows:
130+
131+
- chars in [0,0xff] get 2-digit escapes: `\\xNN`
132+
- chars in [0x100,0xffff] get 4-digit escapes: `\\uNNNN`
133+
- chars above 0xffff get 8-digit escapes: `\\UNNNNNNNN`
134+
"]
135+
fn escape_unicode(c: char) -> str {
136+
let s = u32::to_str(c as u32, 16u);
137+
let (c, pad) = (if c <= '\xff' { ('x', 2u) }
138+
else if c <= '\uffff' { ('u', 4u) }
139+
else { ('U', 8u) });
140+
assert str::len(s) <= pad;
141+
let mut out = "\\";
142+
out += str::from_char(c);
143+
for uint::range(str::len(s), pad) {|_i| out += "0"; }
144+
out += s;
145+
ret out;
146+
}
147+
148+
#[doc = "
149+
Return a 'default' ASCII and C++11-like char-literal escape of a char.
150+
151+
The default is chosen with a bias toward producing literals that are
152+
legal in a variety of languages, including C++11 and similar C-family
153+
languages. The exact rules are:
154+
155+
- Tab, CR and LF are escaped as '\\t', '\\r' and '\\n' respectively.
156+
- Single-quote, double-quote and backslash chars are backslash-escaped.
157+
- Any other chars in the range [0x20,0x7e] are not escaped.
158+
- Any other chars are given hex unicode escapes; see `escape_unicode`.
159+
"]
160+
fn escape_default(c: char) -> str {
161+
alt c {
162+
'\t' { "\\t" }
163+
'\r' { "\\r" }
164+
'\n' { "\\n" }
165+
'\\' { "\\\\" }
166+
'\'' { "\\'" }
167+
'"' { "\\\"" }
168+
'\x20' to '\x7e' { str::from_char(c) }
169+
_ { escape_unicode(c) }
170+
}
171+
}
172+
125173
#[doc = "
126174
Compare two chars
127175
@@ -198,3 +246,30 @@ fn test_is_digit() {
198246
assert ! is_digit('Q');
199247
}
200248

249+
#[test]
250+
fn test_escape_default() {
251+
assert escape_default('\n') == "\\n";
252+
assert escape_default('\r') == "\\r";
253+
assert escape_default('\'') == "\\'";
254+
assert escape_default('"') == "\\\"";
255+
assert escape_default(' ') == " ";
256+
assert escape_default('a') == "a";
257+
assert escape_default('~') == "~";
258+
assert escape_default('\x00') == "\\x00";
259+
assert escape_default('\x1f') == "\\x1f";
260+
assert escape_default('\x7f') == "\\x7f";
261+
assert escape_default('\xff') == "\\xff";
262+
assert escape_default('\u011b') == "\\u011b";
263+
assert escape_default('\U0001d4b6') == "\\U0001d4b6";
264+
}
265+
266+
267+
#[test]
268+
fn test_escape_unicode() {
269+
assert escape_unicode('\x00') == "\\x00";
270+
assert escape_unicode('\n') == "\\x0a";
271+
assert escape_unicode(' ') == "\\x20";
272+
assert escape_unicode('a') == "\\x61";
273+
assert escape_unicode('\u011b') == "\\u011b";
274+
assert escape_unicode('\U0001d4b6') == "\\U0001d4b6";
275+
}

trunk/src/libcore/str.rs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@ export
9797
reserve,
9898
reserve_at_least,
9999
capacity,
100+
escape_default,
101+
escape_unicode,
100102

101103
unsafe,
102104
extensions;
@@ -1625,6 +1627,22 @@ fn capacity(&&s: str) -> uint unsafe {
16251627
}
16261628
}
16271629

1630+
#[doc = "Escape each char in `s` with char::escape_default."]
1631+
fn escape_default(s: str) -> str {
1632+
let mut out: str = "";
1633+
reserve_at_least(out, str::len(s));
1634+
chars_iter(s) {|c| out += char::escape_default(c); }
1635+
ret out;
1636+
}
1637+
1638+
#[doc = "Escape each char in `s` with char::escape_unicode."]
1639+
fn escape_unicode(s: str) -> str {
1640+
let mut out: str = "";
1641+
reserve_at_least(out, str::len(s));
1642+
chars_iter(s) {|c| out += char::escape_unicode(c); }
1643+
ret out;
1644+
}
1645+
16281646
#[doc = "Unsafe operations"]
16291647
mod unsafe {
16301648
export
@@ -1866,6 +1884,12 @@ impl extensions for str {
18661884
#[doc = "Returns a string with trailing whitespace removed"]
18671885
#[inline]
18681886
fn trim_right() -> str { trim_right(self) }
1887+
#[doc = "Escape each char in `self` with char::escape_default."]
1888+
#[inline]
1889+
fn escape_default() -> str { escape_default(self) }
1890+
#[doc = "Escape each char in `self` with char::escape_unicode."]
1891+
#[inline]
1892+
fn escape_unicode() -> str { escape_unicode(self) }
18691893
}
18701894

18711895
#[cfg(test)]
@@ -2748,4 +2772,32 @@ mod tests {
27482772
assert *ptr::offset(buf,5u) == 0u8;
27492773
}
27502774
}
2775+
2776+
#[test]
2777+
fn test_escape_unicode() {
2778+
assert escape_unicode("abc") == "\\x61\\x62\\x63";
2779+
assert escape_unicode("a c") == "\\x61\\x20\\x63";
2780+
assert escape_unicode("\r\n\t") == "\\x0d\\x0a\\x09";
2781+
assert escape_unicode("'\"\\") == "\\x27\\x22\\x5c";
2782+
assert escape_unicode("\x00\x01\xfe\xff") == "\\x00\\x01\\xfe\\xff";
2783+
assert escape_unicode("\u0100\uffff") == "\\u0100\\uffff";
2784+
assert escape_unicode("\U00010000\U0010ffff") ==
2785+
"\\U00010000\\U0010ffff";
2786+
assert escape_unicode("ab\ufb00") == "\\x61\\x62\\ufb00";
2787+
assert escape_unicode("\U0001d4ea\r") == "\\U0001d4ea\\x0d";
2788+
}
2789+
2790+
#[test]
2791+
fn test_escape_default() {
2792+
assert escape_default("abc") == "abc";
2793+
assert escape_default("a c") == "a c";
2794+
assert escape_default("\r\n\t") == "\\r\\n\\t";
2795+
assert escape_default("'\"\\") == "\\'\\\"\\\\";
2796+
assert escape_default("\u0100\uffff") == "\\u0100\\uffff";
2797+
assert escape_default("\U00010000\U0010ffff") ==
2798+
"\\U00010000\\U0010ffff";
2799+
assert escape_default("ab\ufb00") == "ab\\ufb00";
2800+
assert escape_default("\U0001d4ea\r") == "\\U0001d4ea\\r";
2801+
}
2802+
27512803
}

trunk/src/libsyntax/parse/lexer.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,7 @@ fn next_token_inner(rdr: reader) -> token::token {
434434
't' { c2 = '\t'; }
435435
'\\' { c2 = '\\'; }
436436
'\'' { c2 = '\''; }
437+
'"' { c2 = '"'; }
437438
'x' { c2 = scan_numeric_escape(rdr, 2u); }
438439
'u' { c2 = scan_numeric_escape(rdr, 4u); }
439440
'U' { c2 = scan_numeric_escape(rdr, 8u); }

trunk/src/libsyntax/parse/token.rs

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -122,11 +122,7 @@ fn to_str(in: interner<str>, t: token) -> str {
122122

123123
/* Literals */
124124
LIT_INT(c, ast::ty_char) {
125-
// FIXME: escape.
126-
let mut tmp = "'";
127-
str::push_char(tmp, c as char);
128-
str::push_char(tmp, '\'');
129-
ret tmp;
125+
ret "'" + char::escape_default(c as char) + "'";
130126
}
131127
LIT_INT(i, t) {
132128
ret int::to_str(i as int, 10u) + ast_util::int_ty_to_str(t);
@@ -138,10 +134,11 @@ fn to_str(in: interner<str>, t: token) -> str {
138134
ret interner::get::<str>(in, s) +
139135
ast_util::float_ty_to_str(t);
140136
}
141-
LIT_STR(s) { // FIXME: escape.
142-
ret "\"" + interner::get::<str>(in, s) + "\"";
137+
LIT_STR(s) {
138+
ret "\""
139+
+ str::escape_default(interner::get::<str>(in, s))
140+
+ "\"";
143141
}
144-
145142
/* Name components */
146143
IDENT(s, _) {
147144
ret interner::get::<str>(in, s);

trunk/src/libsyntax/print/pprust.rs

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1609,7 +1609,7 @@ fn print_literal(s: ps, &&lit: @ast::lit) {
16091609
alt lit.node {
16101610
ast::lit_str(st) { print_string(s, st); }
16111611
ast::lit_int(ch, ast::ty_char) {
1612-
word(s.s, "'" + escape_str(str::from_char(ch as char), '\'') + "'");
1612+
word(s.s, "'" + char::escape_default(ch as char) + "'");
16131613
}
16141614
ast::lit_int(i, t) {
16151615
if i < 0_i64 {
@@ -1714,32 +1714,10 @@ fn print_comment(s: ps, cmnt: comments::cmnt) {
17141714

17151715
fn print_string(s: ps, st: str) {
17161716
word(s.s, "\"");
1717-
word(s.s, escape_str(st, '"'));
1717+
word(s.s, str::escape_default(st));
17181718
word(s.s, "\"");
17191719
}
17201720

1721-
fn escape_str(st: str, to_escape: char) -> str {
1722-
let mut out: str = "";
1723-
let len = str::len(st);
1724-
let mut i = 0u;
1725-
while i < len {
1726-
alt st[i] as char {
1727-
'\n' { out += "\\n"; }
1728-
'\t' { out += "\\t"; }
1729-
'\r' { out += "\\r"; }
1730-
'\\' { out += "\\\\"; }
1731-
cur {
1732-
if cur == to_escape { out += "\\"; }
1733-
// FIXME some (or all?) non-ascii things should be escaped
1734-
// (See #2306)
1735-
str::push_char(out, cur);
1736-
}
1737-
}
1738-
i += 1u;
1739-
}
1740-
ret out;
1741-
}
1742-
17431721
fn to_str<T>(t: T, f: fn@(ps, T)) -> str {
17441722
let buffer = io::mem_buffer();
17451723
let s = rust_printer(io::mem_buffer_writer(buffer));

0 commit comments

Comments
 (0)