Skip to content

Commit 602a94a

Browse files
committed
---
yaml --- r: 152611 b: refs/heads/try2 c: d7e01b5 h: refs/heads/master i: 152609: 5d0ec82 152607: d404430 v: v3
1 parent e255b03 commit 602a94a

File tree

13 files changed

+187
-72
lines changed

13 files changed

+187
-72
lines changed

[refs]

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ refs/heads/snap-stage3: 78a7676898d9f80ab540c6df5d4c9ce35bb50463
55
refs/heads/try: 519addf6277dbafccbb4159db4b710c37eaa2ec5
66
refs/tags/release-0.1: 1f5c5126e96c79d22cb7862f75304136e204f105
77
refs/heads/ndm: f3868061cd7988080c30d6d5bf352a5a5fe2460b
8-
refs/heads/try2: bccdba02960b3cd428addbc2c856065ebb81eb04
8+
refs/heads/try2: d7e01b5809cd600a30bab29da698acb3d1b52409
99
refs/heads/dist-snap: ba4081a5a8573875fed17545846f6f6902c8ba8d
1010
refs/tags/release-0.2: c870d2dffb391e14efb05aa27898f1f6333a9596
1111
refs/tags/release-0.3: b5f0d0f648d9a6153664837026ba1be43d3e2503

branches/try2/src/libcore/str.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,8 @@ Section: Comparing strings
560560

561561
// share the implementation of the lang-item vs. non-lang-item
562562
// eq_slice.
563+
/// NOTE: This function is (ab)used in rustc::middle::trans::_match
564+
/// to compare &[u8] byte slices that are not necessarily valid UTF-8.
563565
#[inline]
564566
fn eq_slice_(a: &str, b: &str) -> bool {
565567
#[allow(ctypes)]
@@ -572,6 +574,8 @@ fn eq_slice_(a: &str, b: &str) -> bool {
572574
}
573575

574576
/// Bytewise slice equality
577+
/// NOTE: This function is (ab)used in rustc::middle::trans::_match
578+
/// to compare &[u8] byte slices that are not necessarily valid UTF-8.
575579
#[cfg(not(test))]
576580
#[lang="str_eq"]
577581
#[inline]

branches/try2/src/libregex_macros/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str,
182182
#[allow(unused_variable)]
183183
fn run(&mut self, start: uint, end: uint) -> Vec<Option<uint>> {
184184
let mut matched = false;
185-
let prefix_bytes: &[u8] = &$prefix_bytes;
185+
let prefix_bytes: &[u8] = $prefix_bytes;
186186
let mut clist = &mut Threads::new(self.which);
187187
let mut nlist = &mut Threads::new(self.which);
188188

branches/try2/src/librustc/middle/const_eval.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -529,6 +529,7 @@ pub fn compare_const_vals(a: &const_val, b: &const_val) -> Option<int> {
529529
(&const_float(a), &const_float(b)) => compare_vals(a, b),
530530
(&const_str(ref a), &const_str(ref b)) => compare_vals(a, b),
531531
(&const_bool(a), &const_bool(b)) => compare_vals(a, b),
532+
(&const_binary(ref a), &const_binary(ref b)) => compare_vals(a, b),
532533
_ => None
533534
}
534535
}

branches/try2/src/librustc/middle/trans/_match.rs

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1273,13 +1273,24 @@ fn compare_values<'a>(
12731273
val: bool_to_i1(result.bcx, result.val)
12741274
}
12751275
}
1276-
_ => cx.sess().bug("only scalars and strings supported in compare_values"),
1276+
_ => cx.sess().bug("only strings supported in compare_values"),
12771277
},
12781278
ty::ty_rptr(_, mt) => match ty::get(mt.ty).sty {
12791279
ty::ty_str => compare_str(cx, lhs, rhs, rhs_t),
1280-
_ => cx.sess().bug("only scalars and strings supported in compare_values"),
1280+
ty::ty_vec(mt, _) => match ty::get(mt.ty).sty {
1281+
ty::ty_uint(ast::TyU8) => {
1282+
// NOTE: cast &[u8] to &str and abuse the str_eq lang item,
1283+
// which calls memcmp().
1284+
let t = ty::mk_str_slice(cx.tcx(), ty::ReStatic, ast::MutImmutable);
1285+
let lhs = BitCast(cx, lhs, type_of::type_of(cx.ccx(), t).ptr_to());
1286+
let rhs = BitCast(cx, rhs, type_of::type_of(cx.ccx(), t).ptr_to());
1287+
compare_str(cx, lhs, rhs, rhs_t)
1288+
},
1289+
_ => cx.sess().bug("only byte strings supported in compare_values"),
1290+
},
1291+
_ => cx.sess().bug("only strings and byte strings supported in compare_values"),
12811292
},
1282-
_ => cx.sess().bug("only scalars and strings supported in compare_values"),
1293+
_ => cx.sess().bug("only scalars, byte strings, and strings supported in compare_values"),
12831294
}
12841295
}
12851296

branches/try2/src/librustdoc/html/highlight.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,8 @@ fn doit(sess: &parse::ParseSess, mut lexer: lexer::StringReader,
140140
}
141141

142142
// text literals
143-
t::LIT_BYTE(..) | t::LIT_CHAR(..) | t::LIT_STR(..) | t::LIT_STR_RAW(..) => "string",
143+
t::LIT_BYTE(..) | t::LIT_BINARY(..) |
144+
t::LIT_CHAR(..) | t::LIT_STR(..) | t::LIT_STR_RAW(..) => "string",
144145

145146
// number literals
146147
t::LIT_INT(..) | t::LIT_UINT(..) | t::LIT_INT_UNSUFFIXED(..) |

branches/try2/src/libsyntax/parse/lexer/mod.rs

Lines changed: 111 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -654,7 +654,8 @@ impl<'a> StringReader<'a> {
654654
// Note: r as in r" or r#" is part of a raw string literal,
655655
// b as in b' or b" is part of a byte or byte string literal.
656656
// They are not identifiers, and are handled further down.
657-
('r', Some('"')) | ('r', Some('#')) | ('b', Some('\'')) => false,
657+
('r', Some('"')) | ('r', Some('#')) |
658+
('b', Some('"')) | ('b', Some('\'')) => false,
658659
_ => true
659660
} {
660661
let start = self.last_pos;
@@ -859,62 +860,124 @@ impl<'a> StringReader<'a> {
859860
}
860861
'b' => {
861862
self.bump();
862-
assert!(self.curr_is('\''), "Should have been a token::IDENT");
863-
self.bump();
864-
let start = self.last_pos;
865-
866-
// the eof will be picked up by the final `'` check below
867-
let mut c2 = self.curr.unwrap_or('\x00');
868-
self.bump();
863+
return match self.curr {
864+
Some('\'') => parse_byte(self),
865+
Some('"') => parse_byte_string(self),
866+
_ => unreachable!() // Should have been a token::IDENT above.
867+
};
869868

870-
match c2 {
871-
'\\' => {
872-
// '\X' for some X must be a character constant:
873-
let escaped = self.curr;
874-
let escaped_pos = self.last_pos;
875-
self.bump();
876-
match escaped {
877-
None => {}
878-
Some(e) => {
879-
c2 = match e {
880-
'n' => '\n',
881-
'r' => '\r',
882-
't' => '\t',
883-
'\\' => '\\',
884-
'\'' => '\'',
885-
'"' => '"',
886-
'0' => '\x00',
887-
'x' => self.scan_numeric_escape(2u, '\''),
888-
c2 => {
889-
self.err_span_char(escaped_pos, self.last_pos,
890-
"unknown byte escape", c2);
891-
c2
869+
fn parse_byte(self_: &mut StringReader) -> token::Token {
870+
self_.bump();
871+
let start = self_.last_pos;
872+
873+
// the eof will be picked up by the final `'` check below
874+
let mut c2 = self_.curr.unwrap_or('\x00');
875+
self_.bump();
876+
877+
match c2 {
878+
'\\' => {
879+
// '\X' for some X must be a character constant:
880+
let escaped = self_.curr;
881+
let escaped_pos = self_.last_pos;
882+
self_.bump();
883+
match escaped {
884+
None => {}
885+
Some(e) => {
886+
c2 = match e {
887+
'n' => '\n',
888+
'r' => '\r',
889+
't' => '\t',
890+
'\\' => '\\',
891+
'\'' => '\'',
892+
'"' => '"',
893+
'0' => '\x00',
894+
'x' => self_.scan_numeric_escape(2u, '\''),
895+
c2 => {
896+
self_.err_span_char(
897+
escaped_pos, self_.last_pos,
898+
"unknown byte escape", c2);
899+
c2
900+
}
892901
}
893902
}
894903
}
895904
}
905+
'\t' | '\n' | '\r' | '\'' => {
906+
self_.err_span_char( start, self_.last_pos,
907+
"byte constant must be escaped", c2);
908+
}
909+
_ => if c2 > '\x7F' {
910+
self_.err_span_char( start, self_.last_pos,
911+
"byte constant must be ASCII. \
912+
Use a \\xHH escape for a non-ASCII byte", c2);
913+
}
896914
}
897-
'\t' | '\n' | '\r' | '\'' => {
898-
self.err_span_char( start, self.last_pos,
899-
"byte constant must be escaped", c2);
900-
}
901-
_ if c2 > '\x7F' => {
902-
self.err_span_char( start, self.last_pos,
903-
"byte constant must be ASCII. \
904-
Use a \\xHH escape for a non-ASCII byte", c2);
915+
if !self_.curr_is('\'') {
916+
// Byte offsetting here is okay because the
917+
// characters before position `start` are an
918+
// ascii single quote and ascii 'b'.
919+
self_.fatal_span_verbose(
920+
start - BytePos(2), self_.last_pos,
921+
"unterminated byte constant".to_string());
905922
}
906-
_ => {}
923+
self_.bump(); // advance curr past token
924+
return token::LIT_BYTE(c2 as u8);
907925
}
908-
if !self.curr_is('\'') {
909-
self.fatal_span_verbose(
910-
// Byte offsetting here is okay because the
911-
// character before position `start` are an
912-
// ascii single quote and ascii 'b'.
913-
start - BytePos(2), self.last_pos,
914-
"unterminated byte constant".to_string());
926+
927+
fn parse_byte_string(self_: &mut StringReader) -> token::Token {
928+
self_.bump();
929+
let start = self_.last_pos;
930+
let mut value = Vec::new();
931+
while !self_.curr_is('"') {
932+
if self_.is_eof() {
933+
self_.fatal_span(start, self_.last_pos,
934+
"unterminated double quote byte string");
935+
}
936+
937+
let ch = self_.curr.unwrap();
938+
self_.bump();
939+
match ch {
940+
'\\' => {
941+
if self_.is_eof() {
942+
self_.fatal_span(start, self_.last_pos,
943+
"unterminated double quote byte string");
944+
}
945+
946+
let escaped = self_.curr.unwrap();
947+
let escaped_pos = self_.last_pos;
948+
self_.bump();
949+
match escaped {
950+
'n' => value.push('\n' as u8),
951+
'r' => value.push('\r' as u8),
952+
't' => value.push('\t' as u8),
953+
'\\' => value.push('\\' as u8),
954+
'\'' => value.push('\'' as u8),
955+
'"' => value.push('"' as u8),
956+
'\n' => self_.consume_whitespace(),
957+
'0' => value.push(0),
958+
'x' => {
959+
value.push(self_.scan_numeric_escape(2u, '"') as u8);
960+
}
961+
c2 => {
962+
self_.err_span_char(escaped_pos, self_.last_pos,
963+
"unknown byte string escape", c2);
964+
}
965+
}
966+
}
967+
_ => {
968+
if ch <= '\x7F' {
969+
value.push(ch as u8)
970+
} else {
971+
self_.err_span_char(self_.last_pos, self_.last_pos,
972+
"byte string must be ASCII. \
973+
Use a \\xHH escape for a non-ASCII byte", ch);
974+
}
975+
}
976+
}
977+
}
978+
self_.bump();
979+
return token::LIT_BINARY(Rc::new(value));
915980
}
916-
self.bump(); // advance curr past token
917-
return token::LIT_BYTE(c2 as u8);
918981
}
919982
'"' => {
920983
let mut accum_str = String::new();

branches/try2/src/libsyntax/parse/parser.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ use ast::{ForeignItem, ForeignItemStatic, ForeignItemFn, ForeignMod};
3333
use ast::{Ident, NormalFn, Inherited, Item, Item_, ItemStatic};
3434
use ast::{ItemEnum, ItemFn, ItemForeignMod, ItemImpl};
3535
use ast::{ItemMac, ItemMod, ItemStruct, ItemTrait, ItemTy, Lit, Lit_};
36-
use ast::{LitBool, LitFloat, LitFloatUnsuffixed, LitInt, LitChar, LitByte};
36+
use ast::{LitBool, LitFloat, LitFloatUnsuffixed, LitInt, LitChar, LitByte, LitBinary};
3737
use ast::{LitIntUnsuffixed, LitNil, LitStr, LitUint, Local, LocalLet};
3838
use ast::{MutImmutable, MutMutable, Mac_, MacInvocTT, Matcher, MatchNonterminal};
3939
use ast::{MatchSeq, MatchTok, Method, MutTy, BiMul, Mutability};
@@ -1529,6 +1529,7 @@ impl<'a> Parser<'a> {
15291529
token::LIT_STR_RAW(s, n) => {
15301530
LitStr(self.id_to_interned_str(s), ast::RawStr(n))
15311531
}
1532+
token::LIT_BINARY(ref v) => LitBinary(v.clone()),
15321533
token::LPAREN => { self.expect(&token::RPAREN); LitNil },
15331534
_ => { self.unexpected_last(tok); }
15341535
}

branches/try2/src/libsyntax/parse/token.rs

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ pub enum Token {
8787
LIT_FLOAT_UNSUFFIXED(ast::Ident),
8888
LIT_STR(ast::Ident),
8989
LIT_STR_RAW(ast::Ident, uint), /* raw str delimited by n hash symbols */
90+
LIT_BINARY(Rc<Vec<u8>>),
9091

9192
/* Name components */
9293
// an identifier contains an "is_mod_name" boolean,
@@ -231,17 +232,22 @@ pub fn to_str(t: &Token) -> String {
231232
body
232233
}
233234
LIT_STR(s) => {
234-
(format!("\"{}\"", get_ident(s).get().escape_default())).to_string()
235+
format!("\"{}\"", get_ident(s).get().escape_default())
235236
}
236237
LIT_STR_RAW(s, n) => {
237-
(format!("r{delim}\"{string}\"{delim}",
238-
delim="#".repeat(n), string=get_ident(s))).to_string()
238+
format!("r{delim}\"{string}\"{delim}",
239+
delim="#".repeat(n), string=get_ident(s))
240+
}
241+
LIT_BINARY(ref v) => {
242+
format!(
243+
"b\"{}\"",
244+
v.iter().map(|&b| b as char).collect::<String>().escape_default())
239245
}
240246

241247
/* Name components */
242248
IDENT(s, _) => get_ident(s).get().to_string(),
243249
LIFETIME(s) => {
244-
(format!("{}", get_ident(s))).to_string()
250+
format!("{}", get_ident(s))
245251
}
246252
UNDERSCORE => "_".to_string(),
247253

@@ -291,6 +297,7 @@ pub fn can_begin_expr(t: &Token) -> bool {
291297
LIT_FLOAT_UNSUFFIXED(_) => true,
292298
LIT_STR(_) => true,
293299
LIT_STR_RAW(_, _) => true,
300+
LIT_BINARY(_) => true,
294301
POUND => true,
295302
AT => true,
296303
NOT => true,
@@ -330,6 +337,7 @@ pub fn is_lit(t: &Token) -> bool {
330337
LIT_FLOAT_UNSUFFIXED(_) => true,
331338
LIT_STR(_) => true,
332339
LIT_STR_RAW(_, _) => true,
340+
LIT_BINARY(_) => true,
333341
_ => false
334342
}
335343
}

branches/try2/src/libsyntax/print/pprust.rs

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2342,19 +2342,9 @@ impl<'a> State<'a> {
23422342
ast::LitBool(val) => {
23432343
if val { word(&mut self.s, "true") } else { word(&mut self.s, "false") }
23442344
}
2345-
ast::LitBinary(ref arr) => {
2346-
try!(self.ibox(indent_unit));
2347-
try!(word(&mut self.s, "["));
2348-
try!(self.commasep_cmnt(Inconsistent,
2349-
arr.as_slice(),
2350-
|s, u| {
2351-
word(&mut s.s,
2352-
format!("{}",
2353-
*u).as_slice())
2354-
},
2355-
|_| lit.span));
2356-
try!(word(&mut self.s, "]"));
2357-
self.end()
2345+
ast::LitBinary(ref v) => {
2346+
let escaped: String = v.iter().map(|&b| b as char).collect();
2347+
word(&mut self.s, format!("b\"{}\"", escaped.escape_default()).as_slice())
23582348
}
23592349
}
23602350
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
12+
// ignore-tidy-tab
13+
14+
static FOO: &'static [u8] = b"\f"; //~ ERROR unknown byte string escape
15+
16+
pub fn main() {
17+
b"\f"; //~ ERROR unknown byte string escape
18+
b"\x0Z"; //~ ERROR illegal character in numeric character escape: Z
19+
b"é"; //~ ERROR byte string must be ASCII
20+
b"a //~ ERROR unterminated double quote byte string
21+
}
22+
23+

branches/try2/src/test/compile-fail/concat.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
fn main() {
1212
concat!(b'f'); //~ ERROR: cannot concatenate a binary literal
13+
concat!(b"foo"); //~ ERROR: cannot concatenate a binary literal
1314
concat!(foo); //~ ERROR: expected a literal
1415
concat!(foo()); //~ ERROR: expected a literal
1516
}

0 commit comments

Comments
 (0)