Skip to content

Commit 904c6c4

Browse files
committed
lex raw string literals, like r#"blah"#
Raw string literals are lexed into regular string literals. This is okay for them to "work" and be usable/testable, but the pretty-printer does not know about them yet and will just emit regular string literals.
1 parent e007f94 commit 904c6c4

File tree

5 files changed

+107
-2
lines changed

5 files changed

+107
-2
lines changed

src/libsyntax/parse/lexer.rs

Lines changed: 66 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,10 +213,22 @@ fn byte_offset(rdr: &StringReader, pos: BytePos) -> BytePos {
213213
(pos - rdr.filemap.start_pos)
214214
}
215215

216+
/// Calls `f` with a string slice of the source text spanning from `start`
217+
/// up to but excluding `rdr.last_pos`, meaning the slice does not include
218+
/// the character `rdr.curr`.
216219
pub fn with_str_from<T>(rdr: @mut StringReader, start: BytePos, f: &fn(s: &str) -> T) -> T {
220+
with_str_from_to(rdr, start, rdr.last_pos, f)
221+
}
222+
223+
/// Calls `f` with a string slice of the source text spanning from `start`
224+
/// up to but excluding `end`.
225+
fn with_str_from_to<T>(rdr: @mut StringReader,
226+
start: BytePos,
227+
end: BytePos,
228+
f: &fn(s: &str) -> T) -> T {
217229
f(rdr.src.slice(
218230
byte_offset(rdr, start).to_uint(),
219-
byte_offset(rdr, rdr.last_pos).to_uint()))
231+
byte_offset(rdr, end).to_uint()))
220232
}
221233

222234
// EFFECT: advance the StringReader by one character. If a newline is
@@ -612,7 +624,10 @@ fn ident_continue(c: char) -> bool {
612624
// EFFECT: updates the interner
613625
fn next_token_inner(rdr: @mut StringReader) -> token::Token {
614626
let c = rdr.curr;
615-
if ident_start(c) {
627+
if ident_start(c) && nextch(rdr) != '"' && nextch(rdr) != '#' {
628+
// Note: r as in r" or r#" is part of a raw string literal,
629+
// not an identifier, and is handled further down.
630+
616631
let start = rdr.last_pos;
617632
while ident_continue(rdr.curr) {
618633
bump(rdr);
@@ -829,6 +844,47 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
829844
bump(rdr);
830845
return token::LIT_STR(str_to_ident(accum_str));
831846
}
847+
'r' => {
848+
let start_bpos = rdr.last_pos;
849+
bump(rdr);
850+
let mut hash_count = 0u;
851+
while rdr.curr == '#' {
852+
bump(rdr);
853+
hash_count += 1;
854+
}
855+
if rdr.curr != '"' {
856+
fatal_span_char(rdr, start_bpos, rdr.last_pos,
857+
~"only `#` is allowed in raw string delimitation; \
858+
found illegal character",
859+
rdr.curr);
860+
}
861+
bump(rdr);
862+
let content_start_bpos = rdr.last_pos;
863+
let mut content_end_bpos;
864+
'outer: loop {
865+
if is_eof(rdr) {
866+
fatal_span(rdr, start_bpos, rdr.last_pos,
867+
~"unterminated raw string");
868+
}
869+
if rdr.curr == '"' {
870+
content_end_bpos = rdr.last_pos;
871+
for _ in range(0, hash_count) {
872+
bump(rdr);
873+
if rdr.curr != '#' {
874+
continue 'outer;
875+
}
876+
}
877+
break;
878+
}
879+
bump(rdr);
880+
}
881+
bump(rdr);
882+
let str_content = with_str_from_to(rdr,
883+
content_start_bpos,
884+
content_end_bpos,
885+
str_to_ident);
886+
return token::LIT_STR(str_content);
887+
}
832888
'-' => {
833889
if nextch(rdr) == '>' {
834890
bump(rdr);
@@ -987,6 +1043,14 @@ mod test {
9871043
assert_eq!(tok, token::LIFETIME(id));
9881044
}
9891045

1046+
#[test] fn raw_string() {
1047+
let env = setup(@"r###\"\"#a\\b\x00c\"\"###");
1048+
let TokenAndSpan {tok, sp: _} =
1049+
env.string_reader.next_token();
1050+
let id = token::str_to_ident("\"#a\\b\x00c\"");
1051+
assert_eq!(tok, token::LIT_STR(id));
1052+
}
1053+
9901054
#[test] fn line_doc_comments() {
9911055
assert!(!is_line_non_doc_comment("///"));
9921056
assert!(!is_line_non_doc_comment("/// blah"));
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
static s: &'static str =
12+
r#x"#"x# //~ ERROR only `#` is allowed in raw string delimitation; found illegal character
13+
;
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
static s: &'static str =
12+
r#"
13+
"## //~ ERROR expected `;` but found `#`
14+
;
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
static s: &'static str =
12+
r#" string literal goes on
13+
and on
14+
//~^^ ERROR unterminated raw string

src/test/run-pass/raw-str.rs

1.28 KB
Binary file not shown.

0 commit comments

Comments
 (0)