Skip to content

Commit fa2e57b

Browse files
committed
---
yaml --- r: 152679 b: refs/heads/try2 c: 8a8e497 h: refs/heads/master i: 152677: 506db60 152675: f0fefc4 152671: a57a034 v: v3
1 parent 8e6fbbc commit fa2e57b

File tree

6 files changed

+216
-24
lines changed

6 files changed

+216
-24
lines changed

[refs]

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ refs/heads/snap-stage3: 78a7676898d9f80ab540c6df5d4c9ce35bb50463
55
refs/heads/try: 519addf6277dbafccbb4159db4b710c37eaa2ec5
66
refs/tags/release-0.1: 1f5c5126e96c79d22cb7862f75304136e204f105
77
refs/heads/ndm: f3868061cd7988080c30d6d5bf352a5a5fe2460b
8-
refs/heads/try2: d41058ed39fcd7e15ce9d0e7705643da85c94271
8+
refs/heads/try2: 8a8e497ae786ffc032c1e68fc23da0edcf6fa5e3
99
refs/heads/dist-snap: ba4081a5a8573875fed17545846f6f6902c8ba8d
1010
refs/tags/release-0.2: c870d2dffb391e14efb05aa27898f1f6333a9596
1111
refs/tags/release-0.3: b5f0d0f648d9a6153664837026ba1be43d3e2503

branches/try2/src/libsyntax/parse/lexer/mod.rs

Lines changed: 118 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,47 @@ impl<'a> StringReader<'a> {
225225
self.byte_offset(end).to_uint()))
226226
}
227227

228+
/// Converts CRLF to LF in the given string, raising an error on bare CR.
///
/// `start` is the byte position that `s` begins at; it is only used to build
/// the span passed to `err_span_` (together with `errmsg`) when a CR that is
/// not followed by LF is found.
///
/// If `s` contains no CRLF pair it is returned through `into_maybe_owned()`
/// without being rewritten (any bare CR has been reported but is still part
/// of the returned text). As soon as the first CRLF pair is seen, the rest of
/// the work is delegated to the nested `translate_crlf_` helper, which builds
/// a new `String`.
229+
fn translate_crlf<'a>(&self, start: BytePos,
230+
s: &'a str, errmsg: &'a str) -> str::MaybeOwned<'a> {
231+
let mut i = 0u;
232+
while i < s.len() {
233+
let str::CharRange { ch, next } = s.char_range_at(i);
234+
if ch == '\r' {
235+
// First CRLF pair: switch to the copying helper, starting at this CR.
if next < s.len() && s.char_at(next) == '\n' {
236+
return translate_crlf_(self, start, s, errmsg, i).into_maybe_owned();
237+
}
238+
// CR not followed by LF: report it over the CR's own byte range and
// keep scanning.
let pos = start + BytePos(i as u32);
239+
let end_pos = start + BytePos(next as u32);
240+
self.err_span_(pos, end_pos, errmsg);
241+
}
242+
i = next;
243+
}
244+
return s.into_maybe_owned();
245+
246+
// Copying path: builds `buf` from `s` with every CR left out.
// `i` is the scan position (the caller passes the index of the first
// CRLF's CR); `j` is the start of the text that has not been copied yet.
fn translate_crlf_(rdr: &StringReader, start: BytePos,
247+
s: &str, errmsg: &str, mut i: uint) -> String {
248+
let mut buf = String::with_capacity(s.len());
249+
let mut j = 0;
250+
while i < s.len() {
251+
let str::CharRange { ch, next } = s.char_range_at(i);
252+
if ch == '\r' {
253+
// Flush the text that precedes this CR.
if j < i { buf.push_str(s.slice(j, i)); }
254+
// Resume copying after the CR. This happens before the LF check, so
// a bare CR is dropped from `buf` as well as being reported below.
j = next;
255+
if next >= s.len() || s.char_at(next) != '\n' {
256+
let pos = start + BytePos(i as u32);
257+
let end_pos = start + BytePos(next as u32);
258+
rdr.err_span_(pos, end_pos, errmsg);
259+
}
260+
}
261+
i = next;
262+
}
263+
// Copy whatever follows the last CR (or all of `s` from `j` onwards).
if j < s.len() { buf.push_str(s.slice_from(j)); }
264+
buf
265+
}
266+
}
267+
268+
228269
/// Advance the StringReader by one character. If a newline is
229270
/// discovered, add it to the FileMap's list of line start offsets.
230271
pub fn bump(&mut self) {
@@ -305,7 +346,20 @@ impl<'a> StringReader<'a> {
305346
// line comments starting with "///" or "//!" are doc-comments
306347
if self.curr_is('/') || self.curr_is('!') {
307348
let start_bpos = self.pos - BytePos(3);
308-
while !self.curr_is('\n') && !self.is_eof() {
349+
while !self.is_eof() {
350+
match self.curr.unwrap() {
351+
'\n' => break,
352+
'\r' => {
353+
if self.nextch_is('\n') {
354+
// CRLF
355+
break
356+
} else {
357+
self.err_span_(self.last_pos, self.pos,
358+
"bare CR not allowed in doc-comment");
359+
}
360+
}
361+
_ => ()
362+
}
309363
self.bump();
310364
}
311365
let ret = self.with_str_from(start_bpos, |string| {
@@ -370,6 +424,7 @@ impl<'a> StringReader<'a> {
370424
let start_bpos = self.last_pos - BytePos(2);
371425

372426
let mut level: int = 1;
427+
let mut has_cr = false;
373428
while level > 0 {
374429
if self.is_eof() {
375430
let msg = if is_doc_comment {
@@ -379,25 +434,35 @@ impl<'a> StringReader<'a> {
379434
};
380435
let last_bpos = self.last_pos;
381436
self.fatal_span_(start_bpos, last_bpos, msg);
382-
} else if self.curr_is('/') && self.nextch_is('*') {
383-
level += 1;
384-
self.bump();
385-
self.bump();
386-
} else if self.curr_is('*') && self.nextch_is('/') {
387-
level -= 1;
388-
self.bump();
389-
self.bump();
390-
} else {
391-
self.bump();
392437
}
438+
let n = self.curr.unwrap();
439+
match n {
440+
'/' if self.nextch_is('*') => {
441+
level += 1;
442+
self.bump();
443+
}
444+
'*' if self.nextch_is('/') => {
445+
level -= 1;
446+
self.bump();
447+
}
448+
'\r' => {
449+
has_cr = true;
450+
}
451+
_ => ()
452+
}
453+
self.bump();
393454
}
394455

395456
let res = if is_doc_comment {
396457
self.with_str_from(start_bpos, |string| {
397458
// but comments with only "*"s between two "/"s are not
398459
if !is_block_non_doc_comment(string) {
460+
let string = if has_cr {
461+
self.translate_crlf(start_bpos, string,
462+
"bare CR not allowed in block doc-comment")
463+
} else { string.into_maybe_owned() };
399464
Some(TokenAndSpan{
400-
tok: token::DOC_COMMENT(str_to_ident(string)),
465+
tok: token::DOC_COMMENT(str_to_ident(string.as_slice())),
401466
sp: codemap::mk_sp(start_bpos, self.last_pos)
402467
})
403468
} else {
@@ -675,6 +740,10 @@ impl<'a> StringReader<'a> {
675740
self.consume_whitespace();
676741
return None
677742
},
743+
'\r' if delim == '"' && self.curr_is('\n') => {
744+
self.consume_whitespace();
745+
return None
746+
}
678747
c => {
679748
let last_pos = self.last_pos;
680749
self.err_span_char(
@@ -696,6 +765,15 @@ impl<'a> StringReader<'a> {
696765
else { "character constant must be escaped" },
697766
first_source_char);
698767
}
768+
'\r' => {
769+
if self.curr_is('\n') {
770+
self.bump();
771+
return Some('\n');
772+
} else {
773+
self.err_span_(start, self.last_pos,
774+
"bare CR not allowed in string, use \\r instead");
775+
}
776+
}
699777
_ => if ascii_only && first_source_char > '\x7F' {
700778
let last_pos = self.last_pos;
701779
self.err_span_char(
@@ -1042,28 +1120,45 @@ impl<'a> StringReader<'a> {
10421120
self.bump();
10431121
let content_start_bpos = self.last_pos;
10441122
let mut content_end_bpos;
1123+
let mut has_cr = false;
10451124
'outer: loop {
10461125
if self.is_eof() {
10471126
let last_bpos = self.last_pos;
10481127
self.fatal_span_(start_bpos, last_bpos, "unterminated raw string");
10491128
}
1050-
if self.curr_is('"') {
1051-
content_end_bpos = self.last_pos;
1052-
for _ in range(0, hash_count) {
1053-
self.bump();
1054-
if !self.curr_is('#') {
1055-
continue 'outer;
1129+
//if self.curr_is('"') {
1130+
//content_end_bpos = self.last_pos;
1131+
//for _ in range(0, hash_count) {
1132+
//self.bump();
1133+
//if !self.curr_is('#') {
1134+
//continue 'outer;
1135+
let c = self.curr.unwrap();
1136+
match c {
1137+
'"' => {
1138+
content_end_bpos = self.last_pos;
1139+
for _ in range(0, hash_count) {
1140+
self.bump();
1141+
if !self.curr_is('#') {
1142+
continue 'outer;
1143+
}
10561144
}
1145+
break;
1146+
}
1147+
'\r' => {
1148+
has_cr = true;
10571149
}
1058-
break;
1150+
_ => ()
10591151
}
10601152
self.bump();
10611153
}
10621154
self.bump();
1063-
let str_content = self.with_str_from_to(
1064-
content_start_bpos,
1065-
content_end_bpos,
1066-
str_to_ident);
1155+
let str_content = self.with_str_from_to(content_start_bpos, content_end_bpos, |string| {
1156+
let string = if has_cr {
1157+
self.translate_crlf(content_start_bpos, string,
1158+
"bare CR not allowed in raw string")
1159+
} else { string.into_maybe_owned() };
1160+
str_to_ident(string.as_slice())
1161+
});
10671162
return token::LIT_STR_RAW(str_content, hash_count);
10681163
}
10691164
'-' => {

branches/try2/src/libsyntax/parse/mod.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,8 @@ mod test {
288288
use owned_slice::OwnedSlice;
289289
use ast;
290290
use abi;
291+
use attr;
292+
use attr::AttrMetaMethods;
291293
use parse::parser::Parser;
292294
use parse::token::{str_to_ident};
293295
use util::parser_testing::{string_to_tts, string_to_parser};
@@ -726,4 +728,24 @@ mod test {
726728
}".to_string());
727729
}
728730

731+
// Checks the `doc` attribute values produced when doc comments in the source
// are terminated by, or contain, CRLF line endings.
#[test] fn crlf_doc_comments() {
732+
let sess = new_parse_sess();
733+
734+
let name = "<source>".to_string();
735+
// Case 1: a single `///` line ending in CRLF; the doc value must stop
// before the CR.
let source = "/// doc comment\r\nfn foo() {}".to_string();
736+
let item = parse_item_from_source_str(name.clone(), source, Vec::new(), &sess).unwrap();
737+
let doc = attr::first_attr_value_str_by_name(item.attrs.as_slice(), "doc").unwrap();
738+
assert_eq!(doc.get(), "/// doc comment");
739+
740+
// Case 2: two consecutive CRLF-terminated `///` lines; each becomes its own
// `doc` attribute and neither value carries a CR.
let source = "/// doc comment\r\n/// line 2\r\nfn foo() {}".to_string();
741+
let item = parse_item_from_source_str(name.clone(), source, Vec::new(), &sess).unwrap();
742+
let docs = item.attrs.iter().filter(|a| a.name().get() == "doc")
743+
.map(|a| a.value_str().unwrap().get().to_string()).collect::<Vec<_>>();
744+
assert_eq!(docs.as_slice(), &["/// doc comment".to_string(), "/// line 2".to_string()]);
745+
746+
// Case 3: a block doc comment with a CRLF inside it; the expected value has
// a plain LF where the source had CRLF.
let source = "/** doc comment\r\n * with CRLF */\r\nfn foo() {}".to_string();
747+
let item = parse_item_from_source_str(name, source, Vec::new(), &sess).unwrap();
748+
let doc = attr::first_attr_value_str_by_name(item.attrs.as_slice(), "doc").unwrap();
749+
assert_eq!(doc.get(), "/** doc comment\n * with CRLF */");
750+
}
729751
}

branches/try2/src/test/compile-fail/lex-bare-cr-string-literal-doc-comment.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
// ignore-tidy-cr
12+
13+
/// doc comment with bare CR: ''
14+
pub fn foo() {}
15+
//~^^ ERROR: bare CR not allowed in doc-comment
16+
17+
/** block doc comment with bare CR: '' */
18+
pub fn bar() {}
19+
//~^^ ERROR: bare CR not allowed in block doc-comment
20+
21+
fn main() {
22+
// the following string literal has a bare CR in it
23+
let _s = "foobar"; //~ ERROR: bare CR not allowed in string
24+
25+
// the following string literal has a bare CR in it
26+
let _s = r"barfoo"; //~ ERROR: bare CR not allowed in raw string
27+
28+
// the following string literal has a backslash followed by a bare CR in it
29+
let _s = "foo\bar"; //~ ERROR: unknown character escape: \r
30+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
lexer-crlf-line-endings-string-literal-doc-comment.rs -text
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// ignore-tidy-cr ignore-license
2+
// ignore-tidy-cr (repeated again because of tidy bug)
3+
// license is ignored because tidy can't handle the CRLF here properly.
4+
5+
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
6+
// file at the top-level directory of this distribution and at
7+
// http://rust-lang.org/COPYRIGHT.
8+
//
9+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
10+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
11+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
12+
// option. This file may not be copied, modified, or distributed
13+
// except according to those terms.
14+
15+
// NB: this file needs CRLF line endings. The .gitattributes file in
16+
// this directory should enforce it.
17+
18+
// ignore-pretty
19+
20+
/// Doc comment that ends in CRLF
21+
pub fn foo() {}
22+
23+
/** Block doc comment that
24+
* contains CRLF characters
25+
*/
26+
pub fn bar() {}
27+
28+
fn main() {
29+
let s = "string
30+
literal";
31+
assert_eq!(s, "string\nliteral");
32+
33+
let s = "literal with \
34+
escaped newline";
35+
assert_eq!(s, "literal with escaped newline");
36+
37+
let s = r"string
38+
literal";
39+
assert_eq!(s, "string\nliteral");
40+
41+
// validate that our source file has CRLF endings
42+
let source = include_str!("lexer-crlf-line-endings-string-literal-doc-comment.rs");
43+
assert!(source.contains("string\r\nliteral"));
44+
}

0 commit comments

Comments
 (0)