Skip to content

Commit 7496134

Browse files
committed
---
yaml --- r: 123707 b: refs/heads/try c: f512779 h: refs/heads/master i: 123705: 36f075b 123703: 1b1cdee v: v3
1 parent 082959f commit 7496134

File tree

7 files changed

+135
-88
lines changed

7 files changed

+135
-88
lines changed

[refs]

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
refs/heads/master: da4e4e4e0a7778a85748aa4a303b13f603e96b4b
33
refs/heads/snap-stage1: e33de59e47c5076a89eadeb38f4934f58a3618a6
44
refs/heads/snap-stage3: 8ddd286ea4ba4384a0dc9eae393ed515460a986e
5-
refs/heads/try: cc4213418e3ab225867d8e3911f592481b1bbffc
5+
refs/heads/try: f512779554a436d11dd9ffde4c198da6241dfd58
66
refs/tags/release-0.1: 1f5c5126e96c79d22cb7862f75304136e204f105
77
refs/heads/ndm: f3868061cd7988080c30d6d5bf352a5a5fe2460b
88
refs/heads/try2: 147ecfdd8221e4a4d4e090486829a06da1e0ca3c

branches/try/src/librustdoc/html/highlight.rs

Lines changed: 16 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ use std::io;
1818

1919
use syntax::parse;
2020
use syntax::parse::lexer;
21-
use syntax::codemap::{BytePos, Span};
2221

2322
use html::escape::Escape;
2423

@@ -59,38 +58,30 @@ fn doit(sess: &parse::ParseSess, mut lexer: lexer::StringReader,
5958
None => {}
6059
}
6160
try!(write!(out, "class='rust {}'>\n", class.unwrap_or("")));
62-
let mut last = BytePos(0);
6361
let mut is_attribute = false;
6462
let mut is_macro = false;
6563
let mut is_macro_nonterminal = false;
6664
loop {
6765
let next = lexer.next_token();
68-
let test = if next.tok == t::EOF {lexer.pos} else {next.sp.lo};
69-
70-
// The lexer consumes all whitespace and non-doc-comments when iterating
71-
// between tokens. If this token isn't directly adjacent to our last
72-
// token, then we need to emit the whitespace/comment.
73-
//
74-
// If the gap has any '/' characters then we consider the whole thing a
75-
// comment. This will classify some whitespace as a comment, but that
76-
// doesn't matter too much for syntax highlighting purposes.
77-
if test > last {
78-
let snip = sess.span_diagnostic.cm.span_to_snippet(Span {
79-
lo: last,
80-
hi: test,
81-
expn_info: None,
82-
}).unwrap();
83-
if snip.as_slice().contains("/") {
84-
try!(write!(out, "<span class='comment'>{}</span>",
85-
Escape(snip.as_slice())));
86-
} else {
87-
try!(write!(out, "{}", Escape(snip.as_slice())));
88-
}
89-
}
90-
last = next.sp.hi;
66+
67+
let snip = |sp| sess.span_diagnostic.cm.span_to_snippet(sp).unwrap();
68+
9169
if next.tok == t::EOF { break }
9270

9371
let klass = match next.tok {
72+
t::WS => {
73+
try!(write!(out, "{}", Escape(snip(next.sp).as_slice())));
74+
continue
75+
},
76+
t::COMMENT => {
77+
try!(write!(out, "<span class='comment'>{}</span>",
78+
Escape(snip(next.sp).as_slice())));
79+
continue
80+
},
81+
t::SHEBANG(s) => {
82+
try!(write!(out, "{}", Escape(s.as_str())));
83+
continue
84+
},
9485
// If this '&' token is directly adjacent to another token, assume
9586
// that it's the address-of operator instead of the and-operator.
9687
// This allows us to give all pointers their own class (`Box` and

branches/try/src/libsyntax/parse/attr.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ impl<'a> ParserAttr for Parser<'a> {
3434
fn parse_outer_attributes(&mut self) -> Vec<ast::Attribute> {
3535
let mut attrs: Vec<ast::Attribute> = Vec::new();
3636
loop {
37-
debug!("parse_outer_attributes: self.token={:?}",
37+
debug!("parse_outer_attributes: self.token={}",
3838
self.token);
3939
match self.token {
4040
token::POUND => {

branches/try/src/libsyntax/parse/lexer/comments.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use codemap::{BytePos, CharPos, CodeMap, Pos};
1313
use diagnostic;
1414
use parse::lexer::{is_whitespace, Reader};
1515
use parse::lexer::{StringReader, TokenAndSpan};
16-
use parse::lexer::{is_line_non_doc_comment, is_block_non_doc_comment};
16+
use parse::lexer::is_block_doc_comment;
1717
use parse::lexer;
1818
use parse::token;
1919

@@ -42,9 +42,9 @@ pub struct Comment {
4242
}
4343

4444
pub fn is_doc_comment(s: &str) -> bool {
45-
(s.starts_with("///") && !is_line_non_doc_comment(s)) ||
45+
(s.starts_with("///") && super::is_doc_comment(s)) ||
4646
s.starts_with("//!") ||
47-
(s.starts_with("/**") && !is_block_non_doc_comment(s)) ||
47+
(s.starts_with("/**") && is_block_doc_comment(s)) ||
4848
s.starts_with("/*!")
4949
}
5050

@@ -260,7 +260,7 @@ fn read_block_comment(rdr: &mut StringReader,
260260
rdr.bump();
261261
rdr.bump();
262262
}
263-
if !is_block_non_doc_comment(curr_line.as_slice()) {
263+
if is_block_doc_comment(curr_line.as_slice()) {
264264
return
265265
}
266266
assert!(!curr_line.as_slice().contains_char('\n'));

branches/try/src/libsyntax/parse/lexer/mod.rs

Lines changed: 81 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ impl<'a> StringReader<'a> {
187187
/// Advance peek_tok and peek_span to refer to the next token, and
188188
/// possibly update the interner.
189189
fn advance_token(&mut self) {
190-
match self.consume_whitespace_and_comments() {
190+
match self.scan_whitespace_or_comment() {
191191
Some(comment) => {
192192
self.peek_span = comment.sp;
193193
self.peek_tok = comment.tok;
@@ -339,8 +339,7 @@ impl<'a> StringReader<'a> {
339339

340340
/// PRECONDITION: self.curr is not whitespace
341341
/// Eats any kind of comment.
342-
/// Returns a Some(sugared-doc-attr) if one exists, None otherwise
343-
fn consume_any_line_comment(&mut self) -> Option<TokenAndSpan> {
342+
fn scan_comment(&mut self) -> Option<TokenAndSpan> {
344343
match self.curr {
345344
Some(c) => {
346345
if c.is_whitespace() {
@@ -375,28 +374,32 @@ impl<'a> StringReader<'a> {
375374
}
376375
self.bump();
377376
}
378-
let ret = self.with_str_from(start_bpos, |string| {
377+
return self.with_str_from(start_bpos, |string| {
379378
// but comments with only more "/"s are not
380-
if !is_line_non_doc_comment(string) {
381-
Some(TokenAndSpan{
382-
tok: token::DOC_COMMENT(str_to_ident(string)),
383-
sp: codemap::mk_sp(start_bpos, self.last_pos)
384-
})
379+
let tok = if is_doc_comment(string) {
380+
token::DOC_COMMENT(str_to_ident(string))
385381
} else {
386-
None
387-
}
388-
});
382+
token::COMMENT
383+
};
389384

390-
if ret.is_some() {
391-
return ret;
392-
}
385+
return Some(TokenAndSpan{
386+
tok: tok,
387+
sp: codemap::mk_sp(start_bpos, self.last_pos)
388+
});
389+
});
393390
} else {
391+
let start_bpos = self.last_pos - BytePos(2);
394392
while !self.curr_is('\n') && !self.is_eof() { self.bump(); }
393+
return Some(TokenAndSpan {
394+
tok: token::COMMENT,
395+
sp: codemap::mk_sp(start_bpos, self.last_pos)
396+
});
395397
}
396-
// Restart whitespace munch.
397-
self.consume_whitespace_and_comments()
398398
}
399-
Some('*') => { self.bump(); self.bump(); self.consume_block_comment() }
399+
Some('*') => {
400+
self.bump(); self.bump();
401+
self.scan_block_comment()
402+
}
400403
_ => None
401404
}
402405
} else if self.curr_is('#') {
@@ -412,9 +415,15 @@ impl<'a> StringReader<'a> {
412415
let cmap = CodeMap::new();
413416
cmap.files.borrow_mut().push(self.filemap.clone());
414417
let loc = cmap.lookup_char_pos_adj(self.last_pos);
418+
debug!("Skipping a shebang");
415419
if loc.line == 1u && loc.col == CharPos(0u) {
420+
// FIXME: Add shebang "token", return it
421+
let start = self.last_pos;
416422
while !self.curr_is('\n') && !self.is_eof() { self.bump(); }
417-
return self.consume_whitespace_and_comments();
423+
return Some(TokenAndSpan {
424+
tok: token::SHEBANG(self.ident_from(start)),
425+
sp: codemap::mk_sp(start, self.last_pos)
426+
});
418427
}
419428
}
420429
None
@@ -423,15 +432,33 @@ impl<'a> StringReader<'a> {
423432
}
424433
}
425434

426-
/// EFFECT: eats whitespace and comments.
427-
/// Returns a Some(sugared-doc-attr) if one exists, None otherwise.
428-
fn consume_whitespace_and_comments(&mut self) -> Option<TokenAndSpan> {
429-
while is_whitespace(self.curr) { self.bump(); }
430-
return self.consume_any_line_comment();
435+
/// If there is whitespace, shebang, or a comment, scan it. Otherwise,
436+
/// return None.
437+
fn scan_whitespace_or_comment(&mut self) -> Option<TokenAndSpan> {
438+
match self.curr.unwrap_or('\0') {
439+
// # to handle shebang at start of file -- this is the entry point
440+
// for skipping over all "junk"
441+
'/' | '#' => {
442+
let c = self.scan_comment();
443+
debug!("scanning a comment {}", c);
444+
c
445+
},
446+
c if is_whitespace(Some(c)) => {
447+
let start_bpos = self.last_pos;
448+
while is_whitespace(self.curr) { self.bump(); }
449+
let c = Some(TokenAndSpan {
450+
tok: token::WS,
451+
sp: codemap::mk_sp(start_bpos, self.last_pos)
452+
});
453+
debug!("scanning whitespace: {}", c);
454+
c
455+
},
456+
_ => None
457+
}
431458
}
432459

433460
/// Might return a sugared-doc-attr
434-
fn consume_block_comment(&mut self) -> Option<TokenAndSpan> {
461+
fn scan_block_comment(&mut self) -> Option<TokenAndSpan> {
435462
// block comments starting with "/**" or "/*!" are doc-comments
436463
let is_doc_comment = self.curr_is('*') || self.curr_is('!');
437464
let start_bpos = self.last_pos - BytePos(2);
@@ -466,28 +493,23 @@ impl<'a> StringReader<'a> {
466493
self.bump();
467494
}
468495

469-
let res = if is_doc_comment {
470-
self.with_str_from(start_bpos, |string| {
471-
// but comments with only "*"s between two "/"s are not
472-
if !is_block_non_doc_comment(string) {
473-
let string = if has_cr {
474-
self.translate_crlf(start_bpos, string,
475-
"bare CR not allowed in block doc-comment")
476-
} else { string.into_maybe_owned() };
477-
Some(TokenAndSpan{
478-
tok: token::DOC_COMMENT(str_to_ident(string.as_slice())),
479-
sp: codemap::mk_sp(start_bpos, self.last_pos)
480-
})
481-
} else {
482-
None
483-
}
484-
})
485-
} else {
486-
None
487-
};
496+
self.with_str_from(start_bpos, |string| {
497+
// but comments with only "*"s between two "/"s are not
498+
let tok = if is_block_doc_comment(string) {
499+
let string = if has_cr {
500+
self.translate_crlf(start_bpos, string,
501+
"bare CR not allowed in block doc-comment")
502+
} else { string.into_maybe_owned() };
503+
token::DOC_COMMENT(str_to_ident(string.as_slice()))
504+
} else {
505+
token::COMMENT
506+
};
488507

489-
// restart whitespace munch.
490-
if res.is_some() { res } else { self.consume_whitespace_and_comments() }
508+
Some(TokenAndSpan{
509+
tok: tok,
510+
sp: codemap::mk_sp(start_bpos, self.last_pos)
511+
})
512+
})
491513
}
492514

493515
/// Scan through any digits (base `radix`) or underscores, and return how
@@ -1242,12 +1264,18 @@ fn in_range(c: Option<char>, lo: char, hi: char) -> bool {
12421264

12431265
fn is_dec_digit(c: Option<char>) -> bool { return in_range(c, '0', '9'); }
12441266

1245-
pub fn is_line_non_doc_comment(s: &str) -> bool {
1246-
s.starts_with("////")
1267+
pub fn is_doc_comment(s: &str) -> bool {
1268+
let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/')
1269+
|| s.starts_with("//!");
1270+
debug!("is `{}` a doc comment? {}", s, res);
1271+
res
12471272
}
12481273

1249-
pub fn is_block_non_doc_comment(s: &str) -> bool {
1250-
s.starts_with("/***")
1274+
pub fn is_block_doc_comment(s: &str) -> bool {
1275+
let res = (s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*')
1276+
|| s.starts_with("/*!");
1277+
debug!("is `{}` a doc comment? {}", s, res);
1278+
res
12511279
}
12521280

12531281
fn ident_start(c: Option<char>) -> bool {
@@ -1383,9 +1411,9 @@ mod test {
13831411
}
13841412

13851413
#[test] fn line_doc_comments() {
1386-
assert!(!is_line_non_doc_comment("///"));
1387-
assert!(!is_line_non_doc_comment("/// blah"));
1388-
assert!(is_line_non_doc_comment("////"));
1414+
assert!(is_doc_comment("///"));
1415+
assert!(is_doc_comment("/// blah"));
1416+
assert!(!is_doc_comment("////"));
13891417
}
13901418

13911419
#[test] fn nested_block_comments() {

branches/try/src/libsyntax/parse/parser.rs

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -325,10 +325,24 @@ fn is_plain_ident_or_underscore(t: &token::Token) -> bool {
325325
is_plain_ident(t) || *t == token::UNDERSCORE
326326
}
327327

328+
/// Get a token the parser cares about
329+
fn real_token(rdr: &mut Reader) -> TokenAndSpan {
330+
let mut t = rdr.next_token();
331+
loop {
332+
match t.tok {
333+
token::WS | token::COMMENT | token::SHEBANG(_) => {
334+
t = rdr.next_token();
335+
},
336+
_ => break
337+
}
338+
}
339+
t
340+
}
341+
328342
impl<'a> Parser<'a> {
329343
pub fn new(sess: &'a ParseSess, cfg: ast::CrateConfig,
330344
mut rdr: Box<Reader>) -> Parser<'a> {
331-
let tok0 = rdr.next_token();
345+
let tok0 = real_token(rdr);
332346
let span = tok0.sp;
333347
let placeholder = TokenAndSpan {
334348
tok: token::UNDERSCORE,
@@ -864,7 +878,7 @@ impl<'a> Parser<'a> {
864878
None
865879
};
866880
let next = if self.buffer_start == self.buffer_end {
867-
self.reader.next_token()
881+
real_token(self.reader)
868882
} else {
869883
// Avoid token copies with `replace`.
870884
let buffer_start = self.buffer_start as uint;
@@ -908,7 +922,7 @@ impl<'a> Parser<'a> {
908922
-> R {
909923
let dist = distance as int;
910924
while self.buffer_length() < dist {
911-
self.buffer[self.buffer_end as uint] = self.reader.next_token();
925+
self.buffer[self.buffer_end as uint] = real_token(self.reader);
912926
self.buffer_end = (self.buffer_end + 1) & 3;
913927
}
914928
f(&self.buffer[((self.buffer_start + dist - 1) & 3) as uint].tok)

branches/try/src/libsyntax/parse/token.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,18 @@ pub enum Token {
9797

9898
/* For interpolation */
9999
INTERPOLATED(Nonterminal),
100-
101100
DOC_COMMENT(Ident),
101+
102+
// Junk. These carry no data because we don't really care about the data
103+
// they *would* carry, and don't really want to allocate a new ident for
104+
// them. Instead, users could extract that from the associated span.
105+
106+
/// Whitespace
107+
WS,
108+
/// Comment
109+
COMMENT,
110+
SHEBANG(Ident),
111+
102112
EOF,
103113
}
104114

@@ -231,6 +241,10 @@ pub fn to_string(t: &Token) -> String {
231241
/* Other */
232242
DOC_COMMENT(s) => get_ident(s).get().to_string(),
233243
EOF => "<eof>".to_string(),
244+
WS => " ".to_string(),
245+
COMMENT => "/* */".to_string(),
246+
SHEBANG(s) => format!("/* shebang: {}*/", s.as_str()),
247+
234248
INTERPOLATED(ref nt) => {
235249
match nt {
236250
&NtExpr(ref e) => ::print::pprust::expr_to_string(&**e),

0 commit comments

Comments
 (0)