@@ -187,7 +187,7 @@ impl<'a> StringReader<'a> {
187
187
/// Advance peek_tok and peek_span to refer to the next token, and
188
188
/// possibly update the interner.
189
189
fn advance_token ( & mut self ) {
190
- match self . consume_whitespace_and_comments ( ) {
190
+ match self . scan_whitespace_or_comment ( ) {
191
191
Some ( comment) => {
192
192
self . peek_span = comment. sp ;
193
193
self . peek_tok = comment. tok ;
@@ -339,8 +339,7 @@ impl<'a> StringReader<'a> {
339
339
340
340
/// PRECONDITION: self.curr is not whitespace
341
341
/// Eats any kind of comment.
342
- /// Returns a Some(sugared-doc-attr) if one exists, None otherwise
343
- fn consume_any_line_comment ( & mut self ) -> Option < TokenAndSpan > {
342
+ fn scan_comment ( & mut self ) -> Option < TokenAndSpan > {
344
343
match self . curr {
345
344
Some ( c) => {
346
345
if c. is_whitespace ( ) {
@@ -375,28 +374,32 @@ impl<'a> StringReader<'a> {
375
374
}
376
375
self . bump ( ) ;
377
376
}
378
- let ret = self . with_str_from ( start_bpos, |string| {
377
+ return self . with_str_from ( start_bpos, |string| {
379
378
// but comments with only more "/"s are not
380
- if !is_line_non_doc_comment ( string) {
381
- Some ( TokenAndSpan {
382
- tok : token:: DOC_COMMENT ( str_to_ident ( string) ) ,
383
- sp : codemap:: mk_sp ( start_bpos, self . last_pos )
384
- } )
379
+ let tok = if is_doc_comment ( string) {
380
+ token:: DOC_COMMENT ( str_to_ident ( string) )
385
381
} else {
386
- None
387
- }
388
- } ) ;
382
+ token:: COMMENT
383
+ } ;
389
384
390
- if ret. is_some ( ) {
391
- return ret;
392
- }
385
+ return Some ( TokenAndSpan {
386
+ tok : tok,
387
+ sp : codemap:: mk_sp ( start_bpos, self . last_pos )
388
+ } ) ;
389
+ } ) ;
393
390
} else {
391
+ let start_bpos = self . last_pos - BytePos ( 2 ) ;
394
392
while !self . curr_is ( '\n' ) && !self . is_eof ( ) { self . bump ( ) ; }
393
+ return Some ( TokenAndSpan {
394
+ tok : token:: COMMENT ,
395
+ sp : codemap:: mk_sp ( start_bpos, self . last_pos )
396
+ } ) ;
395
397
}
396
- // Restart whitespace munch.
397
- self . consume_whitespace_and_comments ( )
398
398
}
399
- Some ( '*' ) => { self . bump ( ) ; self . bump ( ) ; self . consume_block_comment ( ) }
399
+ Some ( '*' ) => {
400
+ self . bump ( ) ; self . bump ( ) ;
401
+ self . scan_block_comment ( )
402
+ }
400
403
_ => None
401
404
}
402
405
} else if self . curr_is ( '#' ) {
@@ -412,9 +415,15 @@ impl<'a> StringReader<'a> {
412
415
let cmap = CodeMap :: new ( ) ;
413
416
cmap. files . borrow_mut ( ) . push ( self . filemap . clone ( ) ) ;
414
417
let loc = cmap. lookup_char_pos_adj ( self . last_pos ) ;
418
+ debug ! ( "Skipping a shebang" ) ;
415
419
if loc. line == 1 u && loc. col == CharPos ( 0 u) {
420
+ // FIXME: Add shebang "token", return it
421
+ let start = self . last_pos ;
416
422
while !self . curr_is ( '\n' ) && !self . is_eof ( ) { self . bump ( ) ; }
417
- return self . consume_whitespace_and_comments ( ) ;
423
+ return Some ( TokenAndSpan {
424
+ tok : token:: SHEBANG ( self . ident_from ( start) ) ,
425
+ sp : codemap:: mk_sp ( start, self . last_pos )
426
+ } ) ;
418
427
}
419
428
}
420
429
None
@@ -423,15 +432,33 @@ impl<'a> StringReader<'a> {
423
432
}
424
433
}
425
434
426
- /// EFFECT: eats whitespace and comments.
427
- /// Returns a Some(sugared-doc-attr) if one exists, None otherwise.
428
- fn consume_whitespace_and_comments ( & mut self ) -> Option < TokenAndSpan > {
429
- while is_whitespace ( self . curr ) { self . bump ( ) ; }
430
- return self . consume_any_line_comment ( ) ;
435
+ /// If there is whitespace, shebang, or a comment, scan it. Otherwise,
436
+ /// return None.
437
+ fn scan_whitespace_or_comment ( & mut self ) -> Option < TokenAndSpan > {
438
+ match self . curr . unwrap_or ( '\0' ) {
439
+ // # to handle shebang at start of file -- this is the entry point
440
+ // for skipping over all "junk"
441
+ '/' | '#' => {
442
+ let c = self . scan_comment ( ) ;
443
+ debug ! ( "scanning a comment {}" , c) ;
444
+ c
445
+ } ,
446
+ c if is_whitespace ( Some ( c) ) => {
447
+ let start_bpos = self . last_pos ;
448
+ while is_whitespace ( self . curr ) { self . bump ( ) ; }
449
+ let c = Some ( TokenAndSpan {
450
+ tok : token:: WS ,
451
+ sp : codemap:: mk_sp ( start_bpos, self . last_pos )
452
+ } ) ;
453
+ debug ! ( "scanning whitespace: {}" , c) ;
454
+ c
455
+ } ,
456
+ _ => None
457
+ }
431
458
}
432
459
433
460
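/// Scans a block comment. A doc comment comes back as a DOC_COMMENT token
/// (a sugared doc attribute); any other block comment comes back as a plain
/// COMMENT token.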
434
- fn consume_block_comment ( & mut self ) -> Option < TokenAndSpan > {
461
+ fn scan_block_comment ( & mut self ) -> Option < TokenAndSpan > {
435
462
// block comments starting with "/**" or "/*!" are doc-comments
436
463
let is_doc_comment = self . curr_is ( '*' ) || self . curr_is ( '!' ) ;
437
464
let start_bpos = self . last_pos - BytePos ( 2 ) ;
@@ -466,28 +493,23 @@ impl<'a> StringReader<'a> {
466
493
self . bump ( ) ;
467
494
}
468
495
469
- let res = if is_doc_comment {
470
- self . with_str_from ( start_bpos, |string| {
471
- // but comments with only "*"s between two "/"s are not
472
- if !is_block_non_doc_comment ( string) {
473
- let string = if has_cr {
474
- self . translate_crlf ( start_bpos, string,
475
- "bare CR not allowed in block doc-comment" )
476
- } else { string. into_maybe_owned ( ) } ;
477
- Some ( TokenAndSpan {
478
- tok : token:: DOC_COMMENT ( str_to_ident ( string. as_slice ( ) ) ) ,
479
- sp : codemap:: mk_sp ( start_bpos, self . last_pos )
480
- } )
481
- } else {
482
- None
483
- }
484
- } )
485
- } else {
486
- None
487
- } ;
496
+ self . with_str_from ( start_bpos, |string| {
497
+ // but comments with only "*"s between two "/"s are not
498
+ let tok = if is_block_doc_comment ( string) {
499
+ let string = if has_cr {
500
+ self . translate_crlf ( start_bpos, string,
501
+ "bare CR not allowed in block doc-comment" )
502
+ } else { string. into_maybe_owned ( ) } ;
503
+ token:: DOC_COMMENT ( str_to_ident ( string. as_slice ( ) ) )
504
+ } else {
505
+ token:: COMMENT
506
+ } ;
488
507
489
- // restart whitespace munch.
490
- if res. is_some ( ) { res } else { self . consume_whitespace_and_comments ( ) }
508
+ Some ( TokenAndSpan {
509
+ tok : tok,
510
+ sp : codemap:: mk_sp ( start_bpos, self . last_pos )
511
+ } )
512
+ } )
491
513
}
492
514
493
515
/// Scan through any digits (base `radix`) or underscores, and return how
@@ -1242,12 +1264,18 @@ fn in_range(c: Option<char>, lo: char, hi: char) -> bool {
1242
1264
1243
1265
/// Whether `c` is a decimal digit, decided by `in_range` with the bounds
/// `'0'` and `'9'`.
fn is_dec_digit(c: Option<char>) -> bool {
    in_range(c, '0', '9')
}
1244
1266
1245
- pub fn is_line_non_doc_comment ( s : & str ) -> bool {
1246
- s. starts_with ( "////" )
1267
+ pub fn is_doc_comment ( s : & str ) -> bool {
1268
+ let res = ( s. starts_with ( "///" ) && * s. as_bytes ( ) . get ( 3 ) . unwrap_or ( & b' ' ) != b'/' )
1269
+ || s. starts_with ( "//!" ) ;
1270
+ debug ! ( "is `{}` a doc comment? {}" , s, res) ;
1271
+ res
1247
1272
}
1248
1273
1249
- pub fn is_block_non_doc_comment ( s : & str ) -> bool {
1250
- s. starts_with ( "/***" )
1274
+ pub fn is_block_doc_comment ( s : & str ) -> bool {
1275
+ let res = ( s. starts_with ( "/**" ) && * s. as_bytes ( ) . get ( 3 ) . unwrap_or ( & b' ' ) != b'*' )
1276
+ || s. starts_with ( "/*!" ) ;
1277
+ debug ! ( "is `{}` a doc comment? {}" , s, res) ;
1278
+ res
1251
1279
}
1252
1280
1253
1281
fn ident_start ( c : Option < char > ) -> bool {
@@ -1383,9 +1411,9 @@ mod test {
1383
1411
}
1384
1412
1385
1413
/// Checks `is_doc_comment` against every line-comment form it distinguishes:
/// outer doc (`///`), inner doc (`//!`), four-slash, and plain comments.
#[test] fn line_doc_comments() {
    // Outer doc comments: exactly three slashes.
    assert!(is_doc_comment("///"));
    assert!(is_doc_comment("/// blah"));
    // Four slashes make an ordinary comment again.
    assert!(!is_doc_comment("////"));
    // Inner doc comments.
    assert!(is_doc_comment("//!"));
    assert!(is_doc_comment("//! blah"));
    // Plain line comments are never doc comments.
    assert!(!is_doc_comment("//"));
    assert!(!is_doc_comment("// blah"));
}
1390
1418
1391
1419
#[ test] fn nested_block_comments ( ) {
0 commit comments