Skip to content

Commit 6a29f41

Browse files
committed
---
yaml --- r: 2087 b: refs/heads/master c: 8e0ecb4 h: refs/heads/master i: 2085: 201bece 2083: 668fe8c 2079: 5a5f8df v: v3
1 parent 55298d1 commit 6a29f41

File tree

2 files changed

+78
-14
lines changed

2 files changed

+78
-14
lines changed

[refs]

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
---
2-
refs/heads/master: 3e081d20e53d58b457ddb3ef62ac2ce2b69abda0
2+
refs/heads/master: 8e0ecb4d1a3106dff57b9d949768df41d570ae67

trunk/src/comp/front/lexer.rs

Lines changed: 77 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ impure fn new_reader(io.reader rdr, str filename, codemap.filemap filemap)
5252
if (pos < len) {ret _str.char_at(file, pos);}
5353
else {ret -1 as char;}
5454
}
55-
55+
5656
impure fn init() {
5757
if (pos < len) {
5858
auto next = _str.char_range_at(file, pos);
@@ -368,7 +368,7 @@ impure fn scan_exponent(reader rdr) -> option.t[str] {
368368
}
369369

370370
impure fn scan_dec_digits(reader rdr) -> str {
371-
371+
372372
auto c = rdr.curr();
373373
let str res = "";
374374

@@ -417,11 +417,11 @@ impure fn scan_number(mutable char c, reader rdr) -> token.token {
417417
dec_str = scan_dec_digits(rdr);
418418
is_dec_integer = true;
419419
}
420-
420+
421421
if (is_dec_integer) {
422422
accum_int = digits_to_string(dec_str);
423423
}
424-
424+
425425
c = rdr.curr();
426426
n = rdr.next();
427427

@@ -526,6 +526,44 @@ impure fn scan_number(mutable char c, reader rdr) -> token.token {
526526
}
527527
}
528528

529+
// Scans a numeric character escape and returns the value spelled by its hex
// digits, cast to char.
//
// On entry rdr.curr() must be the backslash that starts the escape (enforced
// by the check below). The character after it selects the digit count:
// 'x' -> 2, 'u' -> 4, 'U' -> 8. Any other selector, or a non-hex character
// where a digit is expected, is logged and then the function fails.
//
// Exactly one bump() is issued for the selector and one per digit, so,
// assuming bump() advances curr by one character, curr is left on the last
// hex digit when this returns.
// NOTE(review): the char-literal caller bumps once more after its escape alt
// and then tests rdr.next() against the closing quote. Starting from the last
// hex digit, that looks like it would leave curr on the quote itself, so
// next() would be the character after it -- TODO confirm against the reader's
// bump()/next() semantics.
impure fn scan_numeric_escape(reader rdr) -> char {
530+
531+
// Number of hex digits still to be read; set from the escape selector.
auto n_hex_digits = 0;
532+
533+
check (rdr.curr() == '\\');
534+
535+
// Peek at the selector without consuming it.
alt (rdr.next()) {
536+
case ('x') { n_hex_digits = 2; }
537+
case ('u') { n_hex_digits = 4; }
538+
case ('U') { n_hex_digits = 8; }
539+
case (?c) {
540+
log "unknown numeric character escape";
541+
log c;
542+
fail;
543+
}
544+
}
545+
546+
rdr.bump(); // advance curr past \
547+
548+
// n always holds the lookahead character, i.e. the next digit candidate.
auto n = rdr.next();
549+
auto accum_int = 0;
550+
551+
while (n_hex_digits != 0) {
552+
if (!is_hex_digit(n)) {
553+
log "illegal numeric character escape";
554+
log n;
555+
fail;
556+
}
557+
// Shift the accumulated value up one hex place and add the new digit.
accum_int *= 16;
558+
accum_int += hex_digit_val(n);
559+
// Consume the digit just accumulated, then refresh the lookahead.
rdr.bump();
560+
n = rdr.next();
561+
n_hex_digits -= 1;
562+
}
563+
ret accum_int as char;
564+
}
565+
566+
529567
impure fn next_token(reader rdr) -> token.token {
530568
auto accum_str = "";
531569

@@ -666,26 +704,31 @@ impure fn next_token(reader rdr) -> token.token {
666704
auto c2 = rdr.curr();
667705
if (c2 == '\\') {
668706
alt (rdr.next()) {
669-
case ('n') { rdr.bump(); c2 = '\n'; }
670-
case ('r') { rdr.bump(); c2 = '\r'; }
671-
case ('t') { rdr.bump(); c2 = '\t'; }
672-
case ('\\') { rdr.bump(); c2 = '\\'; }
673-
case ('\'') { rdr.bump(); c2 = '\''; }
674-
// FIXME: unicode numeric escapes.
707+
case ('n') { c2 = '\n'; }
708+
case ('r') { c2 = '\r'; }
709+
case ('t') { c2 = '\t'; }
710+
case ('\\') { c2 = '\\'; }
711+
case ('\'') { c2 = '\''; }
712+
713+
case ('x') { c2 = scan_numeric_escape(rdr); }
714+
case ('u') { c2 = scan_numeric_escape(rdr); }
715+
case ('U') { c2 = scan_numeric_escape(rdr); }
716+
675717
case (?c2) {
676718
log "unknown character escape";
677719
log c2;
678720
fail;
679721
}
680722
}
723+
rdr.bump();
681724
}
682725

683726
if (rdr.next() != '\'') {
684727
log "unterminated character constant";
685728
fail;
686729
}
687-
rdr.bump();
688-
rdr.bump();
730+
rdr.bump(); // advance curr to closing '
731+
rdr.bump(); // advance curr past token
689732
ret token.LIT_CHAR(c2);
690733
}
691734

@@ -715,7 +758,22 @@ impure fn next_token(reader rdr) -> token.token {
715758
rdr.bump();
716759
_str.push_byte(accum_str, '"' as u8);
717760
}
718-
// FIXME: unicode numeric escapes.
761+
762+
case ('x') {
763+
_str.push_char(accum_str,
764+
scan_numeric_escape(rdr));
765+
}
766+
767+
case ('u') {
768+
_str.push_char(accum_str,
769+
scan_numeric_escape(rdr));
770+
}
771+
772+
case ('U') {
773+
_str.push_char(accum_str,
774+
scan_numeric_escape(rdr));
775+
}
776+
719777
case (?c2) {
720778
log "unknown string escape";
721779
log c2;
@@ -782,6 +840,12 @@ impure fn next_token(reader rdr) -> token.token {
782840
case ('%') {
783841
ret binop(rdr, token.PERCENT);
784842
}
843+
844+
case (?c) {
845+
log "unkown start of token";
846+
log c;
847+
fail;
848+
}
785849
}
786850

787851
fail;

0 commit comments

Comments
 (0)