|
| 1 | +import io::reader_util; |
| 2 | +import util::interner; |
| 3 | +import lexer::{ reader, new_reader, next_token, is_whitespace }; |
| 4 | + |
// Where a gathered comment sits relative to the code around it. The
// `blank_line` variant is not a real comment: it records an empty source
// line so the layout can be preserved.
enum cmnt_style {
    isolated, // No code on either side of each line of the comment
    trailing, // Code exists to the left of the comment
    mixed, // Code before /* foo */ and after the comment
    blank_line, // Just a manual blank line "\n\n", for layout
}
| 11 | + |
// One gathered comment: its placement style, its text split into lines
// (empty for a `blank_line` entry), and the reader's character position
// (`chpos`) at the point where it was encountered.
type cmnt = {style: cmnt_style, lines: [str], pos: uint};
| 13 | + |
| 14 | +fn read_to_eol(rdr: reader) -> str { |
| 15 | + let mut val = ""; |
| 16 | + while rdr.curr != '\n' && !rdr.is_eof() { |
| 17 | + str::push_char(val, rdr.curr); |
| 18 | + rdr.bump(); |
| 19 | + } |
| 20 | + if rdr.curr == '\n' { rdr.bump(); } |
| 21 | + ret val; |
| 22 | +} |
| 23 | + |
| 24 | +fn read_one_line_comment(rdr: reader) -> str { |
| 25 | + let val = read_to_eol(rdr); |
| 26 | + assert (val[0] == '/' as u8 && val[1] == '/' as u8); |
| 27 | + ret val; |
| 28 | +} |
| 29 | + |
| 30 | +fn consume_non_eol_whitespace(rdr: reader) { |
| 31 | + while is_whitespace(rdr.curr) && rdr.curr != '\n' && !rdr.is_eof() { |
| 32 | + rdr.bump(); |
| 33 | + } |
| 34 | +} |
| 35 | + |
| 36 | +fn push_blank_line_comment(rdr: reader, &comments: [cmnt]) { |
| 37 | + #debug(">>> blank-line comment"); |
| 38 | + let v: [str] = []; |
| 39 | + comments += [{style: blank_line, lines: v, pos: rdr.chpos}]; |
| 40 | +} |
| 41 | + |
| 42 | +fn consume_whitespace_counting_blank_lines(rdr: reader, &comments: [cmnt]) { |
| 43 | + while is_whitespace(rdr.curr) && !rdr.is_eof() { |
| 44 | + if rdr.col == 0u && rdr.curr == '\n' { |
| 45 | + push_blank_line_comment(rdr, comments); |
| 46 | + } |
| 47 | + rdr.bump(); |
| 48 | + } |
| 49 | +} |
| 50 | + |
| 51 | +fn read_line_comments(rdr: reader, code_to_the_left: bool) -> cmnt { |
| 52 | + #debug(">>> line comments"); |
| 53 | + let p = rdr.chpos; |
| 54 | + let mut lines: [str] = []; |
| 55 | + while rdr.curr == '/' && rdr.next() == '/' { |
| 56 | + let line = read_one_line_comment(rdr); |
| 57 | + log(debug, line); |
| 58 | + lines += [line]; |
| 59 | + consume_non_eol_whitespace(rdr); |
| 60 | + } |
| 61 | + #debug("<<< line comments"); |
| 62 | + ret {style: if code_to_the_left { trailing } else { isolated }, |
| 63 | + lines: lines, |
| 64 | + pos: p}; |
| 65 | +} |
| 66 | + |
| 67 | +fn all_whitespace(s: str, begin: uint, end: uint) -> bool { |
| 68 | + let mut i: uint = begin; |
| 69 | + while i != end { if !is_whitespace(s[i] as char) { ret false; } i += 1u; } |
| 70 | + ret true; |
| 71 | +} |
| 72 | + |
| 73 | +fn trim_whitespace_prefix_and_push_line(&lines: [str], |
| 74 | + s: str, col: uint) unsafe { |
| 75 | + let mut s1; |
| 76 | + if all_whitespace(s, 0u, col) { |
| 77 | + if col < str::len(s) { |
| 78 | + s1 = str::slice(s, col, str::len(s)); |
| 79 | + } else { s1 = ""; } |
| 80 | + } else { s1 = s; } |
| 81 | + log(debug, "pushing line: " + s1); |
| 82 | + lines += [s1]; |
| 83 | +} |
| 84 | + |
| 85 | +fn read_block_comment(rdr: reader, code_to_the_left: bool) -> cmnt { |
| 86 | + #debug(">>> block comment"); |
| 87 | + let p = rdr.chpos; |
| 88 | + let mut lines: [str] = []; |
| 89 | + let mut col: uint = rdr.col; |
| 90 | + rdr.bump(); |
| 91 | + rdr.bump(); |
| 92 | + let mut curr_line = "/*"; |
| 93 | + let mut level: int = 1; |
| 94 | + while level > 0 { |
| 95 | + #debug("=== block comment level %d", level); |
| 96 | + if rdr.is_eof() { rdr.fatal("unterminated block comment"); } |
| 97 | + if rdr.curr == '\n' { |
| 98 | + trim_whitespace_prefix_and_push_line(lines, curr_line, col); |
| 99 | + curr_line = ""; |
| 100 | + rdr.bump(); |
| 101 | + } else { |
| 102 | + str::push_char(curr_line, rdr.curr); |
| 103 | + if rdr.curr == '/' && rdr.next() == '*' { |
| 104 | + rdr.bump(); |
| 105 | + rdr.bump(); |
| 106 | + curr_line += "*"; |
| 107 | + level += 1; |
| 108 | + } else { |
| 109 | + if rdr.curr == '*' && rdr.next() == '/' { |
| 110 | + rdr.bump(); |
| 111 | + rdr.bump(); |
| 112 | + curr_line += "/"; |
| 113 | + level -= 1; |
| 114 | + } else { rdr.bump(); } |
| 115 | + } |
| 116 | + } |
| 117 | + } |
| 118 | + if str::len(curr_line) != 0u { |
| 119 | + trim_whitespace_prefix_and_push_line(lines, curr_line, col); |
| 120 | + } |
| 121 | + let mut style = if code_to_the_left { trailing } else { isolated }; |
| 122 | + consume_non_eol_whitespace(rdr); |
| 123 | + if !rdr.is_eof() && rdr.curr != '\n' && vec::len(lines) == 1u { |
| 124 | + style = mixed; |
| 125 | + } |
| 126 | + #debug("<<< block comment"); |
| 127 | + ret {style: style, lines: lines, pos: p}; |
| 128 | +} |
| 129 | + |
| 130 | +fn peeking_at_comment(rdr: reader) -> bool { |
| 131 | + ret rdr.curr == '/' && rdr.next() == '/' || |
| 132 | + rdr.curr == '/' && rdr.next() == '*'; |
| 133 | +} |
| 134 | + |
| 135 | +fn consume_comment(rdr: reader, code_to_the_left: bool, &comments: [cmnt]) { |
| 136 | + #debug(">>> consume comment"); |
| 137 | + if rdr.curr == '/' && rdr.next() == '/' { |
| 138 | + comments += [read_line_comments(rdr, code_to_the_left)]; |
| 139 | + } else if rdr.curr == '/' && rdr.next() == '*' { |
| 140 | + comments += [read_block_comment(rdr, code_to_the_left)]; |
| 141 | + } else { fail; } |
| 142 | + #debug("<<< consume comment"); |
| 143 | +} |
| 144 | + |
| 145 | +fn is_lit(t: token::token) -> bool { |
| 146 | + ret alt t { |
| 147 | + token::LIT_INT(_, _) { true } |
| 148 | + token::LIT_UINT(_, _) { true } |
| 149 | + token::LIT_FLOAT(_, _) { true } |
| 150 | + token::LIT_STR(_) { true } |
| 151 | + token::LIT_BOOL(_) { true } |
| 152 | + _ { false } |
| 153 | + } |
| 154 | +} |
| 155 | + |
// One gathered literal: `lit` is the text obtained from the reader for the
// token (via get_str_from on the token's byte position) and `pos` is the
// token's character position.
type lit = {lit: str, pos: uint};
| 157 | + |
| 158 | +fn gather_comments_and_literals(cm: codemap::codemap, |
| 159 | + span_diagnostic: diagnostic::span_handler, |
| 160 | + path: str, |
| 161 | + srdr: io::reader) -> |
| 162 | + {cmnts: [cmnt], lits: [lit]} { |
| 163 | + let src = @str::from_bytes(srdr.read_whole_stream()); |
| 164 | + let itr = @interner::mk::<str>(str::hash, str::eq); |
| 165 | + let rdr = new_reader(cm, span_diagnostic, |
| 166 | + codemap::new_filemap(path, src, 0u, 0u), itr); |
| 167 | + let mut comments: [cmnt] = []; |
| 168 | + let mut literals: [lit] = []; |
| 169 | + let mut first_read: bool = true; |
| 170 | + while !rdr.is_eof() { |
| 171 | + loop { |
| 172 | + let mut code_to_the_left = !first_read; |
| 173 | + consume_non_eol_whitespace(rdr); |
| 174 | + if rdr.curr == '\n' { |
| 175 | + code_to_the_left = false; |
| 176 | + consume_whitespace_counting_blank_lines(rdr, comments); |
| 177 | + } |
| 178 | + while peeking_at_comment(rdr) { |
| 179 | + consume_comment(rdr, code_to_the_left, comments); |
| 180 | + consume_whitespace_counting_blank_lines(rdr, comments); |
| 181 | + } |
| 182 | + break; |
| 183 | + } |
| 184 | + let tok = next_token(rdr); |
| 185 | + if is_lit(tok.tok) { |
| 186 | + let s = rdr.get_str_from(tok.bpos); |
| 187 | + literals += [{lit: s, pos: tok.chpos}]; |
| 188 | + log(debug, "tok lit: " + s); |
| 189 | + } else { |
| 190 | + log(debug, "tok: " + token::to_str(rdr, tok.tok)); |
| 191 | + } |
| 192 | + first_read = false; |
| 193 | + } |
| 194 | + ret {cmnts: comments, lits: literals}; |
| 195 | +} |
0 commit comments