
Commit b6072ca

Add a one-token cache.
This should avoid duplicating tokenization work in most uses of `Parser::try`.
1 parent 49e8ab4 commit b6072ca
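For context, here is a sketch of the `try`/`or_else` pattern this cache targets (the same pattern quoted in the diff's comment below). The `color_keyword_or_hash` helper and its error handling are illustrative, not from the crate; it assumes the API at this commit, where `Parser::try` rewinds the input when its closure fails, and it is written for Rust 2015, where `try` was not yet a reserved word (later cssparser versions renamed the method `try_parse`):

```rust
use cssparser::{Parser, ParserInput, Token};

// Hypothetical helper: accept either a hash token or an identifier.
fn color_keyword_or_hash<'i, 't>(parser: &mut Parser<'i, 't>) -> Result<&'static str, ()> {
    // First attempt: `next` tokenizes one token. If the closure returns
    // `Err`, `try` rewinds the input to where it started.
    parser.try(|p| match p.next() {
        Ok(Token::Hash(_)) => Ok("hash color"),
        _ => Err(()),
    })
    // Second attempt: this `next` re-reads the very same token. Without
    // the cache it is tokenized a second time; with it, the token is a
    // cheap clone of the cached value.
    .or_else(|()| match parser.next() {
        Ok(Token::Ident(_)) => Ok("named color"),
        _ => Err(()),
    })
}

fn main() {
    let mut input = ParserInput::new("tomato");
    let mut parser = Parser::new(&mut input);
    assert_eq!(color_keyword_or_hash(&mut parser), Ok("named color"));
}
```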


src/parser.rs

Lines changed: 51 additions & 5 deletions
@@ -61,15 +61,23 @@ impl<'a, T> ParseError<'a, T> {
 }
 
 /// The owned input for a parser.
-pub struct ParserInput<'t> {
-    tokenizer: Tokenizer<'t>,
+pub struct ParserInput<'i> {
+    tokenizer: Tokenizer<'i>,
+    cached_token: Option<CachedToken<'i>>,
 }
 
-impl<'t> ParserInput<'t> {
+struct CachedToken<'i> {
+    token: Token<'i>,
+    start_position: tokenizer::SourcePosition,
+    end_position: tokenizer::SourcePosition,
+}
+
+impl<'i> ParserInput<'i> {
     /// Create a new input for a parser.
-    pub fn new(input: &'t str) -> ParserInput<'t> {
+    pub fn new(input: &'i str) -> ParserInput<'i> {
         ParserInput {
             tokenizer: Tokenizer::new(input),
+            cached_token: None,
         }
     }
 }
@@ -348,11 +356,49 @@ impl<'i: 't, 't> Parser<'i, 't> {
         if let Some(block_type) = self.at_start_of.take() {
             consume_until_end_of_block(block_type, &mut self.input.tokenizer);
         }
+
         let byte = self.input.tokenizer.next_byte();
         if self.stop_before.contains(Delimiters::from_byte(byte)) {
             return Err(BasicParseError::EndOfInput)
         }
-        let token = self.input.tokenizer.next().map_err(|()| BasicParseError::EndOfInput)?;
+
+        let token_start_position = self.input.tokenizer.position();
+        let token;
+        match self.input.cached_token {
+            Some(ref cached_token) if cached_token.start_position == token_start_position => {
+                self.input.tokenizer.reset(cached_token.end_position);
+                token = cached_token.token.clone();
+            }
+            _ => {
+                token = self.input.tokenizer.next().map_err(|()| BasicParseError::EndOfInput)?;
+                match token {
+                    // Don’t cache whitespace or comment tokens.
+                    // A typical pattern is:
+                    //
+                    // ```
+                    // parser.try(|parser| {
+                    //     match parser.next() { … }
+                    // }).or_else(|| {
+                    //     match parser.next() { … }
+                    // })
+                    // ```
+                    //
+                    // If the current position at the start of this code is at a whitespace token,
+                    // the "interesting" token (returned by `next`) comes later.
+                    // So in the second call to `next`, we don’t want "uninteresting" tokens
+                    // to overwrite the cache.
+                    Token::WhiteSpace(_) | Token::Comment(_) => {}
+                    _ => {
+                        self.input.cached_token = Some(CachedToken {
+                            token: token.clone(),
+                            start_position: token_start_position,
+                            end_position: self.input.tokenizer.position(),
+                        })
+                    }
+                }
+            }
+        }
+
         if let Some(block_type) = BlockType::opening(&token) {
             self.at_start_of = Some(block_type);
         }
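
The mechanism is easier to see outside the crate. Below is a self-contained, simplified model of both hunks (my sketch, not the crate's code): positions are plain byte offsets, tokens are whitespace-separated words, and `next` consults the one-token cache the same way the diff does: reuse on an exact `start_position` match, fast-forwarding to `end_position`; otherwise tokenize and refresh the cache. The real code additionally declines to cache whitespace and comment tokens, for the reason given in the comment above.

```rust
#[derive(Clone, Debug, PartialEq)]
struct Token(String);

// Mirrors the diff's `CachedToken`: the token plus where it started
// (to validate a cache hit) and where it ended (to fast-forward).
struct CachedToken {
    token: Token,
    start_position: usize,
    end_position: usize,
}

struct Input<'i> {
    text: &'i str,
    position: usize,
    cached_token: Option<CachedToken>,
}

impl<'i> Input<'i> {
    fn new(text: &'i str) -> Input<'i> {
        Input { text, position: 0, cached_token: None }
    }

    /// Tokenize one whitespace-separated word (stand-in for the real tokenizer).
    fn tokenize(&mut self) -> Option<Token> {
        while self.text[self.position..].starts_with(' ') {
            self.position += 1;
        }
        let rest = &self.text[self.position..];
        if rest.is_empty() {
            return None;
        }
        let len = rest.find(' ').unwrap_or(rest.len());
        self.position += len;
        Some(Token(rest[..len].to_owned()))
    }

    fn next(&mut self) -> Option<Token> {
        let start = self.position;
        // Cache hit: the cached token starts exactly here, so skip
        // straight past it and clone it instead of re-tokenizing.
        if let Some(ref cached) = self.cached_token {
            if cached.start_position == start {
                self.position = cached.end_position;
                return Some(cached.token.clone());
            }
        }
        // Cache miss: tokenize and remember the result.
        let token = self.tokenize()?;
        self.cached_token = Some(CachedToken {
            token: token.clone(),
            start_position: start,
            end_position: self.position,
        });
        Some(token)
    }

    /// Rewind, as `Parser::try` does when its closure fails.
    fn reset(&mut self, position: usize) {
        self.position = position;
    }
}

fn main() {
    let mut input = Input::new("red green");
    let start = input.position;
    assert_eq!(input.next(), Some(Token("red".into()))); // tokenized
    input.reset(start);                                  // a failed `try` rewinds
    assert_eq!(input.next(), Some(Token("red".into()))); // served from the cache
}
```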
