Skip to content

Commit 17ab719

Browse files
committed
Improve lexer performance
1 parent 8294514 commit 17ab719

File tree

2 files changed

+57
-44
lines changed

2 files changed

+57
-44
lines changed

parser/lexer/lexer.go

Lines changed: 29 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,11 @@ func Lex(source *file.Source) ([]Token, error) {
1313
input: source.Content(),
1414
tokens: make([]Token, 0),
1515
}
16+
17+
l.loc = file.Location{1, 0}
18+
l.prev = l.loc
19+
l.startLoc = l.loc
20+
1621
for state := root; state != nil; {
1722
state = state(l)
1823
}
@@ -28,8 +33,10 @@ type lexer struct {
2833
input string
2934
state stateFn
3035
tokens []Token
31-
start, end int // current position in input
32-
width int // last rune with
36+
start, end int // current position in input
37+
width int // width of the last rune read
38+
startLoc file.Location // start location
39+
prev, loc file.Location // location before the last rune read, location at the end position
3340
err *file.Error
3441
}
3542

@@ -43,6 +50,15 @@ func (l *lexer) next() rune {
4350
r, w := utf8.DecodeRuneInString(l.input[l.end:])
4451
l.width = w
4552
l.end += w
53+
54+
l.prev = l.loc
55+
if r == '\n' {
56+
l.loc.Line++
57+
l.loc.Column = 0
58+
} else {
59+
l.loc.Column++
60+
}
61+
4662
return r
4763
}
4864

@@ -54,6 +70,7 @@ func (l *lexer) peek() rune {
5470

5571
func (l *lexer) backup() {
5672
l.end -= l.width
73+
l.loc = l.prev
5774
}
5875

5976
func (l *lexer) emit(t Kind) {
@@ -62,19 +79,21 @@ func (l *lexer) emit(t Kind) {
6279

6380
func (l *lexer) emitValue(t Kind, value string) {
6481
l.tokens = append(l.tokens, Token{
65-
Location: l.loc(l.start),
82+
Location: l.startLoc,
6683
Kind: t,
6784
Value: value,
6885
})
6986
l.start = l.end
87+
l.startLoc = l.loc
7088
}
7189

7290
func (l *lexer) emitEOF() {
7391
l.tokens = append(l.tokens, Token{
74-
Location: l.loc(l.start - 1), // Point to previous position for better error messages.
92+
Location: l.prev, // Point to previous position for better error messages.
7593
Kind: EOF,
7694
})
7795
l.start = l.end
96+
l.startLoc = l.loc
7897
}
7998

8099
func (l *lexer) word() string {
@@ -83,6 +102,7 @@ func (l *lexer) word() string {
83102

84103
func (l *lexer) ignore() {
85104
l.start = l.end
105+
l.startLoc = l.loc
86106
}
87107

88108
func (l *lexer) accept(valid string) bool {
@@ -101,9 +121,13 @@ func (l *lexer) acceptRun(valid string) {
101121

102122
func (l *lexer) acceptWord(word string) bool {
103123
pos := l.end
124+
loc := l.loc
125+
prev := l.prev
104126
for _, ch := range word {
105127
if l.next() != ch {
106128
l.end = pos
129+
l.loc = loc
130+
l.prev = prev
107131
return false
108132
}
109133
}
@@ -113,32 +137,13 @@ func (l *lexer) acceptWord(word string) bool {
113137
func (l *lexer) error(format string, args ...interface{}) stateFn {
114138
if l.err == nil { // show first error
115139
l.err = &file.Error{
116-
Location: l.loc(l.end - 1),
140+
Location: l.loc,
117141
Message: fmt.Sprintf(format, args...),
118142
}
119143
}
120144
return nil
121145
}
122146

123-
func (l *lexer) loc(pos int) file.Location {
124-
line, column := 1, 0
125-
for i, ch := range []rune(l.input) {
126-
if i == pos {
127-
break
128-
}
129-
if ch == '\n' {
130-
line++
131-
column = 0
132-
} else {
133-
column++
134-
}
135-
}
136-
return file.Location{
137-
Line: line,
138-
Column: column,
139-
}
140-
}
141-
142147
func digitVal(ch rune) int {
143148
switch {
144149
case '0' <= ch && ch <= '9':

parser/lexer/lexer_test.go

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package lexer_test
22

33
import (
4+
"fmt"
5+
"github.com/stretchr/testify/assert"
46
"strings"
57
"testing"
68

@@ -91,19 +93,6 @@ var lexTests = []lexTest{
9193
},
9294
}
9395

94-
var lexErrorTests = []lexErrorTest{
95-
{
96-
`
97-
"\xQA"
98-
`,
99-
`invalid char escape (2:6)`,
100-
},
101-
{
102-
`id "hello`,
103-
`literal not terminated (1:9)`,
104-
},
105-
}
106-
10796
func compareTokens(i1, i2 []Token) bool {
10897
if len(i1) != len(i2) {
10998
return false
@@ -132,15 +121,34 @@ func TestLex(t *testing.T) {
132121
}
133122
}
134123

124+
const errorTests = `
125+
"\xQA"
126+
invalid char escape (1:5)
127+
| "\xQA"
128+
| ....^
129+
130+
id "hello
131+
literal not terminated (1:10)
132+
| id "hello
133+
| .........^
134+
`
135+
135136
func TestLex_error(t *testing.T) {
136-
for _, test := range lexErrorTests {
137-
out, err := Lex(file.NewSource(test.input))
138-
if err == nil {
139-
t.Errorf("%s:\nexpected error\n%v", test.input, out)
140-
continue
137+
tests := strings.Split(strings.Trim(errorTests, "\n"), "\n\n")
138+
139+
for _, test := range tests {
140+
141+
input := strings.SplitN(test, "\n", 2)
142+
if len(input) != 2 {
143+
t.Errorf("syntax error in test: %q", test)
144+
break
141145
}
142-
if !strings.HasPrefix(err.Error(), test.err) || test.err == "" {
143-
t.Errorf("%s:\ngot\n\t%+v\nexpected\n\t%v", test.input, err.Error(), test.err)
146+
147+
_, err := Lex(file.NewSource(input[0]))
148+
if err == nil {
149+
err = fmt.Errorf("<nil>")
144150
}
151+
152+
assert.Equal(t, input[1], err.Error(), input[0])
145153
}
146154
}

0 commit comments

Comments
 (0)