Skip to content

Commit 1659c23

Browse files
authored
Refactor lexer (#653)
1 parent 6cf0edb commit 1659c23

File tree

12 files changed

+135
-159
lines changed

12 files changed

+135
-159
lines changed

expr_test.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1623,7 +1623,10 @@ func TestCompile_exposed_error(t *testing.T) {
16231623

16241624
b, err := json.Marshal(err)
16251625
require.NoError(t, err)
1626-
require.Equal(t, `{"Line":1,"Column":2,"Message":"invalid operation: == (mismatched types int and bool)","Snippet":"\n | 1 == true\n | ..^","Prev":null}`, string(b))
1626+
require.Equal(t,
1627+
`{"from":2,"to":4,"line":1,"column":2,"message":"invalid operation: == (mismatched types int and bool)","snippet":"\n | 1 == true\n | ..^","prev":null}`,
1628+
string(b),
1629+
)
16271630
}
16281631

16291632
func TestAsBool_exposed_error(t *testing.T) {
@@ -2667,3 +2670,11 @@ func TestIssue_integer_truncated_by_compiler(t *testing.T) {
26672670
_, err = expr.Compile("fn(256)", expr.Env(env))
26682671
require.Error(t, err)
26692672
}
2673+
2674+
func TestExpr_crash(t *testing.T) {
2675+
content, err := os.ReadFile("testdata/crash.txt")
2676+
require.NoError(t, err)
2677+
2678+
_, err = expr.Compile(string(content))
2679+
require.Error(t, err)
2680+
}

file/error.go

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,22 +8,36 @@ import (
88

99
type Error struct {
1010
Location
11-
Message string
12-
Snippet string
13-
Prev error
11+
Line int `json:"line"`
12+
Column int `json:"column"`
13+
Message string `json:"message"`
14+
Snippet string `json:"snippet"`
15+
Prev error `json:"prev"`
1416
}
1517

1618
func (e *Error) Error() string {
1719
return e.format()
1820
}
1921

20-
func (e *Error) Bind(source *Source) *Error {
21-
if snippet, found := source.Snippet(e.Location.Line); found {
22+
func (e *Error) Bind(source Source) *Error {
23+
e.Line = 1
24+
for i, r := range source {
25+
if i == e.From {
26+
break
27+
}
28+
if r == '\n' {
29+
e.Line++
30+
e.Column = 0
31+
} else {
32+
e.Column++
33+
}
34+
}
35+
if snippet, found := source.Snippet(e.Line); found {
2236
snippet := strings.Replace(snippet, "\t", " ", -1)
2337
srcLine := "\n | " + snippet
2438
var bytes = []byte(snippet)
2539
var indLine = "\n | "
26-
for i := 0; i < e.Location.Column && len(bytes) > 0; i++ {
40+
for i := 0; i < e.Column && len(bytes) > 0; i++ {
2741
_, sz := utf8.DecodeRune(bytes)
2842
bytes = bytes[sz:]
2943
if sz > 1 {
@@ -54,7 +68,7 @@ func (e *Error) Wrap(err error) {
5468
}
5569

5670
func (e *Error) format() string {
57-
if e.Location.Empty() {
71+
if e.Snippet == "" {
5872
return e.Message
5973
}
6074
return fmt.Sprintf(

file/location.go

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,6 @@
11
package file
22

33
type Location struct {
4-
Line int // The 1-based line of the location.
5-
Column int // The 0-based column number of the location.
6-
}
7-
8-
func (l Location) Empty() bool {
9-
return l.Column == 0 && l.Line == 0
4+
From int `json:"from"`
5+
To int `json:"to"`
106
}

file/source.go

Lines changed: 21 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,78 +1,47 @@
11
package file
22

33
import (
4-
"encoding/json"
54
"strings"
65
"unicode/utf8"
76
)
87

9-
type Source struct {
10-
contents []rune
11-
lineOffsets []int32
12-
}
13-
14-
func NewSource(contents string) *Source {
15-
s := &Source{
16-
contents: []rune(contents),
17-
}
18-
s.updateOffsets()
19-
return s
20-
}
21-
22-
func (s *Source) MarshalJSON() ([]byte, error) {
23-
return json.Marshal(s.contents)
24-
}
25-
26-
func (s *Source) UnmarshalJSON(b []byte) error {
27-
contents := make([]rune, 0)
28-
err := json.Unmarshal(b, &contents)
29-
if err != nil {
30-
return err
31-
}
8+
type Source []rune
329

33-
s.contents = contents
34-
s.updateOffsets()
35-
return nil
10+
func NewSource(contents string) Source {
11+
return []rune(contents)
3612
}
3713

38-
func (s *Source) Content() string {
39-
return string(s.contents)
14+
func (s Source) String() string {
15+
return string(s)
4016
}
4117

42-
func (s *Source) Snippet(line int) (string, bool) {
18+
func (s Source) Snippet(line int) (string, bool) {
4319
if s == nil {
4420
return "", false
4521
}
46-
charStart, found := s.findLineOffset(line)
47-
if !found || len(s.contents) == 0 {
22+
lines := strings.Split(string(s), "\n")
23+
lineOffsets := make([]int, len(lines))
24+
var offset int
25+
for i, line := range lines {
26+
offset = offset + utf8.RuneCountInString(line) + 1
27+
lineOffsets[i] = offset
28+
}
29+
charStart, found := getLineOffset(lineOffsets, line)
30+
if !found || len(s) == 0 {
4831
return "", false
4932
}
50-
charEnd, found := s.findLineOffset(line + 1)
33+
charEnd, found := getLineOffset(lineOffsets, line+1)
5134
if found {
52-
return string(s.contents[charStart : charEnd-1]), true
53-
}
54-
return string(s.contents[charStart:]), true
55-
}
56-
57-
// updateOffsets compute line offsets up front as they are referred to frequently.
58-
func (s *Source) updateOffsets() {
59-
lines := strings.Split(string(s.contents), "\n")
60-
offsets := make([]int32, len(lines))
61-
var offset int32
62-
for i, line := range lines {
63-
offset = offset + int32(utf8.RuneCountInString(line)) + 1
64-
offsets[int32(i)] = offset
35+
return string(s[charStart : charEnd-1]), true
6536
}
66-
s.lineOffsets = offsets
37+
return string(s[charStart:]), true
6738
}
6839

69-
// findLineOffset returns the offset where the (1-indexed) line begins,
70-
// or false if line doesn't exist.
71-
func (s *Source) findLineOffset(line int) (int32, bool) {
40+
func getLineOffset(lineOffsets []int, line int) (int, bool) {
7241
if line == 1 {
7342
return 0, true
74-
} else if line > 1 && line <= len(s.lineOffsets) {
75-
offset := s.lineOffsets[line-2]
43+
} else if line > 1 && line <= len(lineOffsets) {
44+
offset := lineOffsets[line-2]
7645
return offset, true
7746
}
7847
return -1, false

file/source_test.go

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
package file
22

33
import (
4-
"encoding/json"
54
"testing"
6-
7-
"github.com/expr-lang/expr/internal/testify/assert"
85
)
96

107
const (
@@ -55,15 +52,3 @@ func TestStringSource_SnippetSingleLine(t *testing.T) {
5552
t.Errorf(unexpectedSnippet, t.Name(), str2, "")
5653
}
5754
}
58-
59-
func TestStringSource_MarshalJSON(t *testing.T) {
60-
source := NewSource("hello, world")
61-
encoded, err := json.Marshal(source)
62-
assert.NoError(t, err)
63-
assert.Equal(t, `[104,101,108,108,111,44,32,119,111,114,108,100]`, string(encoded))
64-
65-
decoded := &Source{}
66-
err = json.Unmarshal(encoded, decoded)
67-
assert.NoError(t, err)
68-
assert.Equal(t, source.Content(), decoded.Content())
69-
}

parser/lexer/lexer.go

Lines changed: 43 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,18 @@ package lexer
33
import (
44
"fmt"
55
"strings"
6-
"unicode/utf8"
76

87
"github.com/expr-lang/expr/file"
98
)
109

11-
func Lex(source *file.Source) ([]Token, error) {
10+
func Lex(source file.Source) ([]Token, error) {
1211
l := &lexer{
13-
input: source.Content(),
12+
source: source,
1413
tokens: make([]Token, 0),
14+
start: 0,
15+
end: 0,
1516
}
16-
17-
l.loc = file.Location{Line: 1, Column: 0}
18-
l.prev = l.loc
19-
l.startLoc = l.loc
17+
l.commit()
2018

2119
for state := root; state != nil; {
2220
state = state(l)
@@ -30,34 +28,25 @@ func Lex(source *file.Source) ([]Token, error) {
3028
}
3129

3230
type lexer struct {
33-
input string
31+
source file.Source
3432
tokens []Token
35-
start, end int // current position in input
36-
width int // last rune width
37-
startLoc file.Location // start location
38-
prev, loc file.Location // prev location of end location, end location
33+
start, end int
3934
err *file.Error
4035
}
4136

4237
const eof rune = -1
4338

39+
func (l *lexer) commit() {
40+
l.start = l.end
41+
}
42+
4443
func (l *lexer) next() rune {
45-
if l.end >= len(l.input) {
46-
l.width = 0
44+
if l.end >= len(l.source) {
45+
l.end++
4746
return eof
4847
}
49-
r, w := utf8.DecodeRuneInString(l.input[l.end:])
50-
l.width = w
51-
l.end += w
52-
53-
l.prev = l.loc
54-
if r == '\n' {
55-
l.loc.Line++
56-
l.loc.Column = 0
57-
} else {
58-
l.loc.Column++
59-
}
60-
48+
r := l.source[l.end]
49+
l.end++
6150
return r
6251
}
6352

@@ -68,8 +57,7 @@ func (l *lexer) peek() rune {
6857
}
6958

7059
func (l *lexer) backup() {
71-
l.end -= l.width
72-
l.loc = l.prev
60+
l.end--
7361
}
7462

7563
func (l *lexer) emit(t Kind) {
@@ -78,35 +66,39 @@ func (l *lexer) emit(t Kind) {
7866

7967
func (l *lexer) emitValue(t Kind, value string) {
8068
l.tokens = append(l.tokens, Token{
81-
Location: l.startLoc,
69+
Location: file.Location{From: l.start, To: l.end},
8270
Kind: t,
8371
Value: value,
8472
})
85-
l.start = l.end
86-
l.startLoc = l.loc
73+
l.commit()
8774
}
8875

8976
func (l *lexer) emitEOF() {
77+
from := l.end - 2
78+
if from < 0 {
79+
from = 0
80+
}
81+
to := l.end - 1
82+
if to < 0 {
83+
to = 0
84+
}
9085
l.tokens = append(l.tokens, Token{
91-
Location: l.prev, // Point to previous position for better error messages.
86+
Location: file.Location{From: from, To: to},
9287
Kind: EOF,
9388
})
94-
l.start = l.end
95-
l.startLoc = l.loc
89+
l.commit()
9690
}
9791

9892
func (l *lexer) skip() {
99-
l.start = l.end
100-
l.startLoc = l.loc
93+
l.commit()
10194
}
10295

10396
func (l *lexer) word() string {
104-
return l.input[l.start:l.end]
105-
}
106-
107-
func (l *lexer) ignore() {
108-
l.start = l.end
109-
l.startLoc = l.loc
97+
// TODO: boundary check is NOT needed here, but for some reason CI fuzz tests are failing.
98+
if l.start > len(l.source) || l.end > len(l.source) {
99+
return "__invalid__"
100+
}
101+
return string(l.source[l.start:l.end])
110102
}
111103

112104
func (l *lexer) accept(valid string) bool {
@@ -132,18 +124,18 @@ func (l *lexer) skipSpaces() {
132124
}
133125

134126
func (l *lexer) acceptWord(word string) bool {
135-
pos, loc, prev := l.end, l.loc, l.prev
127+
pos := l.end
136128

137129
l.skipSpaces()
138130

139131
for _, ch := range word {
140132
if l.next() != ch {
141-
l.end, l.loc, l.prev = pos, loc, prev
133+
l.end = pos
142134
return false
143135
}
144136
}
145137
if r := l.peek(); r != ' ' && r != eof {
146-
l.end, l.loc, l.prev = pos, loc, prev
138+
l.end = pos
147139
return false
148140
}
149141

@@ -153,8 +145,11 @@ func (l *lexer) acceptWord(word string) bool {
153145
func (l *lexer) error(format string, args ...any) stateFn {
154146
if l.err == nil { // show first error
155147
l.err = &file.Error{
156-
Location: l.loc,
157-
Message: fmt.Sprintf(format, args...),
148+
Location: file.Location{
149+
From: l.end - 1,
150+
To: l.end,
151+
},
152+
Message: fmt.Sprintf(format, args...),
158153
}
159154
}
160155
return nil
@@ -230,6 +225,6 @@ func (l *lexer) scanRawString(quote rune) (n int) {
230225
ch = l.next()
231226
n++
232227
}
233-
l.emitValue(String, l.input[l.start+1:l.end-1])
228+
l.emitValue(String, string(l.source[l.start+1:l.end-1]))
234229
return
235230
}

0 commit comments

Comments
 (0)