1
+ //===----------------------------------------------------------------------===//
2
+ //
3
+ // This source file is part of the Swift.org open source project
4
+ //
5
+ // Copyright (c) 2021-2022 Apple Inc. and the Swift project authors
6
+ // Licensed under Apache License v2.0 with Runtime Library Exception
7
+ //
8
+ // See https://swift.org/LICENSE.txt for license information
9
+ //
10
+ //===----------------------------------------------------------------------===//
11
+
12
+ @_spi ( _Unicode)
13
+ import Swift
14
+
1
15
@_implementationOnly import _RegexParser
2
16
3
17
extension Compiler {
@@ -107,12 +121,13 @@ fileprivate extension Compiler.ByteCodeGen {
107
121
// need to supply both a slice bounds and a per-search bounds.
108
122
switch kind {
109
123
case . startOfSubject:
110
- builder. buildAssert { ( input, pos, subjectBounds) in
124
+ builder. buildAssert { ( _ , _ , input, pos, subjectBounds) in
111
125
pos == subjectBounds. lowerBound
112
126
}
113
127
114
128
case . endOfSubjectBeforeNewline:
115
- builder. buildAssert { [ semanticLevel = options. semanticLevel] ( input, pos, subjectBounds) in
129
+ builder. buildAssert { [ semanticLevel = options. semanticLevel]
130
+ ( _, _, input, pos, subjectBounds) in
116
131
if pos == subjectBounds. upperBound { return true }
117
132
switch semanticLevel {
118
133
case . graphemeCluster:
@@ -125,7 +140,7 @@ fileprivate extension Compiler.ByteCodeGen {
125
140
}
126
141
127
142
case . endOfSubject:
128
- builder. buildAssert { ( input, pos, subjectBounds) in
143
+ builder. buildAssert { ( _ , _ , input, pos, subjectBounds) in
129
144
pos == subjectBounds. upperBound
130
145
}
131
146
@@ -138,16 +153,16 @@ fileprivate extension Compiler.ByteCodeGen {
138
153
139
154
// FIXME: This needs to be based on `searchBounds`,
140
155
// not the `subjectBounds` given as an argument here
141
- builder. buildAssert { ( input, pos, subjectBounds) in false }
156
+ builder. buildAssert { ( _ , _ , input, pos, subjectBounds) in false }
142
157
143
158
case . textSegment:
144
- builder. buildAssert { ( input, pos, _) in
159
+ builder. buildAssert { ( _ , _ , input, pos, _) in
145
160
// FIXME: Choose grapheme-cluster or word boundaries based on options; only grapheme-cluster boundaries are checked for now
146
161
input. isOnGraphemeClusterBoundary ( pos)
147
162
}
148
163
149
164
case . notTextSegment:
150
- builder. buildAssert { ( input, pos, _) in
165
+ builder. buildAssert { ( _ , _ , input, pos, _) in
151
166
// FIXME: Choose grapheme-cluster or word boundaries based on options; only grapheme-cluster boundaries are checked for now
152
167
!input. isOnGraphemeClusterBoundary ( pos)
153
168
}
@@ -158,7 +173,8 @@ fileprivate extension Compiler.ByteCodeGen {
158
173
// the DSL-based `.startOfLine` anchor should always match the start
159
174
// of a line. Right now we don't distinguish between those anchors.
160
175
if options. anchorsMatchNewlines {
161
- builder. buildAssert { [ semanticLevel = options. semanticLevel] ( input, pos, subjectBounds) in
176
+ builder. buildAssert { [ semanticLevel = options. semanticLevel]
177
+ ( _, _, input, pos, subjectBounds) in
162
178
if pos == subjectBounds. lowerBound { return true }
163
179
switch semanticLevel {
164
180
case . graphemeCluster:
@@ -168,7 +184,7 @@ fileprivate extension Compiler.ByteCodeGen {
168
184
}
169
185
}
170
186
} else {
171
- builder. buildAssert { ( input, pos, subjectBounds) in
187
+ builder. buildAssert { ( _ , _ , input, pos, subjectBounds) in
172
188
pos == subjectBounds. lowerBound
173
189
}
174
190
}
@@ -179,7 +195,8 @@ fileprivate extension Compiler.ByteCodeGen {
179
195
// the DSL-based `.endOfLine` anchor should always match the end
180
196
// of a line. Right now we don't distinguish between those anchors.
181
197
if options. anchorsMatchNewlines {
182
- builder. buildAssert { [ semanticLevel = options. semanticLevel] ( input, pos, subjectBounds) in
198
+ builder. buildAssert { [ semanticLevel = options. semanticLevel]
199
+ ( _, _, input, pos, subjectBounds) in
183
200
if pos == subjectBounds. upperBound { return true }
184
201
switch semanticLevel {
185
202
case . graphemeCluster:
@@ -189,25 +206,41 @@ fileprivate extension Compiler.ByteCodeGen {
189
206
}
190
207
}
191
208
} else {
192
- builder. buildAssert { ( input, pos, subjectBounds) in
209
+ builder. buildAssert { ( _ , _ , input, pos, subjectBounds) in
193
210
pos == subjectBounds. upperBound
194
211
}
195
212
}
196
213
197
214
case . wordBoundary:
198
- // TODO: May want to consider Unicode level
199
- builder. buildAssert { [ options] ( input, pos, subjectBounds) in
200
- // TODO: How should we handle bounds?
201
- _CharacterClassModel. word. isBoundary (
202
- input, at: pos, bounds: subjectBounds, with: options)
215
+ builder. buildAssert { [ options]
216
+ ( cache, maxIndex, input, pos, subjectBounds) in
217
+ if options. usesSimpleUnicodeBoundaries {
218
+ // TODO: How should we handle bounds?
219
+ return _CharacterClassModel. word. isBoundary (
220
+ input,
221
+ at: pos,
222
+ bounds: subjectBounds,
223
+ with: options
224
+ )
225
+ } else {
226
+ return input. isOnWordBoundary ( at: pos, using: & cache, & maxIndex)
227
+ }
203
228
}
204
229
205
230
case . notWordBoundary:
206
- // TODO: May want to consider Unicode level
207
- builder. buildAssert { [ options] ( input, pos, subjectBounds) in
208
- // TODO: How should we handle bounds?
209
- !_CharacterClassModel. word. isBoundary (
210
- input, at: pos, bounds: subjectBounds, with: options)
231
+ builder. buildAssert { [ options]
232
+ ( cache, maxIndex, input, pos, subjectBounds) in
233
+ if options. usesSimpleUnicodeBoundaries {
234
+ // TODO: How should we handle bounds?
235
+ return !_CharacterClassModel. word. isBoundary (
236
+ input,
237
+ at: pos,
238
+ bounds: subjectBounds,
239
+ with: options
240
+ )
241
+ } else {
242
+ return !input. isOnWordBoundary ( at: pos, using: & cache, & maxIndex)
243
+ }
211
244
}
212
245
}
213
246
}
0 commit comments