@@ -157,6 +157,19 @@ extension Source {
157
157
return . init( start ..< currentPosition)
158
158
}
159
159
160
+ /// Attempt to eat a prefix that satisfies a given predicate, with the
161
+ /// source location recorded.
///
/// The actual eating is delegated to `tryEatPrefix(maxLength:_:)`, which is
/// invoked inside `recordLoc` so that a location is captured alongside the
/// eaten text.
///
/// - Parameters:
///   - maxLength: Forwarded unchanged to `tryEatPrefix`; defaults to `nil`.
///     Presumably an upper bound on the prefix length, with `nil` meaning
///     no bound -- TODO confirm against `tryEatPrefix`.
///   - f: The predicate forwarded unchanged to `tryEatPrefix`.
/// - Returns: The eaten prefix converted through its `string` property and
///   wrapped in `Located`, or `nil` when the located result is `nil`.
162
+ mutating func tryEatLocatedPrefix(
163
+ maxLength: Int ? = nil ,
164
+ _ f: ( Char ) -> Bool
165
+ ) -> Located < String > ? {
166
// Run the eat inside recordLoc so the consumed range is tracked.
+ let result = recordLoc { src in
167
+ src. tryEatPrefix ( maxLength: maxLength, f)
168
+ }
169
// No located value was produced (presumably nothing matched): report nil.
+ guard let result = result else { return nil }
170
// Convert the located value via its `string` property; the location is
// presumably carried over by `map` -- confirm against `Located.map`.
+ return result. map ( \. string)
171
+ }
172
+
160
173
/// Throws an expected ASCII character error if not matched
161
174
mutating func expectASCII( ) throws -> Located < Character > {
162
175
try recordLoc { src in
@@ -217,13 +230,13 @@ extension Source {
217
230
/// return the scalar value, or throw an error if the string is malformed or
218
231
/// would overflow the scalar.
219
232
private static func validateUnicodeScalar(
220
// Diff note: the removed signature took a plain String and returned
// Unicode.Scalar; the added signature takes a located string and returns
// AST.Atom.Scalar, so the result can carry the source location of the digits.
- _ str: String , _ kind: RadixKind
221
- ) throws -> Unicode . Scalar {
222
- let num = try validateNumber ( str, UInt32 . self, kind)
233
+ _ str: Source . Located < String > , _ kind: RadixKind
234
+ ) throws -> AST . Atom . Scalar {
235
// Only the text portion of the located string is handed to validateNumber,
// which is asked for a UInt32 in the given radix kind.
+ let num = try validateNumber ( str. value , UInt32 . self, kind)
223
236
// The UInt32 initializer of Unicode.Scalar is failable: it yields nil for
// values that are not valid Unicode scalar values, which is reported here
// as a misc parse error.
guard let scalar = Unicode . Scalar ( num) else {
224
237
throw ParseError . misc ( " Invalid scalar value U+ \( num. hexStr) " )
225
238
}
226
- return scalar
239
// Pair the validated scalar with the location carried by the input string.
+ return . init ( scalar, str . location )
227
240
}
228
241
229
242
/// Try to eat a number of a particular type and radix off the front.
@@ -266,14 +279,15 @@ extension Source {
266
279
/// Eat a scalar value from hexadecimal notation off the front.
///
/// Eats up to `numDigits` characters, requires that exactly `numDigits` were
/// obtained, and then validates the text as a hexadecimal scalar value.
///
/// - Parameter numDigits: The exact number of characters the scalar must be
///   written with.
/// - Throws: `ParseError.expectedNumDigits` when the eaten text does not have
///   `numDigits` characters, plus anything thrown by
///   `Source.validateUnicodeScalar`.
267
280
private mutating func expectUnicodeScalar(
268
281
numDigits: Int
269
- ) throws -> Located < Unicode . Scalar > {
270
- try recordLoc { src in
282
+ ) throws -> AST . Atom . Scalar {
283
// The added version only collects the located digit string inside the
// closure; validation happens afterwards so that the location can be passed
// along with the text.
+ let str = try recordLoc { src -> String in
271
284
let str = src. eat ( upToCount: numDigits) . string
272
285
// eat(upToCount:) is asked for at most numDigits characters; anything other
// than exactly numDigits is rejected.
guard str. count == numDigits else {
273
286
throw ParseError . expectedNumDigits ( str, numDigits)
274
287
}
275
- return try Source . validateUnicodeScalar ( str, . hex )
288
+ return str
276
289
}
290
+ return try Source . validateUnicodeScalar ( str, . hex)
277
291
}
278
292
279
293
/// Eat a scalar off the front, starting from after the
@@ -289,49 +303,57 @@ extension Source {
289
303
///
290
304
mutating func expectUnicodeScalar(
291
305
escapedCharacter base: Character
292
// Diff note: the removed version returned a located Unicode.Scalar; the added
// version returns an AST.Atom.Kind whose scalar payload carries its own
// location, and the location recorded by the outer recordLoc is dropped via
// the trailing value access at the end of the closure.
- ) throws -> Located < Unicode . Scalar > {
306
+ ) throws -> AST . Atom . Kind {
293
307
try recordLoc { src in
308
+
309
// Builds a scalar atom for U+0000 located at the empty range at the current
// position. Used by the branches below when no digits follow the escape.
+ func nullScalar( ) -> AST . Atom . Kind {
310
+ let pos = src. currentPosition
311
+ return . scalar( . init( UnicodeScalar ( 0 ) , SourceLocation ( pos ..< pos) ) )
312
+ }
313
+
294
314
// TODO: PCRE offers a different behavior if PCRE2_ALT_BSUX is set.
295
315
switch base {
296
316
// Hex numbers.
// Braced form for u and x: the opening brace is eaten by the case guard,
// then everything up to the closing brace is validated as hexadecimal.
297
317
case " u " where src. tryEat ( " { " ) , " x " where src. tryEat ( " { " ) :
298
- let str = try src. lexUntil ( eating: " } " ) . value
299
- return try Source . validateUnicodeScalar ( str, . hex)
318
+ let str = try src. lexUntil ( eating: " } " )
319
+ return . scalar ( try Source . validateUnicodeScalar ( str, . hex) )
300
320
301
321
case " x " :
302
322
// \x expects *up to* 2 digits.
303
- guard let digits = src. tryEatPrefix ( maxLength: 2 , \. isHexDigit) else {
323
+ guard let digits = src. tryEatLocatedPrefix ( maxLength: 2 , \. isHexDigit)
324
+ else {
304
325
// In PCRE, \x without any valid hex digits is \u{0}.
305
326
// TODO: This doesn't appear to be followed by ICU or Oniguruma, so
306
327
// could be changed to throw an error if we had a parsing mode for
307
328
// them.
308
- return Unicode . Scalar ( 0 )
329
+ return nullScalar ( )
309
330
}
310
- return try Source . validateUnicodeScalar ( digits. string , . hex)
331
+ return . scalar ( try Source . validateUnicodeScalar ( digits, . hex) )
311
332
312
333
// Fixed-width forms: 4 digits are requested after u and 8 after U.
case " u " :
313
- return try src. expectUnicodeScalar ( numDigits: 4 ) . value
334
+ return . scalar ( try src. expectUnicodeScalar ( numDigits: 4 ) )
314
335
case " U " :
315
- return try src. expectUnicodeScalar ( numDigits: 8 ) . value
336
+ return . scalar ( try src. expectUnicodeScalar ( numDigits: 8 ) )
316
337
317
338
// Octal numbers.
// Braced form for o: only taken when an opening brace can be eaten.
318
339
case " o " where src. tryEat ( " { " ) :
319
- let str = try src. lexUntil ( eating: " } " ) . value
320
- return try Source . validateUnicodeScalar ( str, . octal)
340
+ let str = try src. lexUntil ( eating: " } " )
341
+ return . scalar ( try Source . validateUnicodeScalar ( str, . octal) )
321
342
322
343
case " 0 " :
323
344
// We can read *up to* 3 more octal digits.
324
345
// FIXME: PCRE can only read up to 2 octal digits, if we get a strict
325
346
// PCRE mode, we should limit it here.
326
- guard let digits = src. tryEatPrefix ( maxLength: 3 , \. isOctalDigit) else {
327
- return Unicode . Scalar ( 0 )
347
+ guard let digits = src. tryEatLocatedPrefix ( maxLength: 3 , \. isOctalDigit)
348
+ else {
349
+ return nullScalar ( )
328
350
}
329
- return try Source . validateUnicodeScalar ( digits. string , . octal)
351
+ return . scalar ( try Source . validateUnicodeScalar ( digits, . octal) )
330
352
331
353
// Any base not handled above traps.
// NOTE(review): a base of o that is not followed by an opening brace matches
// no case above and lands here; the call site visible in this diff forwards
// o without checking for the brace -- confirm whether malformed input can
// reach this fatalError instead of producing a parse error.
default :
332
354
fatalError ( " Unexpected scalar start " )
333
355
}
334
- }
356
+ } . value
335
357
}
336
358
337
359
/// Try to consume a quantifier
@@ -1153,7 +1175,7 @@ extension Source {
1153
1175
1154
1176
// We should either have a unicode scalar.
1155
1177
if src. tryEat ( sequence: " U+ " ) {
1156
- let str = try src. lexUntil ( eating: " } " ) . value
1178
+ let str = try src. lexUntil ( eating: " } " )
1157
1179
return . scalar( try Source . validateUnicodeScalar ( str, . hex) )
1158
1180
}
1159
1181
@@ -1581,8 +1603,7 @@ extension Source {
1581
1603
switch char {
1582
1604
// Hexadecimal and octal unicode scalars.
1583
1605
case " u " , " x " , " U " , " o " , " 0 " :
1584
- return try . scalar(
1585
- src. expectUnicodeScalar ( escapedCharacter: char) . value)
1606
+ return try src. expectUnicodeScalar ( escapedCharacter: char)
1586
1607
default :
1587
1608
break
1588
1609
}
0 commit comments