11
11
12
12
// TODO: mock up multi-line soon
13
13
14
/// A regex literal delimiter: a base `Kind` together with the number of `#`
/// characters wrapped around it (e.g. `##/.../##` has a pound count of 2).
struct Delimiter: Hashable {
  /// The base delimiter, without any surrounding `#`s.
  let kind: Kind

  /// The number of `#`s placed before the opening and after the closing
  /// delimiter. Must be 0 unless `kind.allowsExtendedPoundSyntax`.
  let poundCount: Int

  /// Creates a delimiter of the given kind with `poundCount` surrounding `#`s.
  ///
  /// - Precondition: `poundCount` is non-negative, and is 0 for any kind that
  ///   does not allow extended pound syntax.
  init(_ kind: Kind, poundCount: Int) {
    // Reject negative counts here rather than letting
    // `String(repeating:count:)` trap later in `opening`/`closing`.
    precondition(poundCount >= 0, "poundCount must be non-negative")
    precondition(kind.allowsExtendedPoundSyntax || poundCount == 0)
    self.kind = kind
    self.poundCount = poundCount
  }

  /// The run of `#`s that wraps the base delimiter on each side.
  private var pounds: String {
    String(repeating: "#", count: poundCount)
  }

  /// The full opening delimiter text: the pounds followed by the kind's
  /// opening.
  var opening: String { pounds + kind.opening }

  /// The full closing delimiter text: the kind's closing followed by the
  /// pounds.
  var closing: String { kind.closing + pounds }

  /// The default set of syntax options that the delimiter indicates.
  var defaultSyntaxOptions: SyntaxOptions {
    switch kind {
    case .forwardSlash, .reSingleQuote:
      return .traditional
    case .experimental, .rxSingleQuote:
      return .experimental
    }
  }
}
41
41
42
extension Delimiter {
  /// The base form of a regex literal delimiter, ignoring any extra `#`s
  /// added by extended pound syntax.
  ///
  /// The declaration order of the cases is significant: callers that iterate
  /// `allCases` try the kinds in this order.
  enum Kind: Hashable, CaseIterable {
    case forwardSlash
    case experimental
    case reSingleQuote
    case rxSingleQuote

    /// The opening and closing text for this kind of delimiter.
    var openingAndClosing: (opening: String, closing: String) {
      switch self {
      case .forwardSlash:  return ("/", "/")
      case .experimental:  return ("#|", "|#")
      case .reSingleQuote: return ("re'", "'")
      case .rxSingleQuote: return ("rx'", "'")
      }
    }

    /// The text that opens a literal of this kind.
    var opening: String { openingAndClosing.opening }

    /// The text that closes a literal of this kind.
    var closing: String { openingAndClosing.closing }

    /// Whether or not extended pound syntax e.g `##/.../##` is allowed with
    /// this delimiter.
    var allowsExtendedPoundSyntax: Bool {
      switch self {
      case .forwardSlash:
        return true
      case .experimental, .reSingleQuote, .rxSingleQuote:
        return false
      }
    }
  }
}
72
+
42
73
struct DelimiterLexError : Error , CustomStringConvertible {
43
74
enum Kind : Hashable {
44
75
case unterminated
@@ -120,25 +151,34 @@ fileprivate struct DelimiterLexer {
120
151
precondition ( cursor <= end, " Cannot advance past end " )
121
152
}
122
153
123
/// Report whether the bytes at the current cursor match `bytes`, without
/// consuming anything.
///
/// - Parameter bytes: The byte sequence to compare against.
/// - Returns: `true` if `slice(bytes.count)` yields a value whose elements
///   equal `bytes`; `false` if they differ or `slice` returns `nil`
///   (presumably because fewer than `bytes.count` bytes remain).
func canEat<C: Collection>(_ bytes: C) -> Bool where C.Element == UInt8 {
  if let upcoming = slice(bytes.count) {
    return upcoming.elementsEqual(bytes)
  }
  return false
}
159
+
160
/// Consume `bytes` from the current cursor if they are next in the input.
///
/// - Parameter bytes: The byte sequence to match and consume.
/// - Returns: `true` if the bytes matched and the cursor was advanced past
///   them; `false` (with the cursor untouched) otherwise.
mutating func tryEat<C: Collection>(
  _ bytes: C
) -> Bool where C.Element == UInt8 {
  if canEat(bytes) {
    advanceCursor(bytes.count)
    return true
  }
  return false
}
128
168
129
/// Consume a single ASCII scalar from the current cursor if it is next.
///
/// - Parameter s: The scalar to match; it is converted with `ascii(_:)` and
///   compared against the value returned by `load()`.
/// - Returns: `true` if the scalar matched and the cursor was advanced by
///   one step; `false` (with the cursor untouched) otherwise.
mutating func tryEat(ascii s: Unicode.Scalar) -> Bool {
  if load() == ascii(s) {
    advanceCursor()
    return true
  }
  return false
}
135
175
136
176
/// Attempt to skip over a closing delimiter character that is unlikely to be
137
177
/// the actual closing delimiter.
138
178
mutating func trySkipDelimiter( _ delimiter: Delimiter ) {
139
179
// Only the closing `'` for re'...'/rx'...' can potentially be skipped over.
140
- switch delimiter {
141
- case . traditional , . experimental:
180
+ switch delimiter. kind {
181
+ case . forwardSlash , . experimental:
142
182
return
143
183
case . reSingleQuote, . rxSingleQuote:
144
184
break
@@ -272,16 +312,42 @@ fileprivate struct DelimiterLexer {
272
312
}
273
313
}
274
314
315
/// Attempt to lex an opening delimiter at the current cursor, given that
/// `poundCount` `#` characters have already been consumed before it.
///
/// Kinds are tried in `Delimiter.Kind.allCases` order; the first one that
/// matches is consumed and returned.
///
/// - Parameter poundCount: The number of `#`s already eaten ahead of the
///   cursor.
/// - Returns: The lexed delimiter, or `nil` if no kind matches.
mutating func tryLexOpeningDelimiter(poundCount: Int) -> Delimiter? {
  let pound = ascii("#")
  for kind in Delimiter.Kind.allCases {
    let opening = kind.opening.utf8

    // If the delimiter allows extended pound syntax, or there are no pounds,
    // we just need to lex it.
    if kind.allowsExtendedPoundSyntax || poundCount == 0 {
      guard tryEat(opening) else { continue }
      return Delimiter(kind, poundCount: poundCount)
    }

    // The delimiter doesn't allow extended pound syntax, so the pounds must be
    // part of the delimiter itself: the opening has to start with exactly the
    // pounds already consumed, and the rest of it must come next in the input.
    // `poundCount < opening.count` guarantees the prefix below has exactly
    // `poundCount` elements.
    guard
      poundCount < opening.count,
      opening.prefix(poundCount).allSatisfy({ $0 == pound }),
      tryEat(opening.dropFirst(poundCount))
    else { continue }

    // The pounds were part of the delimiter, so none are "extended" pounds.
    return Delimiter(kind, poundCount: 0)
  }
  return nil
}
338
+
275
339
/*consuming*/ mutating func lex(
276
340
) throws -> ( contents: String , Delimiter , end: UnsafeRawPointer ) {
341
+ // We can consume any number of pound signs.
342
+ var poundCount = 0
343
+ while tryEat ( ascii: " # " ) {
344
+ poundCount += 1
345
+ }
277
346
278
347
// Try to lex the opening delimiter.
279
- guard let delimiter = Delimiter . allCases. first (
280
- where: { tryEat ( $0. opening. utf8) }
281
- ) else {
348
+ guard let delimiter = tryLexOpeningDelimiter ( poundCount: poundCount) else {
282
349
throw DelimiterLexError ( . unknownDelimiter, resumeAt: cursor. successor ( ) )
283
350
}
284
-
285
351
let contentsStart = cursor
286
352
while true {
287
353
// Check to see if we're at a character that looks like a delimiter, but
@@ -302,20 +368,34 @@ fileprivate struct DelimiterLexer {
302
368
/// Drop a set of regex delimiters from the input string, returning the contents
303
369
/// and the delimiter used. The input string must have valid delimiters.
304
370
func droppingRegexDelimiters( _ str: String ) -> ( String , Delimiter ) {
305
- func stripDelimiter( _ delim: Delimiter ) -> String ? {
371
+ func stripDelimiter( _ kind: Delimiter . Kind ) -> ( String , Delimiter ) ? {
372
+ var slice = str. utf8 [ ... ]
373
+
374
+ // Try strip any number of opening '#'s.
375
+ var poundCount = 0
376
+ if kind. allowsExtendedPoundSyntax {
377
+ poundCount = slice. prefix ( while: {
378
+ $0 == UInt8 ( ( " # " as UnicodeScalar ) . value)
379
+ } ) . count
380
+ slice = slice. dropFirst ( poundCount)
381
+ }
382
+
306
383
// The opening delimiter must match.
307
- guard var slice = str . utf8 . tryDropPrefix ( delim . opening. utf8)
384
+ guard var slice = slice . tryDropPrefix ( kind . opening. utf8)
308
385
else { return nil }
309
386
310
387
// The closing delimiter may optionally match, as it may not be present in
311
388
// invalid code.
389
+ let delim = Delimiter ( kind, poundCount: poundCount)
312
390
if let newSlice = slice. tryDropSuffix ( delim. closing. utf8) {
313
391
slice = newSlice
314
392
}
315
- return String ( slice)
393
+ let result = String ( decoding: slice, as: UTF8 . self)
394
+ precondition ( result. utf8. elementsEqual ( slice) )
395
+ return ( result, delim)
316
396
}
317
- for d in Delimiter . allCases {
318
- if let contents = stripDelimiter ( d ) {
397
+ for kind in Delimiter . Kind . allCases {
398
+ if let ( contents, d ) = stripDelimiter ( kind ) {
319
399
return ( contents, d)
320
400
}
321
401
}
0 commit comments