@@ -25,6 +25,15 @@ public func _debugLog(_ arg0: @autoclosure ()->Any, _ arg1: @autoclosure ()->Any
25
25
/// The index type used by views over random access collections whose logical
/// elements each span a varying number of underlying code units.
public protocol UnicodeIndexProtocol {
  /// The position of this index, expressed as an offset in code units.
  var codeUnitOffset: Int64 { get }

  // FIXME: it's not clear that an offset alone always carries enough
  // information to build an index; the Collection being indexed may also be
  // needed (e.g. when an index caches information). If that turns out to be
  // the case, this requirement would have to move onto the collection instead.
  // In every scenario found *so far*, an index can start with an empty cache
  // that is filled on demand without loss of efficiency.
  /// Creates an index positioned at `codeUnitOffset`.
  init(codeUnitOffset: Int64)
}
30
39
@@ -90,6 +99,20 @@ extension UnicodeStorage.EncodedScalars {
90
99
// practice.
91
100
let nextStride : UInt8
92
101
102
/// Creates an index at `codeUnitOffset`, leaving the stride and scalar at the
/// defaults of `init(offset:nextStride:scalar:)` (a stride of 0 and no scalar).
public init(codeUnitOffset: Int64) {
  let distance: CodeUnits.IndexDistance = numericCast(codeUnitOffset)
  self.init(offset: distance)
}
105
+
106
/// Creates an index by storing each field directly.
///
/// - Parameters:
///   - offset: Stored as this index's `offset`.
///   - nextStride: Stored as `nextStride`; defaults to 0.
///   - scalar: Stored as `scalar`; defaults to `nil`.
internal init(
  offset: CodeUnits.IndexDistance,
  nextStride: UInt8 = 0,
  scalar: Encoding.EncodedScalar? = nil
) {
  self.scalar = scalar
  self.nextStride = nextStride
  self.offset = offset
}
115
+
93
116
/// This index's stored `offset`, converted to `Int64`.
public var codeUnitOffset: Int64 {
  let converted: Int64 = numericCast(offset)
  return converted
}
94
117
95
118
var nextOffset : CodeUnits . IndexDistance {
@@ -100,74 +123,86 @@ extension UnicodeStorage.EncodedScalars {
100
123
// EncodedScalar so as not to waste a separate bool here.
101
124
let scalar : Encoding . EncodedScalar ?
102
125
}
126
+
127
/// Returns the index into `codeUnits` found at `i.offset`.
internal func _base(_ i: Index) -> CodeUnits.Index {
  let position = i.offset
  return codeUnits.index(atOffset: position)
}
130
+
131
/// Returns the index into `codeUnits` found at `i.nextOffset`.
internal func _next(_ i: Index) -> CodeUnits.Index {
  let position = i.nextOffset
  return codeUnits.index(atOffset: position)
}
134
+
135
/// Builds an `Index` from a pair of `codeUnits` indices.
///
/// - Parameters:
///   - base: Where the element begins; its offset becomes the index's `offset`.
///   - next: Where the element ends; the number of code units in
///     `base..<next` becomes the index's `nextStride`.
///   - scalar: The scalar value to store in the index, if any.
internal func _index(
  base: CodeUnits.Index, next: CodeUnits.Index, scalar: Encoding.EncodedScalar?
) -> Index {
  let start: CodeUnits.IndexDistance = codeUnits.offset(of: base)
  let stride: UInt8 = numericCast(codeUnits[base..<next].count)
  return Index(offset: start, nextStride: stride, scalar: scalar)
}
103
144
}
104
145
105
146
/// Collection Conformance
106
147
extension UnicodeStorage . EncodedScalars : BidirectionalCollection {
107
148
/// The index of the first scalar, or `endIndex` when there are no code units.
public var startIndex: Index {
  guard !_slowPath(codeUnits.isEmpty) else { return endIndex }
  // Advance from an empty, zero-stride index placed at the very start of the
  // code units; the result is whatever `index(after:)` finds there.
  let origin = codeUnits.startIndex
  let seed = _index(base: origin, next: origin, scalar: nil)
  return index(after: seed)
}
111
153
112
154
/// The past-the-end index: positioned at `codeUnits.endIndex`, with an empty
/// `base..<next` range and no scalar.
public var endIndex: Index {
  let limit = codeUnits.endIndex
  return _index(base: limit, next: limit, scalar: nil)
}
115
158
116
159
/// Returns the encoded scalar at `i`.
///
/// An index that already carries a scalar answers directly from it. Otherwise
/// the scalar is found by running `index(after:)` from a probe index placed at
/// `i.offset` with a zero stride and no scalar.
///
/// - Parameter i: The index whose scalar is wanted.
/// - Returns: The scalar stored in `i`, or the one `index(after:)` produces
///   from the probe.
public subscript(i: Index) -> Encoding.EncodedScalar {
  if let cached = i.scalar {
    return cached
  }
  // Build the probe from `i.offset` only, so the starting point never depends
  // on whatever stride `i` happens to carry alongside a nil scalar.
  // NOTE(review): assumes a zero `nextStride` makes `nextOffset` equal
  // `offset`, so that `index(after:)` starts at `i.offset` — confirm.
  let probe = Index(offset: i.offset, nextStride: 0, scalar: nil)
  guard let decoded = index(after: probe).scalar else {
    // `index(after:)` returned an index with no scalar (it returns `endIndex`
    // on empty input), so there is nothing to read at `i`.
    fatalError("Subscripting past end of code units")
  }
  return decoded
}
123
167
124
168
/// Returns the index of the scalar that follows `i`.
///
/// Parsing starts at `_next(i)`. A valid parse yields an index carrying the
/// parsed scalar; a parse error yields an index carrying the encoding's form
/// of U+FFFD. When no code units remain, the result is `endIndex`.
public func index(after i: Index) -> Index {
  let start = _next(i)
  var remainder = codeUnits[start..<codeUnits.endIndex]
  while true {
    switch Encoding.parse1Forward(remainder, knownCount: 0) {
    case .emptyInput:
      return endIndex
    case .valid(let parsed, let nextIndex):
      return _index(base: start, next: nextIndex, scalar: parsed)
    case .error(let nextIndex):
      // FIXME: don't go through UnicodeScalar once this is in the stdlib
      guard let replacement = Encoding.encode(
        UTF32.EncodedScalar(UnicodeScalar(0xFFFD)!))
      else {
        // The encoding has no way to represent a replacement character, so
        // the best available option is to discard the bad input and resume
        // parsing just past it.
        remainder = codeUnits[nextIndex...]
        continue
      }
      return _index(base: start, next: nextIndex, scalar: replacement)
    }
  }
}
149
189
150
190
public func index( before i: Index ) -> Index {
151
- var remainder = codeUnits [ ..< codeUnits . index ( atOffset : i . offset ) ]
191
+ var remainder = codeUnits [ ..< _base ( i ) ]
152
192
while true {
153
193
switch Encoding . parse1Reverse ( remainder, knownCount: 0 ) {
154
194
case . valid( let scalar, let priorIndex) :
155
- let stride = remainder [ priorIndex... ] . count
156
- return Index (
157
- offset: i. offset - numericCast( stride) ,
158
- nextStride: numericCast ( stride) ,
159
- scalar: scalar)
195
+ return _index ( base: priorIndex, next: _base ( i) , scalar: scalar)
160
196
case . error( let priorIndex) :
161
- let stride = remainder [ priorIndex... ] . count
162
197
// FIXME: don't go through UnicodeScalar once this is in the stdlib
163
198
if let replacement = Encoding . encode (
164
199
UTF32 . EncodedScalar ( UnicodeScalar ( 0xFFFD ) !) ) {
165
- return Index (
166
- offset: i. offset - numericCast( stride) ,
167
- nextStride: numericCast ( stride) ,
168
- scalar: replacement)
200
+ return _index ( base: priorIndex, next: _base ( i) , scalar: replacement)
169
201
}
170
- remainder = remainder. dropLast ( )
202
+ // If we get here, the encoding couldn't represent a replacement
203
+ // character, so the best we can do is to drop that scalar on the floor
204
+ // and keep going.
205
+ remainder = codeUnits [ ..< priorIndex]
171
206
case . emptyInput:
172
207
fatalError ( " Indexing past start of code units " )
173
208
}
@@ -247,6 +282,7 @@ extension UnicodeStorage : _UTextable {
247
282
_ deep: Bool , _ status: UnsafeMutablePointer < _UErrorCode > ?
248
283
) -> UnsafeMutablePointer < _UText > {
249
284
UnsafeMutablePointer ( mutating: src) [ 0 ] . validate ( )
285
+ _sanityCheck ( !deep, " deep cloning not supported " )
250
286
// _debugLog("_clone with dst = \(String(describing: dst))")
251
287
// _debugLog("src: \(src[0])")
252
288
let r = dst
@@ -258,6 +294,14 @@ extension UnicodeStorage : _UTextable {
258
294
return r
259
295
}
260
296
297
// A helper for translating indices out of the result of _parsedSuffix:
// the scalars view of a UnicodeStorage wrapped around the full-range slice
// of `codeUnits`.
internal var _indexBase
: UnicodeStorage<CodeUnits.SubSequence, Encoding>.EncodedScalars {
  let everything: CodeUnits.SubSequence = codeUnits[...]
  let sliceStorage = UnicodeStorage<CodeUnits.SubSequence, Encoding>(everything)
  return sliceStorage.scalars
}
304
+
261
305
internal func _access(
262
306
_ u: inout _UText , _ nativeTargetIndex: Int64 , _ forward: Bool
263
307
) -> Bool {
@@ -280,6 +324,8 @@ extension UnicodeStorage : _UTextable {
280
324
return true
281
325
}
282
326
// _debugLog("_access: filling buffer")
327
+
328
+ // FIXME: should we use parseForward/parseReverse on some slice?
283
329
284
330
guard ( 0 ... codeUnits. count^) . contains ( nativeTargetIndex)
285
331
else { return false }
@@ -301,7 +347,7 @@ extension UnicodeStorage : _UTextable {
301
347
buffer [ u. chunkLength^] = unit
302
348
u. chunkLength += 1
303
349
}
304
- u. chunkNativeLimit = i . nextOffset ^
350
+ u. chunkNativeLimit = codeUnits . offset ( of : _indexBase . _next ( i ) ) ^
305
351
}
306
352
}
307
353
else {
@@ -320,7 +366,7 @@ extension UnicodeStorage : _UTextable {
320
366
buffer [ u. chunkLength^] = unit
321
367
u. chunkLength += 1
322
368
}
323
- u. chunkNativeStart = i . codeUnitOffset
369
+ u. chunkNativeStart = codeUnits . offset ( of : _indexBase . _base ( i ) ) ^
324
370
u. chunkOffset = u. chunkLength
325
371
}
326
372
var b = buffer // copy due to https://bugs.swift.org/browse/SR-3782
@@ -382,7 +428,7 @@ extension UnicodeStorage : _UTextable {
382
428
for i in chunkSource. indices {
383
429
chunkOffset += chunkSource [ i] . utf16. count
384
430
if chunkOffset == u [ 0 ] . chunkOffset^ {
385
- return i . nextOffset ^
431
+ return codeUnits . offset ( of : _indexBase . _next ( i ) ) ^
386
432
}
387
433
}
388
434
fatalError ( " supposed to be unreachable " )
0 commit comments