@@ -22,6 +22,28 @@ public func _debugLog(_ arg0: @autoclosure ()->Any, _ arg1: @autoclosure ()->Any
22
22
print ( arg0 ( ) , arg1 ( ) )
23
23
}
24
24
25
+ /// An index type for views onto random access collections whose elements are
26
+ /// effectively variable-width.
27
+ public protocol UnicodeIndexProtocol {
28
+ var codeUnitOffset : Int64 { get }
29
+ }
30
+
31
+ extension UnicodeIndexProtocol {
32
+ public static func == ( l: UnicodeIndexProtocol , r: UnicodeIndexProtocol ) -> Bool {
33
+ return l. codeUnitOffset == r. codeUnitOffset
34
+ }
35
+ public static func < ( l: UnicodeIndexProtocol , r: UnicodeIndexProtocol ) -> Bool {
36
+ return l. codeUnitOffset < r. codeUnitOffset
37
+ }
38
+
39
+ public static func == ( l: Self , r: Self ) -> Bool {
40
+ return l. codeUnitOffset == r. codeUnitOffset
41
+ }
42
+ public static func < ( l: Self , r: Self ) -> Bool {
43
+ return l. codeUnitOffset < r. codeUnitOffset
44
+ }
45
+ }
46
+
25
47
/// A collection of `CodeUnit`s to be interpreted by some `Encoding`
26
48
public struct UnicodeStorage <
27
49
CodeUnits : RandomAccessCollection ,
@@ -60,59 +82,62 @@ extension UnicodeStorage.EncodedScalars {
60
82
// and the next index. This would obviously be more complicated if
61
83
// the buffer contained more than a single scalar (and it probably
62
84
// should).
63
- public struct Index : Comparable {
64
- let base : CodeUnits . Index
85
+ public struct Index : UnicodeIndexProtocol , Comparable {
86
+ let offset : CodeUnits . IndexDistance
65
87
// FIXME: We might get a much better memory footprint if we used a
66
88
// UInt8 to store the distance between base and next, rather than
67
89
// storing next explicitly. CodeUnits will be random-access in
68
90
// practice.
69
- let next : CodeUnits . Index
91
+ let nextStride : UInt8
92
+
93
+ public var codeUnitOffset : Int64 { return numericCast ( offset) }
94
+
95
+ var nextOffset : CodeUnits . IndexDistance {
96
+ return offset + numericCast( nextStride)
97
+ }
98
+
70
99
// FIXME: there should be an invalid inhabitant we can use in
71
100
// EncodedScalar so as not to waste a separate bool here.
72
101
let scalar : Encoding . EncodedScalar ?
73
-
74
- public static func < ( lhs: Index , rhs: Index ) -> Bool {
75
- return lhs. base < rhs. base
76
- }
77
- public static func == ( lhs: Index , rhs: Index ) -> Bool {
78
- return lhs. base == rhs. base
79
- }
80
102
}
81
103
}
82
104
83
105
/// Collection Conformance
84
106
extension UnicodeStorage . EncodedScalars : BidirectionalCollection {
85
107
public var startIndex : Index {
86
108
if _slowPath ( codeUnits. isEmpty) { return endIndex }
87
- let s = codeUnits. startIndex
88
- return index ( after: Index ( base: s, next: s, scalar: nil ) )
109
+ return index ( after: Index ( offset: 0 , nextStride: 0 , scalar: nil ) )
89
110
}
90
111
91
112
public var endIndex : Index {
92
- let s = codeUnits. endIndex
93
- return Index ( base: s, next: s, scalar: nil )
113
+ return Index ( offset: codeUnits. count, nextStride: 0 , scalar: nil )
94
114
}
95
115
96
116
public subscript( i: Index ) -> Encoding . EncodedScalar {
97
117
if let r = i. scalar {
98
118
return r
99
119
}
100
120
return index ( after:
101
- Index ( base : i. base , next : i . base , scalar: nil ) ) . scalar!
121
+ Index ( offset : i. offset , nextStride : 0 , scalar: nil ) ) . scalar!
102
122
}
103
123
104
124
public func index( after i: Index ) -> Index {
105
- var remainder = codeUnits [ i. next..< codeUnits. endIndex]
125
+ let p = codeUnits. index ( atOffset: i. nextOffset)
126
+ var remainder = codeUnits [ p... ]
106
127
while true {
107
128
switch Encoding . parse1Forward ( remainder, knownCount: 0 ) {
108
129
case . valid( let scalar, let nextIndex) :
109
- return Index ( base: i. next, next: nextIndex, scalar: scalar)
130
+ return Index (
131
+ offset: i. nextOffset,
132
+ nextStride: numericCast ( remainder. offset ( of: nextIndex) ) ,
133
+ scalar: scalar)
110
134
case . error( let nextIndex) :
111
135
// FIXME: don't go through UnicodeScalar once this is in the stdlib
112
136
if let replacement = Encoding . encode (
113
137
UTF32 . EncodedScalar ( UnicodeScalar ( 0xFFFD ) !) ) {
114
138
return Index (
115
- base: i. next, next: nextIndex,
139
+ offset: i. nextOffset,
140
+ nextStride: numericCast ( remainder. offset ( of: nextIndex) ) ,
116
141
scalar: replacement)
117
142
}
118
143
remainder = remainder. dropFirst ( )
@@ -123,17 +148,23 @@ extension UnicodeStorage.EncodedScalars : BidirectionalCollection {
123
148
}
124
149
125
150
public func index( before i: Index ) -> Index {
126
- var remainder = codeUnits [ ..< i . base ]
151
+ var remainder = codeUnits [ ..< codeUnits . index ( atOffset : i . offset ) ]
127
152
while true {
128
153
switch Encoding . parse1Reverse ( remainder, knownCount: 0 ) {
129
154
case . valid( let scalar, let priorIndex) :
130
- return Index ( base: priorIndex, next: i. base, scalar: scalar)
155
+ let stride = remainder [ priorIndex... ] . count
156
+ return Index (
157
+ offset: i. offset - numericCast( stride) ,
158
+ nextStride: numericCast ( stride) ,
159
+ scalar: scalar)
131
160
case . error( let priorIndex) :
161
+ let stride = remainder [ priorIndex... ] . count
132
162
// FIXME: don't go through UnicodeScalar once this is in the stdlib
133
163
if let replacement = Encoding . encode (
134
164
UTF32 . EncodedScalar ( UnicodeScalar ( 0xFFFD ) !) ) {
135
165
return Index (
136
- base: priorIndex, next: i. base,
166
+ offset: i. offset - numericCast( stride) ,
167
+ nextStride: numericCast ( stride) ,
137
168
scalar: replacement)
138
169
}
139
170
remainder = remainder. dropLast ( )
@@ -149,12 +180,12 @@ extension UnicodeStorage {
149
180
/// `FromEncoding`, provides a collection of `ToEncoding.CodeUnit`s
150
181
/// representing the same text.
151
182
public struct TranscodedView < ToEncoding : UnicodeEncoding > : BidirectionalCollection {
152
- typealias FromEncoding = Encoding
183
+ public typealias FromEncoding = Encoding
153
184
154
185
// We could just be a generic typealias as this type, but it turns
155
186
// out to be impossible, or nearly so, to write the init() below.
156
187
// Instead, we wrap an instance of Base.
157
- typealias Base = FlattenBidirectionalCollection <
188
+ public typealias Base = FlattenBidirectionalCollection <
158
189
LazyMapBidirectionalCollection <
159
190
UnicodeStorage < CodeUnits , FromEncoding > . EncodedScalars ,
160
191
ToEncoding . EncodedScalar
@@ -171,8 +202,6 @@ extension UnicodeStorage {
171
202
} )
172
203
}
173
204
174
- // FIXME: this should go in the extension below but for <rdar://30320012>
175
- //typealias SubSequence = BidirectionalSlice<TranscodedView>
176
205
public var startIndex : Base . Index {
177
206
return base. startIndex
178
207
}
@@ -272,7 +301,7 @@ extension UnicodeStorage : _UTextable {
272
301
buffer [ u. chunkLength^] = unit
273
302
u. chunkLength += 1
274
303
}
275
- u. chunkNativeLimit = codeUnits . offset ( of : i . next ) ^
304
+ u. chunkNativeLimit = i . nextOffset ^
276
305
}
277
306
}
278
307
else {
@@ -291,7 +320,7 @@ extension UnicodeStorage : _UTextable {
291
320
buffer [ u. chunkLength^] = unit
292
321
u. chunkLength += 1
293
322
}
294
- u. chunkNativeStart = codeUnits . offset ( of : i . base ) ^
323
+ u. chunkNativeStart = i . codeUnitOffset
295
324
u. chunkOffset = u. chunkLength
296
325
}
297
326
var b = buffer // copy due to https://bugs.swift.org/browse/SR-3782
@@ -353,7 +382,7 @@ extension UnicodeStorage : _UTextable {
353
382
for i in chunkSource. indices {
354
383
chunkOffset += chunkSource [ i] . utf16. count
355
384
if chunkOffset == u [ 0 ] . chunkOffset^ {
356
- return codeUnits . offset ( of : i . next ) ^
385
+ return i . nextOffset ^
357
386
}
358
387
}
359
388
fatalError ( " supposed to be unreachable " )
@@ -454,10 +483,6 @@ extension UnicodeStorage {
454
483
defer { __swift_stdlib_ubrk_close ( bi) }
455
484
456
485
return storage. _withUText { u in
457
- let access = u [ 0 ] . pFuncs [ 0 ] . access ( u, storage. codeUnits. offset ( of: i) ^, 1 )
458
- // _debugLog("access result:", access)
459
- // _debugLog("ubrk_setUText(breakIterator: \(bi), u: \(u)")
460
- // _debugLog("u: \(u.pointee)")
461
486
__swift_stdlib_ubrk_setUText ( bi, u, & err)
462
487
_precondition ( err. isSuccess, " unexpected ubrk_setUText failure " )
463
488
return body ( bi)
0 commit comments