Skip to content

Commit 6141fa7

Browse files
author
Dave Abrahams
committed
WIP on providing type-erased default implementations of views
1 parent 25a687e commit 6141fa7

File tree

2 files changed

+139
-36
lines changed

2 files changed

+139
-36
lines changed

stdlib/public/core/UnicodeStorage.swift

Lines changed: 57 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,28 @@ public func _debugLog(_ arg0: @autoclosure ()->Any, _ arg1: @autoclosure ()->Any
2222
print(arg0(), arg1())
2323
}
2424

25+
/// An index type for views onto random access collections whose elements are
26+
/// effectively variable-width.
27+
public protocol UnicodeIndexProtocol {
28+
/// The position this index denotes, expressed as a signed 64-bit offset.
///
/// NOTE(review): presumably counted in code units from the start of the
/// underlying storage — confirm against the conforming index types.
var codeUnitOffset: Int64 { get }
29+
}
30+
31+
// Default comparison operators for every `UnicodeIndexProtocol` conformer.
// All four compare `codeUnitOffset` and nothing else.
extension UnicodeIndexProtocol {
32+
/// Equality between two values typed as the protocol existential, which
/// may be of different concrete index types.
public static func == (l: UnicodeIndexProtocol, r: UnicodeIndexProtocol) -> Bool {
33+
return l.codeUnitOffset == r.codeUnitOffset
34+
}
35+
/// Ordering between two values typed as the protocol existential.
public static func < (l: UnicodeIndexProtocol, r: UnicodeIndexProtocol) -> Bool {
36+
return l.codeUnitOffset < r.codeUnitOffset
37+
}
38+
39+
/// Equality between two values of the same conforming type.
///
/// NOTE(review): presumably provided so a conformer can also declare
/// `Equatable`/`Comparable` without writing its own operators — confirm.
public static func == (l: Self, r: Self) -> Bool {
40+
return l.codeUnitOffset == r.codeUnitOffset
41+
}
42+
/// Ordering between two values of the same conforming type.
public static func < (l: Self, r: Self) -> Bool {
43+
return l.codeUnitOffset < r.codeUnitOffset
44+
}
45+
}
46+
2547
/// A collection of `CodeUnit`s to be interpreted by some `Encoding`
2648
public struct UnicodeStorage<
2749
CodeUnits : RandomAccessCollection,
@@ -60,59 +82,62 @@ extension UnicodeStorage.EncodedScalars {
6082
// and the next index. This would obviously be more complicated if
6183
// the buffer contained more than a single scalar (and it probably
6284
// should).
63-
public struct Index : Comparable {
64-
let base: CodeUnits.Index
85+
/// An index into the encoded scalars, stored as an offset plus the stride
/// to the following index, together with an optional scalar value.
public struct Index : UnicodeIndexProtocol, Comparable {
86+
/// Position of this index as a distance into `codeUnits`.
let offset: CodeUnits.IndexDistance
6587
// FIXME: We might get a much better memory footprint if we used a
6688
// UInt8 to store the distance between base and next, rather than
6789
// storing next explicitly. CodeUnits will be random-access in
6890
// practice.
// NOTE(review): `nextStride` below is a UInt8 distance, so this FIXME
// looks addressed — confirm and remove it if so.
69-
let next: CodeUnits.Index
91+
/// Distance from `offset` to the following index; see `nextOffset`.
let nextStride: UInt8
92+
93+
/// `offset` converted to `Int64`, as required by `UnicodeIndexProtocol`.
public var codeUnitOffset: Int64 { return numericCast(offset) }
94+
95+
/// The offset of the following index: `offset` advanced by `nextStride`.
var nextOffset: CodeUnits.IndexDistance {
96+
return offset + numericCast(nextStride)
97+
}
98+
7099
// FIXME: there should be an invalid inhabitant we can use in
71100
// EncodedScalar so as not to waste a separate bool here.
/// The scalar stored for this position, or `nil` when none is stored.
let scalar: Encoding.EncodedScalar?
73-
74-
public static func < (lhs: Index, rhs: Index) -> Bool {
75-
return lhs.base < rhs.base
76-
}
77-
public static func == (lhs: Index, rhs: Index) -> Bool {
78-
return lhs.base == rhs.base
79-
}
80102
}
81103
}
82104

83105
/// Collection Conformance
84106
extension UnicodeStorage.EncodedScalars : BidirectionalCollection {
85107
/// The index of the first encoded scalar, or `endIndex` when `codeUnits`
/// is empty.
public var startIndex: Index {
86108
// Empty storage has nothing to parse.
if _slowPath(codeUnits.isEmpty) { return endIndex }
87-
let s = codeUnits.startIndex
88-
return index(after: Index(base: s, next: s, scalar: nil))
109+
// Advance from a placeholder at offset 0 with zero stride; its
// `nextOffset` is therefore 0, which is what `index(after:)` is given.
return index(after: Index(offset: 0, nextStride: 0, scalar: nil))
89110
}
90111

91112
/// The past-the-end index: offset `codeUnits.count`, zero stride, and no
/// stored scalar.
public var endIndex: Index {
92-
let s = codeUnits.endIndex
93-
return Index(base: s, next: s, scalar: nil)
113+
return Index(offset: codeUnits.count, nextStride: 0, scalar: nil)
94114
}
95115

96116
/// Returns the encoded scalar at `i`.
///
/// Uses the scalar stored in `i` when there is one; otherwise calls
/// `index(after:)` on a zero-stride placeholder at `i.offset` and takes the
/// scalar of the resulting index.
public subscript(i: Index) -> Encoding.EncodedScalar {
97117
if let r = i.scalar {
98118
return r
99119
}
100120
// The force unwrap traps if the resulting index carries no scalar.
return index(after:
101-
Index(base: i.base, next: i.base, scalar: nil)).scalar!
121+
Index(offset: i.offset, nextStride: 0, scalar: nil)).scalar!
102122
}
103123

104124
public func index(after i: Index) -> Index {
105-
var remainder = codeUnits[i.next..<codeUnits.endIndex]
125+
let p = codeUnits.index(atOffset: i.nextOffset)
126+
var remainder = codeUnits[p...]
106127
while true {
107128
switch Encoding.parse1Forward(remainder, knownCount: 0) {
108129
case .valid(let scalar, let nextIndex):
109-
return Index(base:i.next, next: nextIndex, scalar: scalar)
130+
return Index(
131+
offset: i.nextOffset,
132+
nextStride: numericCast(remainder.offset(of: nextIndex)),
133+
scalar: scalar)
110134
case .error(let nextIndex):
111135
// FIXME: don't go through UnicodeScalar once this is in the stdlib
112136
if let replacement = Encoding.encode(
113137
UTF32.EncodedScalar(UnicodeScalar(0xFFFD)!)) {
114138
return Index(
115-
base:i.next, next: nextIndex,
139+
offset: i.nextOffset,
140+
nextStride: numericCast(remainder.offset(of: nextIndex)),
116141
scalar: replacement)
117142
}
118143
remainder = remainder.dropFirst()
@@ -123,17 +148,23 @@ extension UnicodeStorage.EncodedScalars : BidirectionalCollection {
123148
}
124149

125150
public func index(before i: Index) -> Index {
126-
var remainder = codeUnits[..<i.base]
151+
var remainder = codeUnits[..<codeUnits.index(atOffset: i.offset)]
127152
while true {
128153
switch Encoding.parse1Reverse(remainder, knownCount: 0) {
129154
case .valid(let scalar, let priorIndex):
130-
return Index(base: priorIndex, next: i.base, scalar: scalar)
155+
let stride = remainder[priorIndex...].count
156+
return Index(
157+
offset: i.offset - numericCast(stride),
158+
nextStride: numericCast(stride),
159+
scalar: scalar)
131160
case .error(let priorIndex):
161+
let stride = remainder[priorIndex...].count
132162
// FIXME: don't go through UnicodeScalar once this is in the stdlib
133163
if let replacement = Encoding.encode(
134164
UTF32.EncodedScalar(UnicodeScalar(0xFFFD)!)) {
135165
return Index(
136-
base: priorIndex, next: i.base,
166+
offset: i.offset - numericCast(stride),
167+
nextStride: numericCast(stride),
137168
scalar: replacement)
138169
}
139170
remainder = remainder.dropLast()
@@ -149,12 +180,12 @@ extension UnicodeStorage {
149180
/// `FromEncoding`, provides a collection of `ToEncoding.CodeUnit`s
150181
/// representing the same text.
151182
public struct TranscodedView<ToEncoding : UnicodeEncoding> : BidirectionalCollection {
152-
typealias FromEncoding = Encoding
183+
public typealias FromEncoding = Encoding
153184

154185
// We could just be a generic typealias as this type, but it turns
155186
// out to be impossible, or nearly so, to write the init() below.
156187
// Instead, we wrap an instance of Base.
157-
typealias Base = FlattenBidirectionalCollection<
188+
public typealias Base = FlattenBidirectionalCollection<
158189
LazyMapBidirectionalCollection<
159190
UnicodeStorage<CodeUnits, FromEncoding>.EncodedScalars,
160191
ToEncoding.EncodedScalar
@@ -171,8 +202,6 @@ extension UnicodeStorage {
171202
})
172203
}
173204

174-
// FIXME: this should go in the extension below but for <rdar://30320012>
175-
//typealias SubSequence = BidirectionalSlice<TranscodedView>
176205
public var startIndex : Base.Index {
177206
return base.startIndex
178207
}
@@ -272,7 +301,7 @@ extension UnicodeStorage : _UTextable {
272301
buffer[u.chunkLength^] = unit
273302
u.chunkLength += 1
274303
}
275-
u.chunkNativeLimit = codeUnits.offset(of: i.next)^
304+
u.chunkNativeLimit = i.nextOffset^
276305
}
277306
}
278307
else {
@@ -291,7 +320,7 @@ extension UnicodeStorage : _UTextable {
291320
buffer[u.chunkLength^] = unit
292321
u.chunkLength += 1
293322
}
294-
u.chunkNativeStart = codeUnits.offset(of: i.base)^
323+
u.chunkNativeStart = i.codeUnitOffset
295324
u.chunkOffset = u.chunkLength
296325
}
297326
var b = buffer // copy due to https://bugs.swift.org/browse/SR-3782
@@ -353,7 +382,7 @@ extension UnicodeStorage : _UTextable {
353382
for i in chunkSource.indices {
354383
chunkOffset += chunkSource[i].utf16.count
355384
if chunkOffset == u[0].chunkOffset^ {
356-
return codeUnits.offset(of: i.next)^
385+
return i.nextOffset^
357386
}
358387
}
359388
fatalError("supposed to be unreachable")
@@ -454,10 +483,6 @@ extension UnicodeStorage {
454483
defer { __swift_stdlib_ubrk_close(bi) }
455484

456485
return storage._withUText { u in
457-
let access = u[0].pFuncs[0].access(u, storage.codeUnits.offset(of: i)^, 1)
458-
// _debugLog("access result:", access)
459-
// _debugLog("ubrk_setUText(breakIterator: \(bi), u: \(u)")
460-
// _debugLog("u: \(u.pointee)")
461486
__swift_stdlib_ubrk_setUText(bi, u, &err)
462487
_precondition(err.isSuccess, "unexpected ubrk_setUText failure")
463488
return body(bi)

test/Prototypes/AnyUnicode.swift

Lines changed: 82 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -366,12 +366,90 @@ protocol AnyUnicode {
366366
func isInFastCOrDForm(scan: Bool/* = true*/) -> Bool
367367
}
368368

369+
/// A random-access wrapper over `Base` that is indexed by `Int64` offsets
/// and whose elements are `Base`'s unsigned-integer elements converted to
/// `Element_` with `numericCast`.
struct AnyUIntCollection<
370+
Base: RandomAccessCollection, Element_ : UnsignedInteger
371+
> : RandomAccessCollection
372+
where Base.Iterator.Element : UnsignedInteger {
373+
typealias IndexDistance = Int64
374+
typealias Index = Int64
375+
typealias Element = Element_
376+
// FIXME: associated type deduction seems to need a hint here.
377+
typealias Indices = DefaultRandomAccessIndices<AnyUIntCollection>
378+
379+
/// The wrapped collection.
let base: Base
380+
381+
/// Always offset 0.
var startIndex: Index { return 0 }
382+
/// `base.count` converted to `Int64`.
var endIndex: Index { return numericCast(base.count) }
383+
384+
/// Maps offset `i` to a `base` index, advances it by one position in
/// `base`, and maps the result back to an offset.
func index(after i: Index) -> Index {
385+
return numericCast(
386+
base.offset(of: base.index(after: base.index(atOffset: i))))
387+
}
388+
389+
/// Maps offset `i` to a `base` index, steps it back by one position in
/// `base`, and maps the result back to an offset.
func index(before i: Index) -> Index {
390+
return numericCast(
391+
base.offset(of: base.index(before: base.index(atOffset: i))))
392+
}
393+
394+
/// Maps offset `i` to a `base` index, moves it by `n` positions using
/// `base.index(_:offsetBy:)`, and maps the result back to an offset.
func index(_ i: Index, offsetBy n: Int64) -> Index {
395+
return numericCast(
396+
base.offset(
397+
of: base.index(base.index(atOffset: i),
398+
offsetBy: numericCast(n))))
399+
}
400+
401+
/// The element of `base` at offset `i`, converted to `Element`.
///
/// NOTE(review): `numericCast` is expected to trap when the value does not
/// fit in `Element` — confirm this is acceptable for narrowing uses.
subscript(i: Index) -> Element {
402+
return numericCast(base[base.index(atOffset: i)])
403+
}
404+
405+
/// Calls `body` from within `base.withUnsafeElementStorage`.
///
/// `body` receives a buffer only when `base` supplies one and that buffer
/// dynamically casts to `UnsafeBufferPointer<Element>`; in every other case
/// `body` receives `nil`.
///
/// NOTE(review): the cast presumably succeeds only when `Base`'s element
/// type is exactly `Element` — confirm.
public func withUnsafeElementStorage<R>(
406+
_ body: (UnsafeBufferPointer<Element>?) throws -> R
407+
) rethrows -> R {
408+
return try base.withUnsafeElementStorage { b in
409+
if let b1 = b {
410+
// Round-trip through `Any` so the conversion is a runtime-checked cast.
if let b2 = b1 as Any as? UnsafeBufferPointer<Element> {
411+
return try body(b2)
412+
}
413+
}
414+
return try body(nil)
415+
}
416+
}
417+
}
418+
419+
/// A prototype refinement of `Swift.AnyUnicodeEncoding` that adds a
/// requirement for producing an `AnyUTF16` from a collection of code units.
protocol AnyUnicodeEncoding : Swift.AnyUnicodeEncoding {
420+
/// Returns an `AnyUTF16` for the given random-access collection of
/// unsigned-integer code units.
static func utf16View<CodeUnits: RandomAccessCollection>(_: CodeUnits) -> AnyUTF16
421+
where CodeUnits.Iterator.Element : UnsignedInteger
422+
}
423+
424+
// Work-in-progress `utf16View` implementation for encodings that are also
// `UnicodeEncoding`s with unsigned-integer code units.
extension AnyUnicodeEncoding
425+
where Self : UnicodeEncoding,
426+
Self.EncodedScalar.Iterator.Element : UnsignedInteger {
427+
428+
/// Builds a UTF-16 `TranscodedView` over `codeUnits` and then traps.
///
/// NOTE(review): this method is labelled `utf16View(codeUnits:)`, whereas
/// the protocol requirement is `utf16View(_:)`, so it does not look like it
/// can serve as the default implementation of that requirement — confirm
/// which label is intended.
static func utf16View<CodeUnits: RandomAccessCollection>(codeUnits: CodeUnits) -> AnyUTF16
429+
where CodeUnits.Iterator.Element : UnsignedInteger {
430+
// Presents the caller's code units with this encoding's `CodeUnit` type.
typealias WidthAdjusted = AnyUIntCollection<CodeUnits, CodeUnit>
431+
typealias Storage = UnicodeStorage<WidthAdjusted, Self>
432+
433+
// NOTE(review): `r` is constructed but never used or returned.
let r = Storage.TranscodedView(
434+
WidthAdjusted(base: codeUnits),
435+
from: self,
436+
to: UTF16.self
437+
)
438+
// Unconditional trap: no `AnyUTF16` is produced yet.
fatalError()
439+
}
440+
}
441+
/*
442+
extension AnyUnicode {
443+
var utf16: AnyUTF16 {
444+
return
445+
}
446+
}
447+
*/
448+
369449
var suite = TestSuite("AnyUnicode")
370450
// Wraps the same values, 3 through 7, stored as UInt16 and as UInt8, in
// `AnyUTF16ZeroExtender` and expects the two wrappers to be element-wise
// equal.
suite.test("basics") {
371-
let g : [UInt16] = Array(3...7)
372-
let h : [UInt8] = Array(3...7)
373-
var x = AnyUTF16ZeroExtender(base: g)
374-
var y = AnyUTF16ZeroExtender(base: h)
451+
let x = AnyUTF16ZeroExtender(base: Array(3...7) as [UInt16])
452+
let y = AnyUTF16ZeroExtender(base: Array(3...7) as [UInt8])
375453
expectTrue(x.elementsEqual(y))
376454
}
377455

0 commit comments

Comments
 (0)