Skip to content

Commit dc23e91

Browse files
committed
WIP: simple forwards exhaustive test working
1 parent 8d0a99b commit dc23e91

File tree

3 files changed

+154
-83
lines changed

3 files changed

+154
-83
lines changed

stdlib/public/core/StringCharacterView.swift

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -189,15 +189,15 @@ extension String: BidirectionalCollection {
189189
public subscript(i: Index) -> Character {
190190
_boundsCheck(i)
191191

192-
print(Array(self.utf16).map { String($0, radix: 16, uppercase: true) })
193-
print(String(i._rawBits, radix: 16, uppercase: true))
194-
print(String(i._encodedOffset))
192+
// print(Array(self.utf16).map { String($0, radix: 16, uppercase: true) })
193+
// print(String(i._rawBits, radix: 16, uppercase: true))
194+
// print(String(i._encodedOffset))
195195

196196
let i = _guts.scalarAlign(i)
197-
print(String(i._rawBits, radix: 16, uppercase: true))
198-
print(String(i._encodedOffset))
197+
// print(String(i._rawBits, radix: 16, uppercase: true))
198+
// print(String(i._encodedOffset))
199199
let distance = _characterStride(startingAt: i)
200-
print(distance)
200+
// print(distance)
201201

202202
return _guts.errorCorrectedCharacter(
203203
startingAt: i._encodedOffset, endingAt: i._encodedOffset &+ distance)
@@ -207,7 +207,7 @@ extension String: BidirectionalCollection {
207207
internal func _characterStride(startingAt i: Index) -> Int {
208208
// Fast check if it's already been measured, otherwise check resiliently
209209
if let d = i.characterStride {
210-
print("cached stride: \(d)")
210+
// print("cached stride: \(d)")
211211
return d
212212
}
213213

stdlib/public/core/StringGraphemeBreaking.swift

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -86,19 +86,18 @@ private func _hasGraphemeBreakBetween(
8686
private func _measureCharacterStrideICU(
8787
of utf8: UnsafeBufferPointer<UInt8>, startingAt i: Int
8888
) -> Int {
89-
// FIXME: need slice
89+
// ICU will give us a different result if we feed in the whole buffer, so
90+
// slice it appropriately.
91+
let utf8Slice = UnsafeBufferPointer(rebasing: utf8[i...])
92+
let iterator = _ThreadLocalStorage.getUBreakIterator(utf8Slice)
93+
let offset = __swift_stdlib_ubrk_following(iterator, 0)
9094

91-
let iterator = _ThreadLocalStorage.getUBreakIterator(utf8)
92-
let offset = __swift_stdlib_ubrk_following(
93-
iterator, Int32(truncatingIfNeeded: i))
9495
// ubrk_following returns -1 (UBRK_DONE) when it hits the end of the buffer.
95-
if _fastPath(offset != -1) {
96-
// The offset into our buffer is the distance.
97-
_internalInvariant(offset > i, "zero-sized grapheme?")
98-
return Int(truncatingIfNeeded: offset) &- i
99-
}
100-
_internalInvariant(utf8.count > i)
101-
return utf8.count &- i
96+
guard _fastPath(offset != -1) else { return utf8Slice.count }
97+
98+
// The offset into our buffer is the distance.
99+
_internalInvariant(offset > 0, "zero-sized grapheme?")
100+
return Int(truncatingIfNeeded: offset)
102101
}
103102

104103
@inline(never) // slow-path

test/stdlib/StringIndex.swift

Lines changed: 137 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,22 @@ import StdlibUnittest
55

66
var StringIndexTests = TestSuite("StringIndexTests")
77

8-
enum SimpleString: String {
9-
case smallASCII = "abcdefg"
10-
case smallUnicode = "abéÏ𓀀"
11-
case largeASCII = "012345678901234567890"
12-
case largeUnicode = "abéÏ012345678901234567890𓀀"
13-
case emoji = "😀😃🤢🤮👩🏿‍🎤🧛🏻‍♂️🧛🏻‍♂️👩‍👩‍👦‍👦"
14-
}
8+
// enum SimpleString: String {
9+
// case smallASCII = "abcdefg"
10+
// case smallUnicode = "abéÏ𓀀"
11+
// case largeASCII = "012345678901234567890"
12+
// case largeUnicode = "abéÏ012345678901234567890𓀀"
13+
// case emoji = "😀😃🤢🤮👩🏿‍🎤🧛🏻‍♂️🧛🏻‍♂️👩‍👩‍👦‍👦"
14+
// }
1515

16-
let simpleStrings: [String] = [
17-
SimpleString.smallASCII.rawValue,
18-
SimpleString.smallUnicode.rawValue,
19-
SimpleString.largeASCII.rawValue,
20-
SimpleString.largeUnicode.rawValue,
21-
SimpleString.emoji.rawValue,
22-
"",
23-
]
16+
// let simpleStrings: [String] = [
17+
// SimpleString.smallASCII.rawValue,
18+
// SimpleString.smallUnicode.rawValue,
19+
// SimpleString.largeASCII.rawValue,
20+
// SimpleString.largeUnicode.rawValue,
21+
// SimpleString.emoji.rawValue,
22+
// "",
23+
// ]
2424

2525
// StringIndexTests.test("basic sanity checks") {
2626
// for s in simpleStrings {
@@ -248,65 +248,137 @@ import Foundation
248248
// }
249249
// }
250250

251-
StringIndexTests.test("Misaligned") {
252-
func doIt(_ str: String) {
253-
let characterIndices = Array(str.indices)
254-
let scalarIndices = Array(str.unicodeScalars.indices) + [str.endIndex]
255-
let utf8Indices = Array(str.utf8.indices)
256-
let utf16Indices = Array(str.utf16.indices)
257-
258-
var lastScalarI = 0
259-
for i in 1..<utf8Indices.count {
260-
let idx = utf8Indices[i]
261-
262-
// Skip aligned indices
263-
guard idx < scalarIndices[lastScalarI + 1] else {
264-
assert(idx == scalarIndices[lastScalarI + 1])
265-
lastScalarI += 1
266-
continue
267-
}
268-
expectTrue(UTF8.isContinuation(str.utf8[idx]))
251+
// StringIndexTests.test("Misaligned") {
252+
// func doIt(_ str: String) {
253+
// let characterIndices = Array(str.indices)
254+
// let scalarIndices = Array(str.unicodeScalars.indices) + [str.endIndex]
255+
// let utf8Indices = Array(str.utf8.indices)
256+
// let utf16Indices = Array(str.utf16.indices)
257+
258+
// var lastScalarI = 0
259+
// for i in 1..<utf8Indices.count {
260+
// let idx = utf8Indices[i]
261+
262+
// // Skip aligned indices
263+
// guard idx < scalarIndices[lastScalarI + 1] else {
264+
// assert(idx == scalarIndices[lastScalarI + 1])
265+
// lastScalarI += 1
266+
// continue
267+
// }
268+
// expectTrue(UTF8.isContinuation(str.utf8[idx]))
269269

270-
let lastScalarIdx = scalarIndices[lastScalarI]
270+
// let lastScalarIdx = scalarIndices[lastScalarI]
271271

272-
dump(idx)
273-
dump(lastScalarIdx)
274-
dump(scalarIndices[lastScalarI + 1])
272+
// dump(idx)
273+
// dump(lastScalarIdx)
274+
// dump(scalarIndices[lastScalarI + 1])
275275

276-
// Check aligning-down
277-
expectEqual(str[lastScalarIdx], str[idx])
278-
expectEqual(str.utf16[lastScalarIdx], str.utf16[idx])
279-
expectEqual(str.unicodeScalars[lastScalarIdx], str.unicodeScalars[idx])
276+
// // Check aligning-down
277+
// expectEqual(str[lastScalarIdx], str[idx])
278+
// expectEqual(str.utf16[lastScalarIdx], str.utf16[idx])
279+
// expectEqual(str.unicodeScalars[lastScalarIdx], str.unicodeScalars[idx])
280+
281+
// // Check distance
282+
// let (start, end) = (str.startIndex, str.endIndex)
283+
// if characterIndices.contains(lastScalarIdx) {
284+
// expectEqual(0, str.distance(from: lastScalarIdx, to: idx))
285+
// expectEqual(str[..<idx].count, str.distance(from: start, to: idx))
286+
// expectEqual(str[idx...].count, str.distance(from: idx, to: end))
287+
// }
288+
// expectEqual(
289+
// 0, str.unicodeScalars.distance(from: lastScalarIdx, to: idx))
290+
// expectEqual(
291+
// str.unicodeScalars[..<idx].count,
292+
// str.unicodeScalars.distance(from: start, to: idx))
293+
// expectEqual(
294+
// str.unicodeScalars[idx...].count,
295+
// str.unicodeScalars.distance(from: idx, to: end))
296+
297+
// expectEqual(0, str.utf16.distance(from: lastScalarIdx, to: idx))
298+
// expectEqual(
299+
// str.utf16[..<idx].count, str.utf16.distance(from: start, to: idx))
300+
// expectEqual(
301+
// str.utf16[idx...].count, str.utf16.distance(from: idx, to: end))
302+
// }
303+
// }
304+
305+
// let nsstring: NSString = "aодиde\u{301}日🧟‍♀️"
306+
// doIt(nsstring as String)
307+
308+
// let string = "aодиde\u{301}日🧟‍♀️"
309+
// doIt(string)
310+
// }
311+
312+
StringIndexTests.test("Index interchange") {
313+
// Exhaustively test aspects of string index interchange
314+
func testInterchange(
315+
_ str: String,
316+
stackTrace: SourceLocStack = SourceLocStack(),
317+
showFrame: Bool = true,
318+
file: String = #file,
319+
line: UInt = #line
320+
) {
321+
let stackTrace = stackTrace.pushIf(showFrame, file: file, line: line)
322+
func expect(
323+
_ condition: @autoclosure () -> Bool,
324+
_ message: String = "",
325+
file: String = #file,
326+
line: UInt = #line
327+
) {
328+
expectTrue(condition(), message,
329+
stackTrace: stackTrace, showFrame: showFrame,
330+
file: file, line: line)
331+
}
280332

281-
// Check distance
282-
let (start, end) = (str.startIndex, str.endIndex)
283-
if characterIndices.contains(lastScalarIdx) {
284-
expectEqual(0, str.distance(from: lastScalarIdx, to: idx))
285-
expectEqual(str[..<idx].count, str.distance(from: start, to: idx))
286-
expectEqual(str[idx...].count, str.distance(from: idx, to: end))
333+
var curCharIdx = str.startIndex
334+
var curScalarIdx = str.startIndex
335+
var curUTF8Idx = str.startIndex
336+
var curUTF16Idx = str.startIndex
337+
338+
while curCharIdx < str.endIndex {
339+
let curChar = str[curCharIdx]
340+
expect(curChar == str[curScalarIdx])
341+
expect(curChar == str[curUTF8Idx])
342+
expect(curChar == str[curUTF16Idx])
343+
344+
// Advance the character index once and have the scalar index catch up
345+
str.formIndex(after: &curCharIdx)
346+
347+
while curScalarIdx < curCharIdx {
348+
let curScalar = str.unicodeScalars[curScalarIdx]
349+
350+
print(curScalar)
351+
print(curScalarIdx.encodedOffset)
352+
353+
let curSubChar = str[curScalarIdx]
354+
355+
// Advance the scalar index once and have the code unit indices catch up
356+
str.unicodeScalars.formIndex(after: &curScalarIdx)
357+
358+
while curUTF8Idx < curScalarIdx {
359+
expect(curScalar == str.unicodeScalars[curUTF8Idx])
360+
// expect(curSubChar == str[curUTF8Idx])
361+
expect(!UTF16.isTrailSurrogate(str.utf16[curUTF8Idx]))
362+
str.utf8.formIndex(after: &curUTF8Idx)
363+
}
364+
expect(curUTF8Idx == curScalarIdx)
365+
366+
while curUTF16Idx < curScalarIdx {
367+
expect(curScalar == str.unicodeScalars[curUTF16Idx])
368+
// expect(curSubChar == str[curUTF16Idx])
369+
expect(!UTF8.isContinuation(str.utf8[curUTF16Idx]))
370+
str.utf16.formIndex(after: &curUTF16Idx)
371+
}
372+
expect(curUTF16Idx == curScalarIdx)
287373
}
288-
expectEqual(
289-
0, str.unicodeScalars.distance(from: lastScalarIdx, to: idx))
290-
expectEqual(
291-
str.unicodeScalars[..<idx].count,
292-
str.unicodeScalars.distance(from: start, to: idx))
293-
expectEqual(
294-
str.unicodeScalars[idx...].count,
295-
str.unicodeScalars.distance(from: idx, to: end))
296-
297-
expectEqual(0, str.utf16.distance(from: lastScalarIdx, to: idx))
298-
expectEqual(
299-
str.utf16[..<idx].count, str.utf16.distance(from: start, to: idx))
300-
expectEqual(
301-
str.utf16[idx...].count, str.utf16.distance(from: idx, to: end))
302374
}
303375
}
304376

305-
let nsstring: NSString = "aодиde\u{301}日🧟‍♀️"
306-
doIt(nsstring as String)
377+
testInterchange("abc\r\ndefg")
378+
testInterchange(("abc\r\ndefg" as NSString) as String)
307379

308-
let string = "aодиde\u{301}日🧟‍♀️"
309-
doIt(string)
380+
testInterchange("ab\r\ncдиde\u{301}日🧟‍♀️")
381+
testInterchange(("ab\r\ncдиde\u{301}日🧟‍♀️" as NSString) as String)
310382
}
311383

312384
#endif // _runtime(_ObjC)

0 commit comments

Comments
 (0)