Skip to content

[stdlib] properly promote stack buffer to heap buffer when necessary #20585

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Nov 17, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 77 additions & 78 deletions stdlib/public/core/NormalizedCodeUnitIterator.swift
Original file line number Diff line number Diff line change
Expand Up @@ -227,14 +227,13 @@ internal
struct _NormalizedUTF16CodeUnitIterator: IteratorProtocol {
internal typealias CodeUnit = UInt16
var segmentBuffer = _FixedArray16<CodeUnit>(allZeros:())
var overflowBuffer: [CodeUnit]? = nil
var normalizationBuffer: [CodeUnit]? = nil
var normalizationBuffer = _FixedArray16<CodeUnit>(allZeros:())
var segmentHeapBuffer: [CodeUnit]? = nil
var normalizationHeapBuffer: [CodeUnit]? = nil
var source: _SegmentSource

var segmentBufferIndex = 0
var segmentBufferCount = 0
var overflowBufferIndex = 0
var overflowBufferCount = 0

init(_ guts: _StringGuts, _ range: Range<String.Index>) {
source = _ForeignStringGutsSource(guts, range)
Expand Down Expand Up @@ -350,83 +349,86 @@ struct _NormalizedUTF16CodeUnitIterator: IteratorProtocol {

mutating func next() -> UInt16? {
if segmentBufferCount == segmentBufferIndex {
if source.isEmpty {
return nil
}
segmentBuffer = _FixedArray16<CodeUnit>(allZeros:())
segmentBufferCount = 0
segmentBufferIndex = 0
}

if overflowBufferCount == overflowBufferIndex {
overflowBufferCount = 0
overflowBufferIndex = 0

if segmentBufferCount == 0 {
segmentBufferCount = normalizeFromSource()
}

if source.isEmpty
&& segmentBufferCount == 0
&& overflowBufferCount == 0 {
// Our source of code units to normalize is empty and our buffers from
// previous normalizations are also empty.
return nil
guard segmentBufferIndex < segmentBufferCount else { return nil }

defer { segmentBufferIndex += 1 }
if _slowPath(segmentHeapBuffer != nil) {
return segmentHeapBuffer![segmentBufferIndex]
}
if segmentBufferCount == 0 && overflowBufferCount == 0 {
//time to fill a buffer if possible. Otherwise we are done, return nil
// Normalize segment, and then compare first code unit
var intermediateBuffer = _FixedArray16<CodeUnit>(allZeros:())
if overflowBuffer == nil,
let filled = source.tryFill(into: &intermediateBuffer)
{
guard let count = _tryNormalize(
_castOutputBuffer(&intermediateBuffer,
endingAt: filled),
into: &segmentBuffer
)
else {
fatalError("Output buffer was not big enough, this should not happen")
}
segmentBufferCount = count
} else {
if overflowBuffer == nil {
let size = source.remaining * _Normalization._maxNFCExpansionFactor
overflowBuffer = Array(repeating: 0, count: size)
normalizationBuffer = Array(repeating:0, count: size)
}

guard let count = normalizationBuffer!.withUnsafeMutableBufferPointer({
(normalizationBufferPtr) -> Int? in
guard let filled = source.tryFill(into: normalizationBufferPtr)
else {
fatalError("Invariant broken, buffer should have space")
}
return overflowBuffer!.withUnsafeMutableBufferPointer {
(overflowBufferPtr) -> Int? in
return _tryNormalize(
UnsafeBufferPointer(rebasing: normalizationBufferPtr[..<filled]),
into: overflowBufferPtr
)
}
}) else {
fatalError("Invariant broken, overflow buffer should have space")
}

overflowBufferCount = count
return segmentBuffer[segmentBufferIndex]
}

mutating func normalizeFromSource() -> Int {
if segmentHeapBuffer == nil,
let filled = source.tryFill(into: &normalizationBuffer)
{
if let count = _tryNormalize(
_castOutputBuffer(&normalizationBuffer,
endingAt: filled),
into: &segmentBuffer
) {
return count
}
return normalizeWithHeapBuffers(filled)
}

//exactly one of the buffers should have code units for us to return
_internalInvariant((segmentBufferCount == 0)
!= ((overflowBuffer?.count ?? 0) == 0))

if segmentBufferIndex < segmentBufferCount {
let index = segmentBufferIndex
segmentBufferIndex += 1
return segmentBuffer[index]
} else if overflowBufferIndex < overflowBufferCount {
_internalInvariant(overflowBufferIndex < overflowBuffer!.count)
let index = overflowBufferIndex
overflowBufferIndex += 1
return overflowBuffer![index]
} else {
return nil
return normalizeWithHeapBuffers()
}

//This handles normalization from an intermediate buffer to the heap segment
//buffer. This can get called in 3 situations:
//* We've already transitioned to heap buffers
//* We attempted to fill the pre-normal stack buffer but there was not enough
//. room, so we need to promote both and then attempt the fill again.
//* The fill for the stack buffer succeeded, but the normalization didn't. In
// this case, we want to first copy the contents of the stack buffer that
// we filled into the new heap buffer. The stackBufferCount
// parameter signals that we need to do this copy, thus skipping the fill
// that we would normally do before normalization.
mutating func normalizeWithHeapBuffers(
_ stackBufferCount: Int? = nil
) -> Int {
if segmentHeapBuffer == nil {
_internalInvariant(normalizationHeapBuffer == nil)
let preFilledBufferCount = stackBufferCount ?? 0
let size = (source.remaining + preFilledBufferCount)
* _Normalization._maxNFCExpansionFactor
segmentHeapBuffer = Array(repeating: 0, count: size)
normalizationHeapBuffer = Array(repeating:0, count: size)
for i in 0..<preFilledBufferCount {
normalizationHeapBuffer![i] = normalizationBuffer[i]
}
}

guard let count = normalizationHeapBuffer!.withUnsafeMutableBufferPointer({
(normalizationHeapBufferPtr) -> Int? in
guard let filled = stackBufferCount ??
source.tryFill(into: normalizationHeapBufferPtr)
else {
fatalError("Invariant broken, buffer should have space")
}
return segmentHeapBuffer!.withUnsafeMutableBufferPointer {
(segmentHeapBufferPtr) -> Int? in
return _tryNormalize(
UnsafeBufferPointer(rebasing: normalizationHeapBufferPtr[..<filled]),
into: segmentHeapBufferPtr
)
}
}) else {
fatalError("Invariant broken, overflow buffer should have space")
}
return count
}
}

Expand Down Expand Up @@ -635,22 +637,19 @@ extension _NormalizedUTF8CodeUnitIterator_2 {
let result = _lexicographicalCompare(cu, otherCU)
if result == .equal {
continue
} else {
return result
}
} else {
//other returned nil, we are greater
return .greater
return result
}
//other returned nil, we are greater
return .greater
}

//we ran out of code units, either we are equal, or only we ran out and
//other is greater
if let _ = mutableOther.next() {
return .less
} else {
return .equal
}
return .equal
}
}

Expand Down
15 changes: 15 additions & 0 deletions validation-test/stdlib/String.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2183,4 +2183,19 @@ for test in comparisonTestCases {
}
}

StringTests.test("NormalizationBufferCrashRegressionTest") {
let str = "\u{0336}\u{0344}\u{0357}\u{0343}\u{0314}\u{0351}\u{0340}\u{0300}\u{0340}\u{0360}\u{0314}\u{0357}\u{0315}\u{0301}\u{0344}a"
let set = Set([str])

expectTrue(set.contains(str))
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have any expectation on what this should normalize to? Can we write a test for that?

StringTests.test("NormalizationCheck") {
let str = "\u{0336}\u{0344}\u{0357}\u{0343}\u{0314}\u{0351}\u{0340}\u{0300}\u{0340}\u{0360}\u{0314}\u{0357}\u{0315}\u{0301}\u{0344}a"
let nfcCodeUnits = str._nfcCodeUnits
let expectedCodeUnits: [UInt8] = [0xCC, 0xB6, 0xCC, 0x88, 0xCC, 0x81, 0xCD, 0x97, 0xCC, 0x93, 0xCC, 0x94, 0xCD, 0x91, 0xCC, 0x80, 0xCC, 0x80, 0xCC, 0x80, 0xCC, 0x94, 0xCD, 0x97, 0xCC, 0x81, 0xCC, 0x88, 0xCC, 0x81, 0xCC, 0x95, 0xCD, 0xA0, 0x61]

expectEqual(expectedCodeUnits, nfcCodeUnits)
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Were you able to test the case where the previous approach would've skipped over the long segment?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this new test caught it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The concern was that a corner case would cause us to skip a portion of the String. Unfortunately, a test case expressed in terms of string comparison is only testing behavioral consistency but not the actual result. We have _nfcCodeUnits and _withNFCCodeUnits to check exactly this.

runAllTests()