Skip to content

[stdlib]Properly promote stack buffer to heap buffer when necessary (5.0 cherry pick) #20668

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 77 additions & 78 deletions stdlib/public/core/NormalizedCodeUnitIterator.swift
Original file line number Diff line number Diff line change
Expand Up @@ -227,14 +227,13 @@ internal
struct _NormalizedUTF16CodeUnitIterator: IteratorProtocol {
internal typealias CodeUnit = UInt16
var segmentBuffer = _FixedArray16<CodeUnit>(allZeros:())
var overflowBuffer: [CodeUnit]? = nil
var normalizationBuffer: [CodeUnit]? = nil
var normalizationBuffer = _FixedArray16<CodeUnit>(allZeros:())
var segmentHeapBuffer: [CodeUnit]? = nil
var normalizationHeapBuffer: [CodeUnit]? = nil
var source: _SegmentSource

var segmentBufferIndex = 0
var segmentBufferCount = 0
var overflowBufferIndex = 0
var overflowBufferCount = 0

init(_ guts: _StringGuts, _ range: Range<String.Index>) {
source = _ForeignStringGutsSource(guts, range)
Expand Down Expand Up @@ -350,83 +349,86 @@ struct _NormalizedUTF16CodeUnitIterator: IteratorProtocol {

mutating func next() -> UInt16? {
if segmentBufferCount == segmentBufferIndex {
if source.isEmpty {
return nil
}
segmentBuffer = _FixedArray16<CodeUnit>(allZeros:())
segmentBufferCount = 0
segmentBufferIndex = 0
}

if overflowBufferCount == overflowBufferIndex {
overflowBufferCount = 0
overflowBufferIndex = 0

if segmentBufferCount == 0 {
segmentBufferCount = normalizeFromSource()
}

if source.isEmpty
&& segmentBufferCount == 0
&& overflowBufferCount == 0 {
// Our source of code units to normalize is empty and our buffers from
// previous normalizations are also empty.
return nil
guard segmentBufferIndex < segmentBufferCount else { return nil }

defer { segmentBufferIndex += 1 }
if _slowPath(segmentHeapBuffer != nil) {
return segmentHeapBuffer![segmentBufferIndex]
}
if segmentBufferCount == 0 && overflowBufferCount == 0 {
//time to fill a buffer if possible. Otherwise we are done, return nil
// Normalize segment, and then compare first code unit
var intermediateBuffer = _FixedArray16<CodeUnit>(allZeros:())
if overflowBuffer == nil,
let filled = source.tryFill(into: &intermediateBuffer)
{
guard let count = _tryNormalize(
_castOutputBuffer(&intermediateBuffer,
endingAt: filled),
into: &segmentBuffer
)
else {
fatalError("Output buffer was not big enough, this should not happen")
}
segmentBufferCount = count
} else {
if overflowBuffer == nil {
let size = source.remaining * _Normalization._maxNFCExpansionFactor
overflowBuffer = Array(repeating: 0, count: size)
normalizationBuffer = Array(repeating:0, count: size)
}

guard let count = normalizationBuffer!.withUnsafeMutableBufferPointer({
(normalizationBufferPtr) -> Int? in
guard let filled = source.tryFill(into: normalizationBufferPtr)
else {
fatalError("Invariant broken, buffer should have space")
}
return overflowBuffer!.withUnsafeMutableBufferPointer {
(overflowBufferPtr) -> Int? in
return _tryNormalize(
UnsafeBufferPointer(rebasing: normalizationBufferPtr[..<filled]),
into: overflowBufferPtr
)
}
}) else {
fatalError("Invariant broken, overflow buffer should have space")
}

overflowBufferCount = count
return segmentBuffer[segmentBufferIndex]
}

mutating func normalizeFromSource() -> Int {
if segmentHeapBuffer == nil,
let filled = source.tryFill(into: &normalizationBuffer)
{
if let count = _tryNormalize(
_castOutputBuffer(&normalizationBuffer,
endingAt: filled),
into: &segmentBuffer
) {
return count
}
return normalizeWithHeapBuffers(filled)
}

//exactly one of the buffers should have code units for us to return
_internalInvariant((segmentBufferCount == 0)
!= ((overflowBuffer?.count ?? 0) == 0))

if segmentBufferIndex < segmentBufferCount {
let index = segmentBufferIndex
segmentBufferIndex += 1
return segmentBuffer[index]
} else if overflowBufferIndex < overflowBufferCount {
_internalInvariant(overflowBufferIndex < overflowBuffer!.count)
let index = overflowBufferIndex
overflowBufferIndex += 1
return overflowBuffer![index]
} else {
return nil
return normalizeWithHeapBuffers()
}

//This handles normalization from an intermediate buffer to the heap segment
//buffer. This can get called in 3 situations:
//* We've already transitioned to heap buffers
//* We attempted to fill the pre-normal stack buffer but there was not enough
//. room, so we need to promote both and then attempt the fill again.
//* The fill for the stack buffer succeeded, but the normalization didn't. In
// this case, we want to first copy the contents of the stack buffer that
// we filled into the new heap buffer. The stackBufferCount
// parameter signals that we need to do this copy, thus skipping the fill
// that we would normally do before normalization.
mutating func normalizeWithHeapBuffers(
_ stackBufferCount: Int? = nil
) -> Int {
if segmentHeapBuffer == nil {
_internalInvariant(normalizationHeapBuffer == nil)
let preFilledBufferCount = stackBufferCount ?? 0
let size = (source.remaining + preFilledBufferCount)
* _Normalization._maxNFCExpansionFactor
segmentHeapBuffer = Array(repeating: 0, count: size)
normalizationHeapBuffer = Array(repeating:0, count: size)
for i in 0..<preFilledBufferCount {
normalizationHeapBuffer![i] = normalizationBuffer[i]
}
}

guard let count = normalizationHeapBuffer!.withUnsafeMutableBufferPointer({
(normalizationHeapBufferPtr) -> Int? in
guard let filled = stackBufferCount ??
source.tryFill(into: normalizationHeapBufferPtr)
else {
fatalError("Invariant broken, buffer should have space")
}
return segmentHeapBuffer!.withUnsafeMutableBufferPointer {
(segmentHeapBufferPtr) -> Int? in
return _tryNormalize(
UnsafeBufferPointer(rebasing: normalizationHeapBufferPtr[..<filled]),
into: segmentHeapBufferPtr
)
}
}) else {
fatalError("Invariant broken, overflow buffer should have space")
}
return count
}
}

Expand Down Expand Up @@ -635,22 +637,19 @@ extension _NormalizedUTF8CodeUnitIterator_2 {
let result = _lexicographicalCompare(cu, otherCU)
if result == .equal {
continue
} else {
return result
}
} else {
//other returned nil, we are greater
return .greater
return result
}
//other returned nil, we are greater
return .greater
}

//we ran out of code units, either we are equal, or only we ran out and
//other is greater
if let _ = mutableOther.next() {
return .less
} else {
return .equal
}
return .equal
}
}

Expand Down
15 changes: 15 additions & 0 deletions validation-test/stdlib/String.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2183,4 +2183,19 @@ for test in comparisonTestCases {
}
}

StringTests.test("NormalizationBufferCrashRegressionTest") {
let str = "\u{0336}\u{0344}\u{0357}\u{0343}\u{0314}\u{0351}\u{0340}\u{0300}\u{0340}\u{0360}\u{0314}\u{0357}\u{0315}\u{0301}\u{0344}a"
let set = Set([str])

expectTrue(set.contains(str))
}

StringTests.test("NormalizationCheck") {
let str = "\u{0336}\u{0344}\u{0357}\u{0343}\u{0314}\u{0351}\u{0340}\u{0300}\u{0340}\u{0360}\u{0314}\u{0357}\u{0315}\u{0301}\u{0344}a"
let nfcCodeUnits = str._nfcCodeUnits
let expectedCodeUnits: [UInt8] = [0xCC, 0xB6, 0xCC, 0x88, 0xCC, 0x81, 0xCD, 0x97, 0xCC, 0x93, 0xCC, 0x94, 0xCD, 0x91, 0xCC, 0x80, 0xCC, 0x80, 0xCC, 0x80, 0xCC, 0x94, 0xCD, 0x97, 0xCC, 0x81, 0xCC, 0x88, 0xCC, 0x81, 0xCC, 0x95, 0xCD, 0xA0, 0x61]

expectEqual(expectedCodeUnits, nfcCodeUnits)
}

runAllTests()