Skip to content

Commit 54f4c77

Browse files
committed
[stdlib] Revert hasNormalizationBoundaryBefore
This property is too specific in that it forces a particular normalization form; let's not expose it this way, but instead expose it in the future as part of a full normalization API.
1 parent ff40d04 commit 54f4c77

File tree

3 files changed

+28
-29
lines changed

3 files changed

+28
-29
lines changed

stdlib/public/core/StringComparison.swift

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -317,8 +317,7 @@ internal func _decodeSurrogatePair(
317317

318318
internal func _hasNormalizationBoundary(before cu: UInt16) -> Bool {
319319
guard !_isSurrogate(cu) else { return false }
320-
return UnicodeScalar(
321-
_unchecked: UInt32(cu)).properties.hasNormalizationBoundaryBefore
320+
return UnicodeScalar(_unchecked: UInt32(cu))._hasNormalizationBoundaryBefore
322321
}
323322

324323
//
@@ -862,10 +861,8 @@ private struct _UnicodeScalarExceptions {
862861
var i = 0
863862
while i < length {
864863
let (innerScalar, nextI) = _parseRawScalar(&outBuffer, startingFrom: i)
865-
if _slowPath(
866-
i != 0 && innerScalar.properties.hasNormalizationBoundaryBefore
867-
) {
868-
guard innerScalar.properties.hasNormalizationBoundaryBefore else {
864+
if _slowPath(i != 0 && innerScalar._hasNormalizationBoundaryBefore) {
865+
guard innerScalar._hasNormalizationBoundaryBefore else {
869866
fatalError(
870867
"Unicode invariant violated: non-starter multi-segment expander")
871868
}
@@ -1051,7 +1048,7 @@ extension _UnmanagedString where CodeUnit == UInt16 {
10511048
var (_, segmentEndIdx) = self._parseRawScalar(startingFrom: idx)
10521049
while segmentEndIdx < count {
10531050
let (scalar, nextIdx) = self._parseRawScalar(startingFrom: segmentEndIdx)
1054-
if scalar.properties.hasNormalizationBoundaryBefore {
1051+
if scalar._hasNormalizationBoundaryBefore {
10551052
break
10561053
}
10571054
segmentEndIdx = nextIdx
@@ -1069,7 +1066,7 @@ extension _UnmanagedString where CodeUnit == UInt16 {
10691066
while idx > 0 {
10701067
let (scalar, priorIdx) = _reverseParseRawScalar(endingAt: idx)
10711068
idx = priorIdx
1072-
if scalar.properties.hasNormalizationBoundaryBefore {
1069+
if scalar._hasNormalizationBoundaryBefore {
10731070
break
10741071
}
10751072
}
@@ -1095,8 +1092,7 @@ extension _UnmanagedString where CodeUnit == UInt16 {
10951092
}
10961093

10971094
// Check current scalar
1098-
let currentScalar = self._parseRawScalar(startingFrom: idx).0
1099-
if currentScalar.properties.hasNormalizationBoundaryBefore {
1095+
if self._parseRawScalar(startingFrom: idx).0._hasNormalizationBoundaryBefore {
11001096
return (idx, segmentEnd)
11011097
}
11021098

stdlib/public/core/StringNormalization.swift

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,29 @@ internal enum _Normalization {
6868
}
6969
return length == string.count
7070
}
71+
}
72+
73+
extension UnicodeScalar {
74+
// Normalization boundary - a place in a string where everything left of the
75+
// boundary can be normalized independently from everything right of the
76+
// boundary. The concatenation of each result is the same as if the entire
77+
// string had been normalized as a whole.
78+
//
79+
// Normalization segment - a sequence of code units between two normalization
80+
// boundaries (without any boundaries in the middle). Note that normalization
81+
// segments can, as part of normalization, expand, contract, and even
82+
// produce new sub-segments.
83+
84+
// Whether this scalar value always has a normalization boundary before it when normalizing to NFC.
85+
internal var _hasNormalizationBoundaryBefore: Bool {
86+
_sanityCheck(Int32(exactly: self.value) != nil, "top bit shouldn't be set")
87+
let value = Int32(bitPattern: self.value)
88+
return 0 != __swift_stdlib_unorm2_hasBoundaryBefore(
89+
_Normalization._nfcNormalizer, value)
90+
}
91+
}
7192

93+
extension _Normalization {
7294
// When normalized in NFC, some segments may expand in size (e.g. some non-BMP
7395
// musical notes). This expansion is capped by the maximum expansion factor of
7496
// the normal form. For NFC, that is 3x.

stdlib/public/core/UnicodeScalarProperties.swift

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1421,23 +1421,4 @@ extension Unicode.Scalar.Properties {
14211421
public var isDefined: Bool {
14221422
return __swift_stdlib_u_isdefined(_value) != 0
14231423
}
1424-
1425-
/// A Boolean property indicating whether a normalization boundary always
1426-
/// occurs before this scalar.
1427-
///
1428-
/// A normalization boundary is a position in a string where everything to the
1429-
/// left of the boundary can be normalized independently of everything to the
1430-
/// right of the boundary. The concatenation of each such normalization result
1431-
/// is thus the same as if the entire string had been normalized as a whole.
1432-
///
1433-
/// ```
1434-
/// print(("A" as Unicode.Scalar).properties.hasNormalizationBoundaryBefore)
1435-
/// // Prints "true"
1436-
/// print(("\u{0301}" as Unicode.Scalar).properties.hasNormalizationBoundaryBefore)
1437-
/// // Prints "false"
1438-
/// ```
1439-
public var hasNormalizationBoundaryBefore: Bool {
1440-
return __swift_stdlib_unorm2_hasBoundaryBefore(
1441-
_Normalization._nfcNormalizer, _value) != 0
1442-
}
14431424
}

0 commit comments

Comments
 (0)