Skip to content

[string] Internal Unicode.Scalar.withUTF16CodeUnits #18642

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 11, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 38 additions & 3 deletions stdlib/public/core/UnicodeScalar.swift
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ extension Unicode {
/// print(airplane)
/// // Prints "✈︎"
@_fixed_layout
public struct Scalar {
public struct Scalar {
@inlinable // FIXME(sil-serialize-all)
internal init(_value: UInt32) {
self._value = _value
Expand Down Expand Up @@ -337,7 +337,7 @@ extension Unicode.Scalar {
///
/// In case of an invalid input value, nil is returned.
///
/// let codepoint: UInt32 = extValue // This might be an invalid value.
/// let codepoint: UInt32 = extValue // This might be an invalid value.
/// if let emoji = Unicode.Scalar(codepoint) {
/// print(emoji)
/// } else {
Expand Down Expand Up @@ -396,7 +396,7 @@ extension Unicode.Scalar : Comparable {
extension Unicode.Scalar {
@_fixed_layout // FIXME(sil-serialize-all)
public struct UTF16View {
@inlinable // FIXME(sil-serialize-all)
@inlinable // FIXME(sil-serialize-all)
internal init(value: Unicode.Scalar) {
self.value = value
}
Expand Down Expand Up @@ -464,3 +464,38 @@ extension Unicode.Scalar {
Builtin.unreachable()
}
}

// Access the underlying code units
extension Unicode.Scalar {
// Access the scalar as encoded in UTF-16
internal func withUTF16CodeUnits<Result>(
_ body: (UnsafeBufferPointer<UInt16>) throws -> Result
) rethrows -> Result {
var codeUnits: (UInt16, UInt16) = (self.utf16[0], 0)
let utf16Count = self.utf16.count
if utf16Count > 1 {
_sanityCheck(utf16Count == 2)
codeUnits.1 = self.utf16[1]
}
return try Swift.withUnsafePointer(to: &codeUnits) {
return try $0.withMemoryRebound(to: UInt16.self, capacity: 2) {
return try body(UnsafeBufferPointer(start: $0, count: utf16Count))
}
}
}

// Access the scalar as encoded in UTF-8
internal func withUTF8CodeUnits<Result>(
_ body: (UnsafeBufferPointer<UInt8>) throws -> Result
) rethrows -> Result {
let encodedScalar = UTF8.encode(self)!
var (codeUnits, utf8Count) = encodedScalar._bytes
return try Swift.withUnsafePointer(to: &codeUnits) {
return try $0.withMemoryRebound(to: UInt8.self, capacity: 4) {
return try body(UnsafeBufferPointer(start: $0, count: utf8Count))
}
}
}
}


104 changes: 44 additions & 60 deletions stdlib/public/core/UnicodeScalarProperties.swift
Original file line number Diff line number Diff line change
Expand Up @@ -20,32 +20,33 @@ extension Unicode.Scalar {
/// A value that provides access to properties of a Unicode scalar that are
/// defined by the Unicode standard.
public struct Properties {
internal init(_scalar: Unicode.Scalar) {
// We convert the value to the underlying UChar32 type here and store it
// in that form to make calling the ICU APIs cleaner below.
self._value = __swift_stdlib_UChar32(bitPattern: _scalar._value)
@usableFromInline
internal var _scalar: Unicode.Scalar

internal init(_ scalar: Unicode.Scalar) {
self._scalar = scalar
}

@usableFromInline
internal var _value: __swift_stdlib_UChar32
// Provide the value as UChar32 to make calling the ICU APIs cleaner
internal var icuValue: __swift_stdlib_UChar32 {
return __swift_stdlib_UChar32(bitPattern: self._scalar._value)
}
}

/// A value that provides access to properties of the Unicode scalar that are
/// defined by the Unicode standard.
public var properties: Properties {
return Properties(_scalar: self)
return Properties(self)
}
}

/// Boolean properties that are defined by the Unicode Standard (i.e., not
/// ICU-specific).
extension Unicode.Scalar.Properties {

@usableFromInline @_transparent
internal func _hasBinaryProperty(
_ property: __swift_stdlib_UProperty
) -> Bool {
return __swift_stdlib_u_hasBinaryProperty(_value, property) != 0
return __swift_stdlib_u_hasBinaryProperty(icuValue, property) != 0
}

/// A Boolean property indicating whether the scalar is alphabetic.
Expand Down Expand Up @@ -668,16 +669,6 @@ extension Unicode.Scalar.Properties {

/// Case mapping properties.
extension Unicode.Scalar.Properties {

/// The UTF-16 encoding of the scalar, represented as a tuple of 2 elements.
///
/// If the scalar only encodes to one code unit, the second element is zero.
@usableFromInline @_transparent
internal var _utf16CodeUnits: (UTF16.CodeUnit, UTF16.CodeUnit) {
let utf16 = UnicodeScalar(UInt32(_value))!.utf16
return (utf16[0], utf16.count > 1 ? utf16[1] : 0)
}

// The type of ICU case conversion functions.
internal typealias _U_StrToX = (
/* dest */ UnsafeMutablePointer<__swift_stdlib_UChar>,
Expand All @@ -695,51 +686,44 @@ extension Unicode.Scalar.Properties {
/// all current case mappings. In the event more space is needed, it will be
/// allocated on the heap.
internal func _applyMapping(_ u_strTo: _U_StrToX) -> String {
let utf16Length = UnicodeScalar(UInt32(_value))!.utf16.count
var utf16 = _utf16CodeUnits
var scratchBuffer = _Normalization._SegmentOutputBuffer(allZeros: ())
let count = scratchBuffer.withUnsafeMutableBufferPointer { bufPtr -> Int in
return withUnsafePointer(to: &utf16) { tuplePtr in
return tuplePtr.withMemoryRebound(to: UInt16.self, capacity: 2) {
utf16Pointer in
var err = __swift_stdlib_U_ZERO_ERROR
let correctSize = u_strTo(
bufPtr.baseAddress._unsafelyUnwrappedUnchecked,
Int32(bufPtr.count),
utf16Pointer,
Int32(utf16Length),
"",
&err)
guard err.isSuccess ||
err == __swift_stdlib_U_BUFFER_OVERFLOW_ERROR else {
fatalError("Unexpected error case-converting Unicode scalar.")
}
return Int(correctSize)
return _scalar.withUTF16CodeUnits { utf16 in
var err = __swift_stdlib_U_ZERO_ERROR
let correctSize = u_strTo(
bufPtr.baseAddress._unsafelyUnwrappedUnchecked,
Int32(bufPtr.count),
utf16.baseAddress._unsafelyUnwrappedUnchecked,
Int32(utf16.count),
"",
&err)
guard err.isSuccess ||
err == __swift_stdlib_U_BUFFER_OVERFLOW_ERROR else {
fatalError("Unexpected error case-converting Unicode scalar.")
}
return Int(correctSize)
}
}

if _fastPath(count <= scratchBuffer.count) {
scratchBuffer.count = count
return String._fromWellFormedUTF16CodeUnits(scratchBuffer)
}
var array = Array<UInt16>(repeating: 0, count: count)
array.withUnsafeMutableBufferPointer { bufPtr in
withUnsafePointer(to: &utf16) { tuplePtr in
tuplePtr.withMemoryRebound(to: UInt16.self, capacity: 2) {
utf16Pointer in
var err = __swift_stdlib_U_ZERO_ERROR
let correctSize = u_strTo(
bufPtr.baseAddress._unsafelyUnwrappedUnchecked,
Int32(bufPtr.count),
utf16Pointer,
Int32(utf16Length),
"",
&err)
guard err.isSuccess else {
fatalError("Unexpected error case-converting Unicode scalar.")
}
_sanityCheck(count == correctSize, "inconsistent ICU behavior")
return _scalar.withUTF16CodeUnits { utf16 in
var err = __swift_stdlib_U_ZERO_ERROR
let correctSize = u_strTo(
bufPtr.baseAddress._unsafelyUnwrappedUnchecked,
Int32(bufPtr.count),
utf16.baseAddress._unsafelyUnwrappedUnchecked,
Int32(utf16.count),
"",
&err)
guard err.isSuccess else {
fatalError("Unexpected error case-converting Unicode scalar.")
}
_sanityCheck(count == correctSize, "inconsistent ICU behavior")
}
}
return String._fromWellFormedUTF16CodeUnits(array[..<count])
Expand Down Expand Up @@ -811,7 +795,7 @@ extension Unicode.Scalar.Properties {
withUnsafeMutablePointer(to: &versionInfo) { tuplePtr in
tuplePtr.withMemoryRebound(to: UInt8.self, capacity: 4) {
versionInfoPtr in
__swift_stdlib_u_charAge(_value, versionInfoPtr)
__swift_stdlib_u_charAge(icuValue, versionInfoPtr)
}
}
guard versionInfo.0 != 0 else { return nil }
Expand Down Expand Up @@ -1087,7 +1071,7 @@ extension Unicode.Scalar.Properties {
public var generalCategory: Unicode.GeneralCategory {
let rawValue = __swift_stdlib_UCharCategory(
UInt32(__swift_stdlib_u_getIntPropertyValue(
_value, __swift_stdlib_UCHAR_GENERAL_CATEGORY)))
icuValue, __swift_stdlib_UCHAR_GENERAL_CATEGORY)))
return Unicode.GeneralCategory(rawValue: rawValue)
}
}
Expand All @@ -1098,15 +1082,15 @@ extension Unicode.Scalar.Properties {
_ choice: __swift_stdlib_UCharNameChoice
) -> String? {
var err = __swift_stdlib_U_ZERO_ERROR
let count = Int(__swift_stdlib_u_charName(_value, choice, nil, 0, &err))
let count = Int(__swift_stdlib_u_charName(icuValue, choice, nil, 0, &err))
guard count > 0 else { return nil }

// ICU writes a trailing null, so we have to save room for it as well.
var array = Array<UInt8>(repeating: 0, count: count + 1)
return array.withUnsafeMutableBufferPointer { bufPtr in
var err = __swift_stdlib_U_ZERO_ERROR
let correctSize = __swift_stdlib_u_charName(
_value,
icuValue,
choice,
UnsafeMutableRawPointer(bufPtr.baseAddress._unsafelyUnwrappedUnchecked)
.assumingMemoryBound(to: Int8.self),
Expand Down Expand Up @@ -1282,7 +1266,7 @@ extension Unicode.Scalar.Properties {
/// the [Unicode Standard](http://www.unicode.org/versions/latest/).
public var canonicalCombiningClass: Unicode.CanonicalCombiningClass {
let rawValue = UInt8(__swift_stdlib_u_getIntPropertyValue(
_value, __swift_stdlib_UCHAR_CANONICAL_COMBINING_CLASS))
icuValue, __swift_stdlib_UCHAR_CANONICAL_COMBINING_CLASS))
return Unicode.CanonicalCombiningClass(rawValue: rawValue)
}
}
Expand Down Expand Up @@ -1364,7 +1348,7 @@ extension Unicode.Scalar.Properties {
public var numericType: Unicode.NumericType? {
let rawValue = __swift_stdlib_UNumericType(
UInt32(__swift_stdlib_u_getIntPropertyValue(
_value, __swift_stdlib_UCHAR_NUMERIC_TYPE)))
icuValue, __swift_stdlib_UCHAR_NUMERIC_TYPE)))
return Unicode.NumericType(rawValue: rawValue)
}

Expand All @@ -1391,7 +1375,7 @@ extension Unicode.Scalar.Properties {
/// [Unicode Standard](http://www.unicode.org/versions/latest/).
public var numericValue: Double? {
let icuNoNumericValue: Double = -123456789
let result = __swift_stdlib_u_getNumericValue(_value)
let result = __swift_stdlib_u_getNumericValue(icuValue)
return result != icuNoNumericValue ? result : nil
}
}