Skip to content

[5.7] [stdlib] Add caseFolded to scalar properties #59212

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
//
//===----------------------------------------------------------------------===//

// Unicode scalar tests are currently only avaible on Darwin, awaiting a sensible
// Unicode scalar tests are currently only available on Darwin, awaiting a sensible
// file API...
#if _runtime(_ObjC)
import Foundation
Expand Down Expand Up @@ -638,6 +638,54 @@ public let names: [Unicode.Scalar: String] = {
return result
}()

//===----------------------------------------------------------------------===//
// Case Folding
//===----------------------------------------------------------------------===//

/// Parses text in the `CaseFolding.txt` layout and records each accepted
/// folding in `result`.
///
/// Every non-comment line is treated as `;`-separated fields, of which the
/// first three are used: the source scalar, a status letter, and a
/// space-separated list of hexadecimal scalar values forming the mapping.
/// Only entries whose status is `C` or `F` are stored; all other statuses are
/// ignored. Malformed input traps, since the fields are force-unwrapped.
///
/// - Parameters:
///   - data: The full text of the case folding data file.
///   - result: The dictionary that receives `source scalar -> folded string`
///     entries. Existing entries for the same scalar are overwritten.
func parseCaseFoldings(
  _ data: String,
  into result: inout [Unicode.Scalar: String]
) {
  // Lines beginning with "#" are comments and carry no data.
  for entry in data.split(separator: "\n") where !entry.hasPrefix("#") {
    let fields = entry.split(separator: ";")

    // The status field is padded with whitespace in the data file.
    let status = fields[1].filter { !$0.isWhitespace }

    // Anything other than a Common or Full mapping is of no interest here.
    if status != "C" && status != "F" {
      continue
    }

    let source = Unicode.Scalar(parseScalars(String(fields[0])).lowerBound)!

    // Assemble the folded string one scalar at a time from the hex values.
    var folded = ""

    for hex in fields[2].split(separator: " ") {
      folded.unicodeScalars.append(Unicode.Scalar(UInt32(hex, radix: 16)!)!)
    }

    result[source] = folded
  }
}

/// Case folding mappings keyed by source scalar, built on first access by
/// parsing the `CaseFolding.txt` input file.
public let caseFolding: [Unicode.Scalar: String] = {
  var foldings = [Unicode.Scalar: String]()
  parseCaseFoldings(readInputFile("CaseFolding.txt"), into: &foldings)
  return foldings
}()

//===----------------------------------------------------------------------===//
// Script/Script Extensions
//===----------------------------------------------------------------------===//
Expand Down
4 changes: 4 additions & 0 deletions stdlib/public/SwiftShims/UnicodeData.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,10 @@ const __swift_uint8_t *_swift_stdlib_getScriptExtensions(
__swift_uint32_t scalar,
__swift_uint8_t *count);

// Writes the case mapping for `scalar` into `buffer`, starting at index 0.
// A scalar with no stored mapping is written back unchanged as a single
// entry; a multi-scalar mapping writes 2 or 3 entries (per the note in the
// implementation). Entries that are not written are left untouched, so the
// caller must supply enough storage and its own way of detecting the end of
// the mapping.
SWIFT_RUNTIME_STDLIB_INTERNAL
void _swift_stdlib_getCaseMapping(__swift_uint32_t scalar,
__swift_uint32_t *buffer);

#ifdef __cplusplus
} // extern "C"
#endif
Expand Down
34 changes: 34 additions & 0 deletions stdlib/public/core/UnicodeSPI.swift
Original file line number Diff line number Diff line change
Expand Up @@ -165,3 +165,37 @@ extension Unicode.Scalar.Properties {
return result
}
}

//===----------------------------------------------------------------------===//
// Case folding
//===----------------------------------------------------------------------===//

extension Unicode.Scalar.Properties {
  /// The case folded form of this scalar, as produced by the runtime's case
  /// mapping lookup.
  ///
  /// The result holds between one and three scalars.
  @_spi(_Unicode)
  @available(SwiftStdlib 5.7, *)
  public var _caseFolded: String {
    // Every slot starts out as `.max`, which doubles as the end-of-mapping
    // marker for any slot the runtime leaves untouched.
    var storage: (UInt32, UInt32, UInt32) = (.max, .max, .max)

    withUnsafeMutableBytes(of: &storage) { rawBytes in
      // The tuple's memory is already typed as UInt32, so assuming the
      // binding is sound.
      let slots = rawBytes.baseAddress!.assumingMemoryBound(to: UInt32.self)
      _swift_stdlib_getCaseMapping(_scalar.value, slots)
    }

    var folded = ""
    // At most 3 scalars, each needing at most 4 bytes of UTF-8.
    folded.reserveCapacity(12)

    withUnsafeBytes(of: &storage) { rawBytes in
      let values = rawBytes.bindMemory(to: UInt32.self)

      // Consume slots up to, but not including, the first unused one.
      for value in values.prefix(while: { $0 != .max }) {
        folded.unicodeScalars.append(Unicode.Scalar(value)!)
      }
    }

    return folded
  }
}
522 changes: 522 additions & 0 deletions stdlib/public/stubs/Unicode/Common/CaseData.h

Large diffs are not rendered by default.

60 changes: 60 additions & 0 deletions stdlib/public/stubs/Unicode/UnicodeScalarProps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "Common/ScalarPropsData.h"
#endif

#include "Common/CaseData.h"
#include "Common/ScriptData.h"

#else
Expand Down Expand Up @@ -510,3 +511,62 @@ const __swift_uint8_t *_swift_stdlib_getScriptExtensions(__swift_uint32_t scalar
return _swift_stdlib_script_extensions_data + (scalarDataIdx & 0x7FF);
#endif
}

// Writes the case mapping of `scalar` into `buffer`.
//
// Exactly one of three outcomes occurs when Unicode data is enabled:
//   1. `scalar` has no entry in the case table: buffer[0] = scalar.
//   2. `scalar` maps to a single scalar: buffer[0] = that scalar.
//   3. `scalar` maps to several scalars: buffer[0..<count] receive them.
// Slots of `buffer` beyond those written are not modified, so the caller is
// responsible for sizing the buffer and for detecting where the mapping ends.
//
// When Unicode data is disabled, the function calls
// swift_abortDisabledUnicodeSupport() instead.
SWIFT_RUNTIME_STDLIB_INTERNAL
void _swift_stdlib_getCaseMapping(__swift_uint32_t scalar,
__swift_uint32_t *buffer) {
#if !SWIFT_STDLIB_ENABLE_UNICODE_DATA
swift::swift_abortDisabledUnicodeSupport();
#else
// Look the scalar up in the simple case mapping table via its hash index.
auto mphIdx = _swift_stdlib_getMphIdx(scalar, CASE_FOLD_LEVEL_COUNT,
_swift_stdlib_case_keys,
_swift_stdlib_case_ranks,
_swift_stdlib_case_sizes);

auto caseValue = _swift_stdlib_case[mphIdx];
// Shifting left then right by 43 keeps only the low 21 bits of the entry,
// which hold the scalar the entry was generated for.
// NOTE(review): this assumes the table elements are 64-bit unsigned values;
// the table definition lives in CaseData.h -- confirm there.
__swift_uint32_t hashedScalar = (caseValue << 43) >> 43;

// If our scalar is not the original one we hashed, then this scalar has no
// case mapping. It maps to itself.
if (scalar != hashedScalar) {
buffer[0] = scalar;
return;
}

// If the top bit is NOT set, then this scalar simply maps to another scalar.
// We have stored the distance to said scalar in this value.
if ((caseValue & ((__swift_uint64_t)(0x1) << 63)) == 0) {
// `<< 1` discards the flag bit and `>> 22` then discards the 21 scalar bits,
// leaving the distance field, which is narrowed to a signed 32-bit value.
auto distance = (__swift_int32_t)((caseValue << 1) >> 22);
// The distance is subtracted from the source scalar to get the target.
auto mappedScalar = (__swift_uint32_t)((__swift_int32_t)(scalar) - distance);

buffer[0] = mappedScalar;
return;
}

// Our top bit WAS set which means this scalar maps to multiple scalars.
// Lookup our mapping in the full mph.
auto fullMphIdx = _swift_stdlib_getMphIdx(scalar, CASE_FULL_FOLD_LEVEL_COUNT,
_swift_stdlib_case_full_keys,
_swift_stdlib_case_full_ranks,
_swift_stdlib_case_full_sizes);

auto fullCaseValue = _swift_stdlib_case_full[fullMphIdx];

// Count is either 2 or 3.
// It is read from the top two bits of the entry.
auto count = fullCaseValue >> 62;

// Each mapped scalar occupies 17 bits, consumed from the low end upwards:
// a 16-bit distance magnitude followed by one sign bit (0x10000).
for (__swift_uint64_t i = 0; i != count; i += 1) {
auto distance = (__swift_int32_t)(fullCaseValue & 0xFFFF);

// A set sign bit means the stored magnitude is a negative distance.
if ((fullCaseValue & 0x10000) != 0) {
distance = -distance;
}

// Advance to the next 17-bit field for the following iteration.
fullCaseValue >>= 17;

auto mappedScalar = (__swift_uint32_t)((__swift_int32_t)(scalar) - distance);

buffer[i] = mappedScalar;
}
#endif
}
Loading