Skip to content

Commit 5fe6a7e

Browse files
committed
Add caseFolded to scalar properties
1 parent 3459000 commit 5fe6a7e

File tree

7 files changed

+2316
-0
lines changed

7 files changed

+2316
-0
lines changed

stdlib/private/StdlibUnicodeUnittest/UnicodeScalarProperties.swift

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -635,4 +635,53 @@ public let names: [Unicode.Scalar: String] = {
635635

636636
return result
637637
}()
638+
639+
//===----------------------------------------------------------------------===//
640+
// Case Folding
641+
//===----------------------------------------------------------------------===//
642+
643+
/// Parses the text of `CaseFolding.txt` and records every Common (`C`) and
/// Full (`F`) case folding it contains.
///
/// Data lines are `;`-separated: the first field is the source scalar, the
/// second is the folding status, and the third is the space-separated list of
/// hexadecimal scalar values the source folds to. Lines starting with `#` are
/// comments. Lines with any other status are ignored.
///
/// - Parameters:
///   - data: The full contents of the case folding data file.
///   - result: The table to fill in. Each accepted line stores the folded
///     string under its source scalar, replacing any existing entry.
func parseCaseFoldings(
  _ data: String,
  into result: inout [Unicode.Scalar: String]
) {
  // Split on any newline character rather than on "\n" alone: "\r\n" is a
  // single `Character` that does not compare equal to "\n", so CRLF input
  // would otherwise be treated as one giant (comment) line.
  for line in data.split(whereSeparator: { $0.isNewline }) {
    // Skip comments
    guard !line.hasPrefix("#") else {
      continue
    }

    let components = line.split(separator: ";")

    // A data line needs at least the scalar, status and mapping fields.
    // Anything shorter (for example a whitespace-only line) carries no
    // mapping, so skip it instead of indexing out of bounds.
    guard components.count >= 3 else {
      continue
    }

    let status = components[1].filter { !$0.isWhitespace }

    // We only care about Common and Full case mappings.
    guard status == "C" || status == "F" else {
      continue
    }

    let source = Unicode.Scalar(
      parseScalars(String(components[0])).lowerBound
    )!

    let mapping = components[2].split(separator: " ").map {
      Unicode.Scalar(UInt32($0, radix: 16)!)!
    }

    var mappingString = ""
    mappingString.unicodeScalars.append(contentsOf: mapping)

    result[source] = mappingString
  }
}
677+
678+
/// The case folding table built from `CaseFolding.txt`: each key is a scalar
/// with a Common or Full folding, and its value is the folded string.
public let caseFolding: [Unicode.Scalar: String] = {
  var foldings = [Unicode.Scalar: String]()

  // Read the data file and let the parser populate the table in place.
  parseCaseFoldings(readInputFile("CaseFolding.txt"), into: &foldings)

  return foldings
}()
686+
638687
#endif

stdlib/public/SwiftShims/UnicodeData.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,10 @@ const __swift_uint8_t *_swift_stdlib_getScriptExtensions(
108108
__swift_uint32_t scalar,
109109
__swift_uint8_t *count);
110110

111+
// Writes the case folding of `scalar` into `buffer`.
//
// The implementation stores a single value in `buffer[0]` when the scalar
// folds to itself or to exactly one scalar, and otherwise stores the
// multi-scalar mapping starting at `buffer[0]` (its comments give that count
// as 2 or 3). Slots that are not written are left untouched, so the caller
// must supply room for three values and pre-fill them with a sentinel if it
// needs to know how many were produced.
SWIFT_RUNTIME_STDLIB_INTERNAL
112+
void _swift_stdlib_getCaseMapping(__swift_uint32_t scalar,
113+
__swift_uint32_t *buffer);
114+
111115
#ifdef __cplusplus
112116
} // extern "C"
113117
#endif

stdlib/public/core/UnicodeSPI.swift

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,3 +165,37 @@ extension Unicode.Scalar.Properties {
165165
return result
166166
}
167167
}
168+
169+
//===----------------------------------------------------------------------===//
170+
// Case folding
171+
//===----------------------------------------------------------------------===//
172+
173+
extension Unicode.Scalar.Properties {
  /// The case-folded form of this scalar, as reported by the runtime's case
  /// mapping lookup.
  ///
  /// The result contains between one and three scalars.
  @_spi(_Unicode)
  @available(SwiftStdlib 5.7, *)
  public var _caseFolded: String {
    // Three output slots, pre-filled with a sentinel so that the slots the
    // runtime leaves untouched can be recognized afterwards.
    var slots: (UInt32, UInt32, UInt32) = (.max, .max, .max)

    withUnsafeMutableBytes(of: &slots) { raw in
      // This is safe because the tuple's storage is already UInt32.
      let output = raw.baseAddress!.assumingMemoryBound(to: UInt32.self)
      _swift_stdlib_getCaseMapping(_scalar.value, output)
    }

    var folded = ""
    // At most 3 scalars, each taking at most 4 UTF-8 code units.
    folded.reserveCapacity(12)

    withUnsafeBytes(of: &slots) { raw in
      // Everything before the first sentinel is part of the mapping.
      let written = raw.bindMemory(to: UInt32.self).prefix(while: {
        $0 != .max
      })

      for value in written {
        folded.unicodeScalars.append(Unicode.Scalar(value)!)
      }
    }

    return folded
  }
}

stdlib/public/stubs/Unicode/Common/CaseData.h

Lines changed: 522 additions & 0 deletions
Large diffs are not rendered by default.

stdlib/public/stubs/Unicode/UnicodeScalarProps.cpp

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "Common/ScalarPropsData.h"
1919
#endif
2020

21+
#include "Common/CaseData.h"
2122
#include "Common/ScriptData.h"
2223

2324
#else
@@ -510,3 +511,62 @@ const __swift_uint8_t *_swift_stdlib_getScriptExtensions(__swift_uint32_t scalar
510511
return _swift_stdlib_script_extensions_data + (scalarDataIdx & 0x7FF);
511512
#endif
512513
}
514+
515+
// Looks up the case folding of `scalar` and writes it into `buffer`.
//
// Exactly one value is written to `buffer[0]` when the scalar has no entry in
// the case table (it folds to itself) or when its entry encodes a single
// target scalar. Otherwise the entry's top bit is set and the mapping is read
// from the full-folding table, which yields 2 or 3 values written to
// consecutive slots starting at `buffer[0]`. Slots beyond the written ones
// are not modified.
SWIFT_RUNTIME_STDLIB_INTERNAL
void _swift_stdlib_getCaseMapping(__swift_uint32_t scalar,
                                  __swift_uint32_t *buffer) {
#if !SWIFT_STDLIB_ENABLE_UNICODE_DATA
  swift::swift_abortDisabledUnicodeSupport();
#else
  auto simpleIdx = _swift_stdlib_getMphIdx(scalar, CASE_FOLD_LEVEL_COUNT,
                                           _swift_stdlib_case_keys,
                                           _swift_stdlib_case_ranks,
                                           _swift_stdlib_case_sizes);

  auto entry = _swift_stdlib_case[simpleIdx];

  // The low 21 bits of the entry hold the scalar that was hashed into this
  // slot. A mismatch means our scalar has no case mapping and maps to itself.
  __swift_uint32_t storedScalar = (entry << 43) >> 43;

  if (storedScalar != scalar) {
    buffer[0] = scalar;
    return;
  }

  // The top bit distinguishes a multi-scalar mapping (set) from a mapping to
  // a single scalar (clear).
  bool hasFullMapping = (entry & ((__swift_uint64_t)(0x1) << 63)) != 0;

  if (!hasFullMapping) {
    // The bits between the stored scalar and the top bit hold the distance
    // from this scalar to the one it maps to.
    auto delta = (__swift_int32_t)((entry << 1) >> 22);

    buffer[0] = (__swift_uint32_t)((__swift_int32_t)(scalar) - delta);
    return;
  }

  // This scalar maps to multiple scalars; find its entry in the full mph.
  auto fullIdx = _swift_stdlib_getMphIdx(scalar, CASE_FULL_FOLD_LEVEL_COUNT,
                                         _swift_stdlib_case_full_keys,
                                         _swift_stdlib_case_full_ranks,
                                         _swift_stdlib_case_full_sizes);

  auto packed = _swift_stdlib_case_full[fullIdx];

  // The top two bits hold the number of mapped scalars (either 2 or 3).
  auto count = packed >> 62;

  for (__swift_uint64_t slot = 0; slot != count; slot += 1) {
    // Each mapped scalar occupies 17 bits: a 16-bit magnitude followed by a
    // sign flag.
    auto magnitude = (__swift_int32_t)(packed & 0xFFFF);
    bool isNegative = (packed & 0x10000) != 0;
    auto delta = isNegative ? -magnitude : magnitude;

    // Advance to the next packed distance.
    packed >>= 17;

    buffer[slot] = (__swift_uint32_t)((__swift_int32_t)(scalar) - delta);
  }
#endif
}

0 commit comments

Comments
 (0)