Skip to content

Commit 8f658e1

Browse files
committed
Merge pull request #239 from glessard/unicode-prefix-suffix
Unicode-compatible (mostly) hasPrefix and hasSuffix
2 parents dd8951e + 805d6c5 commit 8f658e1

File tree

2 files changed

+218
-35
lines changed

2 files changed

+218
-35
lines changed

Foundation/String.swift

Lines changed: 21 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1579,43 +1579,30 @@ extension String {
15791579
) != nil
15801580
return r
15811581
}
1582-
1583-
#if os(Linux)
1582+
}
1583+
1584+
#if !_runtime(_ObjC)
1585+
import CoreFoundation
1586+
1587+
extension String {
15841588
public func hasPrefix(prefix: String) -> Bool {
1585-
let characters = utf16
1586-
let prefixCharacters = prefix.utf16
1587-
let start = characters.startIndex
1588-
let prefixStart = prefixCharacters.startIndex
1589-
if characters.count < prefixCharacters.count {
1590-
return false
1591-
}
1592-
for idx in 0..<prefixCharacters.count {
1593-
if characters[start.advancedBy(idx)] != prefixCharacters[prefixStart.advancedBy(idx)] {
1594-
return false
1595-
}
1596-
}
1597-
return true
1589+
let cfstring = self._cfObject
1590+
let range = CFRangeMake(0, CFStringGetLength(cfstring))
1591+
let opts = CFStringCompareFlags(
1592+
kCFCompareAnchored | kCFCompareNonliteral)
1593+
1594+
return CFStringFindWithOptions(cfstring, prefix._cfObject,
1595+
range, opts, nil)
15981596
}
15991597

16001598
public func hasSuffix(suffix: String) -> Bool {
1601-
let characters = utf16
1602-
let suffixCharacters = suffix.utf16
1603-
let start = characters.startIndex
1604-
let suffixStart = suffixCharacters.startIndex
1605-
1606-
if characters.count < suffixCharacters.count {
1607-
return false
1608-
}
1609-
for idx in 0..<suffixCharacters.count {
1610-
let charactersIdx = start.advancedBy(characters.count - idx - 1)
1611-
let suffixIdx = suffixStart.advancedBy(suffixCharacters.count - idx - 1)
1612-
if characters[charactersIdx] != suffixCharacters[suffixIdx] {
1613-
return false
1614-
}
1615-
}
1616-
return true
1599+
let cfstring = self._cfObject
1600+
let range = CFRangeMake(0, CFStringGetLength(cfstring))
1601+
let opts = CFStringCompareFlags(
1602+
kCFCompareAnchored | kCFCompareBackwards | kCFCompareNonliteral)
1603+
1604+
return CFStringFindWithOptions(cfstring, suffix._cfObject,
1605+
range, opts, nil)
16171606
}
1618-
#endif
16191607
}
1620-
1621-
1608+
#endif

TestFoundation/TestNSString.swift

Lines changed: 197 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,8 @@ class TestNSString : XCTestCase {
7777
("test_stringByExpandingTildeInPath", test_stringByExpandingTildeInPath),
7878
("test_stringByStandardizingPath", test_stringByStandardizingPath),
7979
("test_ExternalRepresentation", test_ExternalRepresentation),
80-
("test_mutableStringConstructor", test_mutableStringConstructor)
80+
("test_mutableStringConstructor", test_mutableStringConstructor),
81+
("test_PrefixSuffix", test_PrefixSuffix),
8182
]
8283
}
8384

@@ -864,3 +865,198 @@ class TestNSString : XCTestCase {
864865
XCTAssertEqual(mutableString, "Test")
865866
}
866867
}
868+
869+
/// A pair of strings used as one case of the prefix/suffix comparison tests.
struct ComparisonTest {
    /// First string of the pair.
    let lhs: String
    /// Second string of the pair.
    let rhs: String
    /// Line number recorded for this case; defaults to the line of the
    /// initializer call so a report can point back at the table entry.
    let loc: UInt
    /// Explanation of why the case is expected to fail; empty when the case
    /// is expected to pass.
    let reason: String

    /// `true` when a non-empty `reason` marks this case as an expected failure.
    var xfail: Bool {
        return reason.isEmpty == false
    }

    /// Creates a test case.
    ///
    /// - Parameter lhs: first string of the pair.
    /// - Parameter rhs: second string of the pair.
    /// - Parameter reason: why the case is expected to fail (default: empty,
    ///   meaning the case is expected to pass).
    /// - Parameter line: line number stored in `loc` (default: call site).
    init(
        _ lhs: String,
        _ rhs: String,
        reason: String = "",
        line: UInt = __LINE__
    ) {
        self.loc = line
        self.reason = reason
        self.lhs = lhs
        self.rhs = rhs
    }
}
889+
890+
// Table of string pairs fed to the prefix/suffix checks. Entries carrying a
// `reason` are the ones expected to fail.
let comparisonTests = [
    ComparisonTest("", ""),
    ComparisonTest("", "a"),

    // Plain ASCII, including NUL and CR-LF.
    ComparisonTest("t", "tt"),
    ComparisonTest("t", "Tt"),
    ComparisonTest("\u{0}", ""),
    ComparisonTest("\u{0}", "\u{0}",
        reason: "https://bugs.swift.org/browse/SR-332"),
    ComparisonTest("\r\n", "t"),
    ComparisonTest("\r\n", "\n",
        reason: "blocked on rdar://problem/19036555"),
    ComparisonTest("\u{0}", "\u{0}\u{0}",
        reason: "rdar://problem/19034601"),

    // Whitespace / line-break characters:
    //   U+000A LINE FEED (LF)
    //   U+000B LINE TABULATION
    //   U+000C FORM FEED (FF)
    //   U+0085 NEXT LINE (NEL)
    //   U+2028 LINE SEPARATOR
    //   U+2029 PARAGRAPH SEPARATOR
    ComparisonTest("\u{0085}", "\n"),
    ComparisonTest("\u{000b}", "\n"),
    ComparisonTest("\u{000c}", "\n"),
    ComparisonTest("\u{2028}", "\n"),
    ComparisonTest("\u{2029}", "\n"),
    ComparisonTest("\r\n\r\n", "\r\n"),

    // Precomposed versus combining acute accent:
    //   U+0301 COMBINING ACUTE ACCENT
    //   U+00E1 LATIN SMALL LETTER A WITH ACUTE
    ComparisonTest("a\u{301}", "\u{e1}"),
    ComparisonTest("a", "a\u{301}"),
    ComparisonTest("a", "\u{e1}"),

    // Hiragana with and without the voiced sound mark:
    //   U+304B HIRAGANA LETTER KA
    //   U+304C HIRAGANA LETTER GA
    //   U+3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    ComparisonTest("\u{304b}", "\u{304b}"),
    ComparisonTest("\u{304c}", "\u{304c}"),
    ComparisonTest("\u{304b}", "\u{304c}"),
    ComparisonTest("\u{304b}", "\u{304c}\u{3099}"),
    ComparisonTest("\u{304c}", "\u{304b}\u{3099}"),
    ComparisonTest("\u{304c}", "\u{304c}\u{3099}"),

    // Angstrom sign and its equivalents:
    //   U+212B ANGSTROM SIGN
    //   U+030A COMBINING RING ABOVE
    //   U+00C5 LATIN CAPITAL LETTER A WITH RING ABOVE
    ComparisonTest("\u{212b}", "A\u{30a}"),
    ComparisonTest("\u{212b}", "\u{c5}"),
    ComparisonTest("A\u{30a}", "\u{c5}"),
    ComparisonTest("A\u{30a}", "a"),
    ComparisonTest("A", "A\u{30a}"),

    // Ohm sign versus Greek omega:
    //   U+2126 OHM SIGN
    //   U+03A9 GREEK CAPITAL LETTER OMEGA
    ComparisonTest("\u{2126}", "\u{03a9}"),

    // Letters carrying two combining dots, in either order:
    //   U+0323 COMBINING DOT BELOW
    //   U+0307 COMBINING DOT ABOVE
    //   U+1E63 LATIN SMALL LETTER S WITH DOT BELOW
    //   U+1E69 LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE
    ComparisonTest("\u{1e69}", "s\u{323}\u{307}"),
    ComparisonTest("\u{1e69}", "s\u{307}\u{323}"),
    ComparisonTest("\u{1e69}", "\u{1e63}\u{307}"),
    ComparisonTest("\u{1e63}", "s\u{323}"),
    ComparisonTest("\u{1e63}\u{307}", "s\u{323}\u{307}"),
    ComparisonTest("\u{1e63}\u{307}", "s\u{307}\u{323}"),
    ComparisonTest("s\u{323}", "\u{1e69}"),

    // Ligature:
    //   U+FB01 LATIN SMALL LIGATURE FI
    ComparisonTest("\u{fb01}", "\u{fb01}"),
    ComparisonTest("fi", "\u{fb01}"),

    // Regional indicators:
    //   U+1F1E7 REGIONAL INDICATOR SYMBOL LETTER B
    //   \u{1F1E7}\u{1F1E7} is the flag of Barbados
    ComparisonTest("\u{1F1E7}", "\u{1F1E7}\u{1F1E7}",
        reason: "https://bugs.swift.org/browse/SR-367"),

    // Check that Unicode collation runs in deterministic mode.
    //
    //   U+0301 COMBINING ACUTE ACCENT
    //   U+0341 COMBINING ACUTE TONE MARK
    //   U+0954 DEVANAGARI ACUTE ACCENT
    //
    // DUCET collation elements:
    //   0301 ; [.0000.0024.0002] # COMBINING ACUTE ACCENT
    //   0341 ; [.0000.0024.0002] # COMBINING ACUTE TONE MARK
    //   0954 ; [.0000.0024.0002] # DEVANAGARI ACUTE ACCENT
    //
    // In the canonical decomposition mapping, U+0301 and U+0954 do not
    // decompose, while U+0341 decomposes to U+0301.
    ComparisonTest("\u{0301}", "\u{0341}",
        reason: "https://bugs.swift.org/browse/SR-243"),
    ComparisonTest("\u{0301}", "\u{0954}"),
    ComparisonTest("\u{0341}", "\u{0954}"),
]
988+
989+
/// Error type whose single case wraps an array of `UInt` values.
///
/// NOTE(review): nothing in the visible source throws or catches this type;
/// presumably it was meant to carry the line-number stacks passed around by
/// the prefix/suffix helpers — confirm before relying on or removing it.
enum Stack: ErrorType { case Stack([UInt]) }
992+
993+
/// Checks `lhs.hasPrefix(rhs)` and `lhs.hasSuffix(rhs)` against expected
/// answers and reports how many of the two disagree.
///
/// When either string is empty, both answers are expected to be `false`.
/// Otherwise the expectation comes from comparing, scalar by scalar, the
/// grapheme clusters of the canonically decomposed (NFD) forms of the strings.
///
/// - Parameter lhs: the string on which `hasPrefix`/`hasSuffix` are called.
/// - Parameter rhs: the candidate prefix/suffix.
/// - Parameter stack: line numbers identifying the caller; only used by the
///   commented-out diagnostic output.
/// - Returns: the number of mismatches, between 0 and 2.
func checkHasPrefixHasSuffix(lhs: String, _ rhs: String, _ stack: [UInt]) -> Int {
    // With an empty operand every `true` answer counts as one failure.
    if lhs == "" || rhs == "" {
        let answers = [lhs.hasPrefix(rhs), lhs.hasSuffix(rhs)]
        return answers.filter { $0 }.count
    }

    // Splits the NFD form of a string into grapheme clusters, each one
    // represented by the array of its unicode scalars.
    func nfdGraphemeClusters(string: String) -> [[UnicodeScalar]] {
        return string.decomposedStringWithCanonicalMapping.characters.map { cluster in
            Array(String(cluster).unicodeScalars)
        }
    }

    let lhsClusters = nfdGraphemeClusters(lhs)
    let rhsClusters = nfdGraphemeClusters(rhs)

    let prefixExpected = lhsClusters.startsWith(rhsClusters, isEquivalent: (==))
    // A suffix match is a prefix match on the reversed cluster sequences.
    let suffixExpected = lhsClusters.lazy.reverse().startsWith(
        rhsClusters.lazy.reverse(), isEquivalent: (==))

    // Yields 1 when the actual answer differs from the expected one, else 0.
    func mismatchCount(expected: Bool, _ actual: Bool, _ stack: [UInt]) -> Int {
        if expected == actual {
            return 0
        }
        // print(stack)
        return 1
    }

    let prefixMismatch = mismatchCount(prefixExpected, lhs.hasPrefix(rhs), stack + [__LINE__])
    let suffixMismatch = mismatchCount(suffixExpected, lhs.hasSuffix(rhs), stack + [__LINE__])
    return prefixMismatch + suffixMismatch
}
1036+
1037+
extension TestNSString {
    /// Runs every entry of `comparisonTests` through `checkHasPrefixHasSuffix`
    /// in both directions and with an extra fragment or combining mark
    /// attached to either end of `lhs`. A case must produce at least one
    /// mismatch exactly when it is marked `xfail`.
    ///
    /// The body is compiled only when `_runtime(_ObjC)` is false.
    func test_PrefixSuffix() {
#if !_runtime(_ObjC)
        // Extra material appended/prepended to `lhs` for the derived checks.
        let fragment = "abc"
        let combiner = "\u{0301}"

        for test in comparisonTests {
            // Each check sits on its own line so the `__LINE__` pushed onto
            // the diagnostic stack identifies which variant was running.
            let outcomes = [
                checkHasPrefixHasSuffix(test.lhs, test.rhs, [test.loc, __LINE__]),
                checkHasPrefixHasSuffix(test.rhs, test.lhs, [test.loc, __LINE__]),
                checkHasPrefixHasSuffix(test.lhs + fragment, test.rhs, [test.loc, __LINE__]),
                checkHasPrefixHasSuffix(fragment + test.lhs, test.rhs, [test.loc, __LINE__]),
                checkHasPrefixHasSuffix(test.lhs + combiner, test.rhs, [test.loc, __LINE__]),
                checkHasPrefixHasSuffix(combiner + test.lhs, test.rhs, [test.loc, __LINE__]),
            ]
            let failures = outcomes.reduce(0, combine: +)
            let fail = failures > 0

            // Diagnostics, enable when investigating a failing case:
            // if fail {
            //     print("Prefix/Suffix case \(test.loc): \(failures) failures")
            //     print("Failures were\(test.xfail ? "" : " not") expected")
            // }

            XCTAssert(test.xfail == fail, "Unexpected \(test.xfail ?"success":"failure"): \(test.loc)")
        }
#endif
    }
}

0 commit comments

Comments
 (0)