Skip to content

Commit c5ce63a

Browse files
authored
Merge pull request #20599 from milseman/emoji_tag
[Character] Permit tagged emoji Character literals
2 parents 7537740 + bca1b74 commit c5ce63a

File tree

3 files changed

+17
-15
lines changed

3 files changed

+17
-15
lines changed

include/swift/Basic/Unicode.h

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -54,17 +54,6 @@ extern const uint16_t ExtendedGraphemeClusterNoBoundaryRulesMatrix[];
5454
/// point.
5555
GraphemeClusterBreakProperty getGraphemeClusterBreakProperty(uint32_t C);
5656

57-
/// Returns true if there is always an extended grapheme cluster boundary
58-
/// after a code point with a given property value. Use only for optimization,
59-
/// to skip calculating Grapheme_Cluster_Break property for the second code
60-
/// point.
61-
static inline bool
62-
isExtendedGraphemeClusterBoundaryAfter(GraphemeClusterBreakProperty GCB1) {
63-
auto RuleRow =
64-
ExtendedGraphemeClusterNoBoundaryRulesMatrix[static_cast<unsigned>(GCB1)];
65-
return RuleRow == 0;
66-
}
67-
6857
/// Determine if there is an extended grapheme cluster boundary between code
6958
/// points with given Grapheme_Cluster_Break property values.
7059
static inline bool

lib/Basic/Unicode.cpp

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,22 @@ using namespace swift;
2727
// break between them. That is, whether we're overriding the behavior of the
2828
// hard coded Unicode 8 rules surrounding ZWJ and emoji modifiers.
2929
static inline bool graphemeBreakOverride(llvm::UTF32 lhs, llvm::UTF32 rhs) {
30-
return lhs == 0x200D || (rhs >= 0x1F3FB && rhs <= 0x1F3FF);
30+
// Assume ZWJ sequences produce new emoji
31+
if (lhs == 0x200D) {
32+
return true;
33+
}
34+
35+
// Permit emoji modifier sequences (skin tone modifiers U+1F3FB-U+1F3FF)
36+
if (rhs >= 0x1F3FB && rhs <= 0x1F3FF) {
37+
return true;
38+
}
39+
40+
// Permit emoji tag sequences
41+
if (rhs >= 0xE0020 && rhs <= 0xE007F) {
42+
return true;
43+
}
44+
45+
return false;
3146
}
3247

3348
StringRef swift::unicode::extractFirstExtendedGraphemeCluster(StringRef S) {
@@ -52,9 +67,6 @@ StringRef swift::unicode::extractFirstExtendedGraphemeCluster(StringRef S) {
5267

5368
GraphemeClusterBreakProperty GCBForC0 = getGraphemeClusterBreakProperty(C[0]);
5469
while (true) {
55-
if (isExtendedGraphemeClusterBoundaryAfter(GCBForC0))
56-
return S.slice(0, SourceNext - SourceStart);
57-
5870
size_t C1Offset = SourceNext - SourceStart;
5971
ConvertUTF8toUTF32(&SourceNext, SourceStart + S.size(), &TargetStart, C + 2,
6072
llvm::lenientConversion);

test/Parse/enum.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ enum RawTypeWithCharacterValues_Correct : Character {
197197
case First = "😅" // ok
198198
case Second = "👩‍👩‍👧‍👦" // ok
199199
case Third = "👋🏽" // ok
200+
case Fourth = "\u{1F3F4}\u{E0067}\u{E0062}\u{E0065}\u{E006E}\u{E0067}\u{E007F}" // ok
200201
}
201202

202203
enum RawTypeWithCharacterValues_Error1 : Character { // expected-error {{'RawTypeWithCharacterValues_Error1' declares raw type 'Character', but does not conform to RawRepresentable and conformance could not be synthesized}}

0 commit comments

Comments
 (0)