Skip to content

Commit 0ad4083

Browse files
Update emoji dataset with skin tone variants (go-gitea#11678) (go-gitea#11763)
* Update emoji dataset with skin tone variants

  Since the format of emoji that support skin tone modifiers is predictable, we can add the different variants to our dataset when generating it, so that we can match and properly style most skin tone variants of emoji. No real code change here other than what generates the dataset and the data itself.

* Use Unicode escape sequences in map

Co-authored-by: techknowlogick <[email protected]>
Co-authored-by: techknowlogick <[email protected]>
1 parent 99058de commit 0ad4083

File tree

4 files changed

+3155
-1742
lines changed

4 files changed

+3155
-1742
lines changed

assets/emoji.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

build/generate-emoji.go

Lines changed: 51 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ import (
1919
"sort"
2020
"strconv"
2121
"strings"
22+
"unicode/utf8"
2223
)
2324

2425
const (
@@ -39,6 +40,7 @@ type Emoji struct {
3940
Description string `json:"description,omitempty"`
4041
Aliases []string `json:"aliases"`
4142
UnicodeVersion string `json:"unicode_version,omitempty"`
43+
SkinTones bool `json:"skin_tones,omitempty"`
4244
}
4345

4446
// Don't include some fields in JSON
@@ -47,6 +49,7 @@ func (e Emoji) MarshalJSON() ([]byte, error) {
4749
x := emoji(e)
4850
x.UnicodeVersion = ""
4951
x.Description = ""
52+
x.SkinTones = false
5053
return json.Marshal(x)
5154
}
5255

@@ -75,6 +78,7 @@ var replacer = strings.NewReplacer(
7578
", Description:", ", ",
7679
", Aliases:", ", ",
7780
", UnicodeVersion:", ", ",
81+
", SkinTones:", ", ",
7882
)
7983

8084
var emojiRE = regexp.MustCompile(`\{Emoji:"([^"]*)"`)
@@ -102,18 +106,20 @@ func generate() ([]byte, error) {
102106
return nil, err
103107
}
104108

105-
var re = regexp.MustCompile(`keycap|registered|copyright`)
106-
tmp := data[:0]
109+
var skinTones = make(map[string]string)
107110

108-
// filter out emoji that require greater than max unicode version
111+
skinTones["\U0001f3fb"] = "Light Skin Tone"
112+
skinTones["\U0001f3fc"] = "Medium-Light Skin Tone"
113+
skinTones["\U0001f3fd"] = "Medium Skin Tone"
114+
skinTones["\U0001f3fe"] = "Medium-Dark Skin Tone"
115+
skinTones["\U0001f3ff"] = "Dark Skin Tone"
116+
117+
var tmp Gemoji
118+
119+
//filter out emoji that require greater than max unicode version
109120
for i := range data {
110121
val, _ := strconv.ParseFloat(data[i].UnicodeVersion, 64)
111122
if int(val) <= maxUnicodeVersion {
112-
// remove these keycaps for now they really complicate matching since
113-
// they include normal letters in them
114-
if re.MatchString(data[i].Description) {
115-
continue
116-
}
117123
tmp = append(tmp, data[i])
118124
}
119125
}
@@ -123,7 +129,6 @@ func generate() ([]byte, error) {
123129
return data[i].Aliases[0] < data[j].Aliases[0]
124130
})
125131

126-
aliasPairs := make([]string, 0)
127132
aliasMap := make(map[string]int, len(data))
128133

129134
for i, e := range data {
@@ -135,7 +140,6 @@ func generate() ([]byte, error) {
135140
continue
136141
}
137142
aliasMap[a] = i
138-
aliasPairs = append(aliasPairs, ":"+a+":", e.Emoji)
139143
}
140144
}
141145

@@ -149,6 +153,43 @@ func generate() ([]byte, error) {
149153
data[i].Aliases = append(data[i].Aliases, "laugh")
150154
}
151155

156+
// write a JSON file to use with tribute (write before adding skin tones since we can't support them there yet)
157+
file, _ := json.Marshal(data)
158+
_ = ioutil.WriteFile("assets/emoji.json", file, 0644)
159+
160+
// Add skin tones to emoji that support it
161+
var (
162+
s []string
163+
newEmoji string
164+
newDescription string
165+
newData Emoji
166+
)
167+
168+
for i := range data {
169+
if data[i].SkinTones {
170+
for k, v := range skinTones {
171+
s = strings.Split(data[i].Emoji, "")
172+
173+
if utf8.RuneCountInString(data[i].Emoji) == 1 {
174+
s = append(s, k)
175+
} else {
176+
// insert into slice after first element because all emoji that support skin tones
177+
// have that modifier placed at this spot
178+
s = append(s, "")
179+
copy(s[2:], s[1:])
180+
s[1] = k
181+
}
182+
183+
newEmoji = strings.Join(s, "")
184+
newDescription = data[i].Description + ": " + v
185+
newAlias := data[i].Aliases[0] + "_" + strings.ReplaceAll(v, " ", "_")
186+
187+
newData = Emoji{newEmoji, newDescription, []string{newAlias}, "12.0", false}
188+
data = append(data, newData)
189+
}
190+
}
191+
}
192+
152193
// add header
153194
str := replacer.Replace(fmt.Sprintf(hdr, gemojiURL, data))
154195

@@ -162,10 +203,6 @@ func generate() ([]byte, error) {
162203
return "{" + strconv.QuoteToASCII(s)
163204
})
164205

165-
// write a JSON file to use with tribute
166-
file, _ := json.Marshal(data)
167-
_ = ioutil.WriteFile("assets/emoji.json", file, 0644)
168-
169206
// format
170207
return format.Source([]byte(str))
171208
}

modules/emoji/emoji.go

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@ import (
99
"sort"
1010
"strings"
1111
"sync"
12-
"unicode/utf8"
1312
)
1413

1514
// Gemoji is a set of emoji data.
@@ -21,6 +20,7 @@ type Emoji struct {
2120
Description string
2221
Aliases []string
2322
UnicodeVersion string
23+
SkinTones bool
2424
}
2525

2626
var (
@@ -131,11 +131,12 @@ func ReplaceAliases(s string) string {
131131
func FindEmojiSubmatchIndex(s string) []int {
132132
loadMap()
133133

134-
// if rune and string length are the same then no emoji will be present
135-
// similar performance when there is unicode present but almost 200% faster when not
136-
if utf8.RuneCountInString(s) == len(s) {
134+
//see if there are any emoji in string before looking for position of specific ones
135+
// no performance difference when there is a match, but 10x faster when there is none
136+
if s == ReplaceCodes(s) {
137137
return nil
138138
}
139+
139140
for j := range GemojiData {
140141
i := strings.Index(s, GemojiData[j].Emoji)
141142
if i != -1 {

0 commit comments

Comments
 (0)