Skip to content

Update emoji regex #11584

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
May 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions modules/emoji/emoji.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
package emoji

import (
"sort"
"strings"
"sync"
"unicode/utf8"
)

// Gemoji is a set of emoji data.
Expand Down Expand Up @@ -48,6 +50,12 @@ func loadMap() {
// process emoji codes and aliases
codePairs := make([]string, 0)
aliasPairs := make([]string, 0)

// sort from longest to shortest so we match combined emoji first
sort.Slice(GemojiData, func(i, j int) bool {
return len(GemojiData[i].Emoji) > len(GemojiData[j].Emoji)
})

for i, e := range GemojiData {
if e.Emoji == "" || len(e.Aliases) == 0 {
continue
Expand All @@ -72,6 +80,7 @@ func loadMap() {
codeReplacer = strings.NewReplacer(codePairs...)
aliasReplacer = strings.NewReplacer(aliasPairs...)
})

}

// FromCode retrieves the emoji data based on the provided unicode code (ie,
Expand Down Expand Up @@ -117,3 +126,21 @@ func ReplaceAliases(s string) string {
loadMap()
return aliasReplacer.Replace(s)
}

// FindEmojiSubmatchIndex returns the byte index pair {start, end} of an emoji
// found in s, or nil when s contains none of the emoji in GemojiData.
//
// GemojiData is scanned in order and the first entry that occurs in s wins.
// loadMap sorts GemojiData by descending emoji byte length, so the match is
// the longest known emoji present in s, not necessarily the leftmost one.
// The returned slice can be used directly as s[start:end].
func FindEmojiSubmatchIndex(s string) []int {
	loadMap()

	// If the rune count equals the byte length, every rune in s is a single
	// byte, so no multi-byte emoji can be present. Per the original author's
	// note this shortcut is almost 200% faster on such input and costs about
	// the same when multi-byte characters are present.
	if utf8.RuneCountInString(s) == len(s) {
		return nil
	}
	for j := range GemojiData {
		code := GemojiData[j].Emoji
		// strings.Index reports 0 for an empty substring, which would yield a
		// bogus zero-width match; loadMap skips such entries and so do we.
		if code == "" {
			continue
		}
		if i := strings.Index(s, code); i != -1 {
			return []int{i, i + len(code)}
		}
	}
	return nil
}
7 changes: 1 addition & 6 deletions modules/markup/html.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,6 @@ var (

// EmojiShortCodeRegex find emoji by alias like :smile:
EmojiShortCodeRegex = regexp.MustCompile(`\:[\w\+\-]+\:{1}`)

// find emoji literal: search all emoji hex range as many times as they appear as
// some emojis (skin color etc..) are just two or more chained together
emojiRegex = regexp.MustCompile(`[\x{1F000}-\x{1FFFF}|\x{2000}-\x{32ff}|\x{fe4e5}-\x{fe4ee}|\x{200D}|\x{FE0F}|\x{e0000}-\x{e007f}]+`)
)

// CSS class for action keywords (e.g. "closes: #1")
Expand Down Expand Up @@ -922,8 +918,7 @@ func emojiShortCodeProcessor(ctx *postProcessCtx, node *html.Node) {

// emoji processor to match emoji and add emoji class
func emojiProcessor(ctx *postProcessCtx, node *html.Node) {
m := emojiRegex.FindStringSubmatchIndex(node.Data)

m := emoji.FindEmojiSubmatchIndex(node.Data)
if m == nil {
return
}
Expand Down
4 changes: 3 additions & 1 deletion modules/markup/html_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,9 @@ func TestRender_emoji(t *testing.T) {
test(
"Some text with :smile: in the middle",
`<p>Some text with <span class="emoji" aria-label="grinning face with smiling eyes">😄</span> in the middle</p>`)

test(
"Some text with 😄😄 2 emoji next to each other",
`<p>Some text with <span class="emoji" aria-label="grinning face with smiling eyes">😄</span><span class="emoji" aria-label="grinning face with smiling eyes">😄</span> 2 emoji next to each other</p>`)
// should match nothing
test(
"2001:0db8:85a3:0000:0000:8a2e:0370:7334",
Expand Down