Skip to content

Commit 2242a9f

Browse files
silverwind authored and lafriks committed
Improve SHA1 link detection (#6526)
This improves the SHA1 link detection to not pick up extraneous non-whitespace characters at the end of the URL. The '.' is a special case handled in code itself because of missing regexp lookahead support. Regex test cases: https://regex101.com/r/xUMlqh/3
1 parent 0bdd81d commit 2242a9f

File tree

2 files changed

+36
-21
lines changed

2 files changed

+36
-21
lines changed

modules/markup/html.go

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ var (
5454
shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)
5555

5656
// anySHA1Pattern allows to split url containing SHA into parts
57-
anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4}([0-9a-f]{40})/?([^#\s]+)?(?:#(\S+))?`)
57+
anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4}([0-9a-f]{40})(/[^#\s]+)?(#\S+)?`)
5858

5959
validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`)
6060

@@ -594,31 +594,46 @@ func fullSha1PatternProcessor(ctx *postProcessCtx, node *html.Node) {
594594
if m == nil {
595595
return
596596
}
597-
// take out what's relevant
597+
598598
urlFull := node.Data[m[0]:m[1]]
599-
hash := node.Data[m[2]:m[3]]
599+
text := base.ShortSha(node.Data[m[2]:m[3]])
600600

601-
var subtree, line string
601+
// 2nd capture group matches an optional path (including the leading '/')
602+
subpath := ""
603+
if m[5] > 0 {
604+
subpath = node.Data[m[4]:m[5]]
605+
}
602606

603-
// optional, we do them depending on the length.
607+
// 3rd capture group matches an optional url hash (including the leading '#')
608+
hash := ""
604609
if m[7] > 0 {
605-
line = node.Data[m[6]:m[7]]
610+
hash = node.Data[m[6]:m[7]][1:]
606611
}
607-
if m[5] > 0 {
608-
subtree = node.Data[m[4]:m[5]]
612+
613+
start := m[0]
614+
end := m[1]
615+
616+
// If url ends in '.', it's very likely that it is not part of the
617+
// actual url but used to finish a sentence.
618+
if strings.HasSuffix(urlFull, ".") {
619+
end--
620+
urlFull = urlFull[:len(urlFull)-1]
621+
if hash != "" {
622+
hash = hash[:len(hash)-1]
623+
} else if subpath != "" {
624+
subpath = subpath[:len(subpath)-1]
625+
}
609626
}
610627

611-
text := base.ShortSha(hash)
612-
if subtree != "" {
613-
text += "/" + subtree
628+
if subpath != "" {
629+
text += subpath
614630
}
615-
if line != "" {
616-
text += " ("
617-
text += line
618-
text += ")"
631+
632+
if hash != "" {
633+
text += " (" + hash + ")"
619634
}
620635

621-
replaceContent(node, m[0], m[1], createLink(urlFull, text))
636+
replaceContent(node, start, end, createLink(urlFull, text))
622637
}
623638

624639
// sha1CurrentPatternProcessor renders SHA1 strings to corresponding links that

modules/markup/html_internal_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -273,12 +273,12 @@ func TestRegExp_anySHA1Pattern(t *testing.T) {
273273
testCases := map[string][]string{
274274
"https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js#L2703": {
275275
"a644101ed04d0beacea864ce805e0c4f86ba1cd1",
276-
"test/unit/event.js",
277-
"L2703",
276+
"/test/unit/event.js",
277+
"#L2703",
278278
},
279279
"https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js": {
280280
"a644101ed04d0beacea864ce805e0c4f86ba1cd1",
281-
"test/unit/event.js",
281+
"/test/unit/event.js",
282282
"",
283283
},
284284
"https://github.com/jquery/jquery/commit/0705be475092aede1eddae01319ec931fb9c65fc": {
@@ -288,13 +288,13 @@ func TestRegExp_anySHA1Pattern(t *testing.T) {
288288
},
289289
"https://github.com/jquery/jquery/tree/0705be475092aede1eddae01319ec931fb9c65fc/src": {
290290
"0705be475092aede1eddae01319ec931fb9c65fc",
291-
"src",
291+
"/src",
292292
"",
293293
},
294294
"https://try.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2": {
295295
"d8a994ef243349f321568f9e36d5c3f444b99cae",
296296
"",
297-
"diff-2",
297+
"#diff-2",
298298
},
299299
}
300300

0 commit comments

Comments
 (0)