Skip to content

Commit 0602a44

Browse files
andrew-boyarshin authored and lunny committed
Fix URL handling in the whole markdown module, improve test coverage (#1027)
Amended with string to bool change in API SDK. Signed-off-by: Andrew Boyarshin <[email protected]>
1 parent 12e71e5 commit 0602a44

File tree

6 files changed

+276
-155
lines changed

6 files changed

+276
-155
lines changed

models/mail.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -150,7 +150,7 @@ func composeTplData(subject, body, link string) map[string]interface{} {
150150

151151
func composeIssueMessage(issue *Issue, doer *User, tplName base.TplName, tos []string, info string) *mailer.Message {
152152
subject := issue.mailSubject()
153-
body := string(markdown.RenderSpecialLink([]byte(issue.Content), issue.Repo.HTMLURL(), issue.Repo.ComposeMetas()))
153+
body := string(markdown.RenderString(issue.Content, issue.Repo.HTMLURL(), issue.Repo.ComposeMetas()))
154154
data := composeTplData(subject, body, issue.HTMLURL())
155155
data["Doer"] = doer
156156

modules/markdown/markdown.go

Lines changed: 76 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,10 @@ var (
9292
ShortLinkPattern = regexp.MustCompile(`(\[\[.*\]\]\w*)`)
9393

9494
// AnySHA1Pattern allows to split url containing SHA into parts
95-
AnySHA1Pattern = regexp.MustCompile(`http\S+//(\S+)/(\S+)/(\S+)/(\S+)/([0-9a-f]{40})(?:/?([^#\s]+)?(?:#(\S+))?)?`)
95+
AnySHA1Pattern = regexp.MustCompile(`(http\S*)://(\S+)/(\S+)/(\S+)/(\S+)/([0-9a-f]{40})(?:/?([^#\s]+)?(?:#(\S+))?)?`)
9696

9797
// IssueFullPattern allows to split issue (and pull) URLs into parts
98-
IssueFullPattern = regexp.MustCompile(`(?:^|\s|\()http\S+//((?:[^\s/]+/)+)((?:\w{1,10}-)?[1-9][0-9]*)([\?|#]\S+.(\S+)?)?\b`)
98+
IssueFullPattern = regexp.MustCompile(`(?:^|\s|\()(http\S*)://((?:[^\s/]+/)+)((?:\w{1,10}-)?[1-9][0-9]*)([\?|#]\S+.(\S+)?)?\b`)
9999

100100
validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`)
101101
)
@@ -126,10 +126,11 @@ type Renderer struct {
126126
func (r *Renderer) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
127127
if len(link) > 0 && !isLink(link) {
128128
if link[0] != '#' {
129-
mLink := URLJoin(r.urlPrefix, string(link))
129+
lnk := string(link)
130130
if r.isUncycloMarkdown {
131-
mLink = URLJoin(r.urlPrefix, "wiki", string(link))
131+
lnk = URLJoin("wiki", lnk)
132132
}
133+
mLink := URLJoin(r.urlPrefix, lnk)
133134
link = []byte(mLink)
134135
}
135136
}
@@ -206,12 +207,10 @@ func (r *Renderer) Image(out *bytes.Buffer, link []byte, title []byte, alt []byt
206207
return
207208
}
208209
} else {
209-
if link[0] != '/' {
210-
if !strings.HasSuffix(prefix, "/") {
211-
prefix += "/"
212-
}
213-
}
214-
link = []byte(url.QueryEscape(prefix + string(link)))
210+
lnk := string(link)
211+
lnk = URLJoin(prefix, lnk)
212+
lnk = strings.Replace(lnk, " ", "+", -1)
213+
link = []byte(lnk)
215214
}
216215
}
217216

@@ -246,10 +245,30 @@ func URLJoin(elem ...string) string {
246245
last := len(elem) - 1
247246
for i, item := range elem {
248247
res += item
249-
if !strings.HasSuffix(res, "/") && i != last {
248+
if i != last && !strings.HasSuffix(res, "/") {
250249
res += "/"
251250
}
252251
}
252+
cwdIndex := strings.Index(res, "/./")
253+
for cwdIndex != -1 {
254+
res = strings.Replace(res, "/./", "/", 1)
255+
cwdIndex = strings.Index(res, "/./")
256+
}
257+
upIndex := strings.Index(res, "/..")
258+
for upIndex != -1 {
259+
res = strings.Replace(res, "/..", "", 1)
260+
prevStart := -1
261+
for i := upIndex - 1; i >= 0; i-- {
262+
if res[i] == '/' {
263+
prevStart = i
264+
break
265+
}
266+
}
267+
if prevStart != -1 {
268+
res = res[:prevStart] + res[upIndex:]
269+
}
270+
upIndex = strings.Index(res, "/..")
271+
}
253272
return res
254273
}
255274

@@ -286,6 +305,9 @@ func RenderIssueIndexPattern(rawBytes []byte, urlPrefix string, metas map[string
286305

287306
// IsSameDomain checks if given url string has the same hostname as current Gitea instance
288307
func IsSameDomain(s string) bool {
308+
if strings.HasPrefix(s, "/") {
309+
return true
310+
}
289311
if uapp, err := url.Parse(setting.AppURL); err == nil {
290312
if u, err := url.Parse(s); err == nil {
291313
return u.Host == uapp.Host
@@ -300,26 +322,27 @@ func renderFullSha1Pattern(rawBytes []byte, urlPrefix string) []byte {
300322
ms := AnySHA1Pattern.FindAllSubmatch(rawBytes, -1)
301323
for _, m := range ms {
302324
all := m[0]
303-
paths := string(m[1])
304-
var path = "//" + paths
305-
author := string(m[2])
306-
repoName := string(m[3])
325+
protocol := string(m[1])
326+
paths := string(m[2])
327+
path := protocol + "://" + paths
328+
author := string(m[3])
329+
repoName := string(m[4])
307330
path = URLJoin(path, author, repoName)
308331
ltype := "src"
309-
itemType := m[4]
332+
itemType := m[5]
310333
if IsSameDomain(paths) {
311334
ltype = string(itemType)
312335
} else if string(itemType) == "commit" {
313336
ltype = "commit"
314337
}
315-
sha := m[5]
338+
sha := m[6]
316339
var subtree string
317-
if len(m) > 6 && len(m[6]) > 0 {
318-
subtree = string(m[6])
340+
if len(m) > 7 && len(m[7]) > 0 {
341+
subtree = string(m[7])
319342
}
320343
var line []byte
321-
if len(m) > 7 && len(m[7]) > 0 {
322-
line = m[7]
344+
if len(m) > 8 && len(m[8]) > 0 {
345+
line = m[8]
323346
}
324347
urlSuffix := ""
325348
text := base.ShortSha(string(sha))
@@ -346,23 +369,18 @@ func renderFullIssuePattern(rawBytes []byte, urlPrefix string) []byte {
346369
ms := IssueFullPattern.FindAllSubmatch(rawBytes, -1)
347370
for _, m := range ms {
348371
all := m[0]
349-
paths := bytes.Split(m[1], []byte("/"))
372+
protocol := string(m[1])
373+
paths := bytes.Split(m[2], []byte("/"))
350374
paths = paths[:len(paths)-1]
351375
if bytes.HasPrefix(paths[0], []byte("gist.")) {
352376
continue
353377
}
354-
var path string
355-
if len(paths) > 3 {
356-
// Internal one
357-
path = URLJoin(urlPrefix, "issues")
358-
} else {
359-
path = "//" + string(m[1])
360-
}
361-
id := string(m[2])
378+
path := protocol + "://" + string(m[2])
379+
id := string(m[3])
362380
path = URLJoin(path, id)
363381
var comment []byte
364382
if len(m) > 3 {
365-
comment = m[3]
383+
comment = m[4]
366384
}
367385
urlSuffix := ""
368386
text := "#" + id
@@ -394,8 +412,13 @@ func lastIndexOfByte(sl []byte, target byte) int {
394412
return -1
395413
}
396414

397-
// renderShortLinks processes [[syntax]]
398-
func renderShortLinks(rawBytes []byte, urlPrefix string, noLink bool) []byte {
415+
// RenderShortLinks processes [[syntax]]
416+
//
417+
// noLink flag disables making link tags when set to true
418+
// so this function just replaces the whole [[...]] with the content text
419+
//
420+
// isUncycloMarkdown is a flag to choose linking url prefix
421+
func RenderShortLinks(rawBytes []byte, urlPrefix string, noLink bool, isUncycloMarkdown bool) []byte {
399422
ms := ShortLinkPattern.FindAll(rawBytes, -1)
400423
for _, m := range ms {
401424
orig := bytes.TrimSpace(m)
@@ -482,11 +505,17 @@ func renderShortLinks(rawBytes []byte, urlPrefix string, noLink bool) []byte {
482505
}
483506
absoluteLink := isLink([]byte(link))
484507
if !absoluteLink {
485-
link = url.QueryEscape(link)
508+
link = strings.Replace(link, " ", "+", -1)
486509
}
487510
if image {
488511
if !absoluteLink {
489-
link = URLJoin(urlPrefix, "wiki", "raw", link)
512+
if IsSameDomain(urlPrefix) {
513+
urlPrefix = strings.Replace(urlPrefix, "/src/", "/raw/", 1)
514+
}
515+
if isUncycloMarkdown {
516+
link = URLJoin("wiki", "raw", link)
517+
}
518+
link = URLJoin(urlPrefix, link)
490519
}
491520
title := props["title"]
492521
if title == "" {
@@ -504,7 +533,10 @@ func renderShortLinks(rawBytes []byte, urlPrefix string, noLink bool) []byte {
504533
}
505534
name = fmt.Sprintf(`<img src="%s" %s title="%s" />`, link, alt, title)
506535
} else if !absoluteLink {
507-
link = URLJoin(urlPrefix, "wiki", link)
536+
if isUncycloMarkdown {
537+
link = URLJoin("wiki", link)
538+
}
539+
link = URLJoin(urlPrefix, link)
508540
}
509541
if noLink {
510542
rawBytes = bytes.Replace(rawBytes, orig, []byte(name), -1)
@@ -527,7 +559,7 @@ func RenderCrossReferenceIssueIndexPattern(rawBytes []byte, urlPrefix string, me
527559
repo := string(bytes.Split(m, []byte("#"))[0])
528560
issue := string(bytes.Split(m, []byte("#"))[1])
529561

530-
link := fmt.Sprintf(`<a href="%s">%s</a>`, URLJoin(urlPrefix, repo, "issues", issue), m)
562+
link := fmt.Sprintf(`<a href="%s">%s</a>`, URLJoin(setting.AppURL, repo, "issues", issue), m)
531563
rawBytes = bytes.Replace(rawBytes, m, []byte(link), 1)
532564
}
533565
return rawBytes
@@ -548,15 +580,15 @@ func renderSha1CurrentPattern(rawBytes []byte, urlPrefix string) []byte {
548580
}
549581

550582
// RenderSpecialLink renders mentions, indexes and SHA1 strings to corresponding links.
551-
func RenderSpecialLink(rawBytes []byte, urlPrefix string, metas map[string]string) []byte {
583+
func RenderSpecialLink(rawBytes []byte, urlPrefix string, metas map[string]string, isUncycloMarkdown bool) []byte {
552584
ms := MentionPattern.FindAll(rawBytes, -1)
553585
for _, m := range ms {
554586
m = m[bytes.Index(m, []byte("@")):]
555587
rawBytes = bytes.Replace(rawBytes, m,
556588
[]byte(fmt.Sprintf(`<a href="%s">%s</a>`, URLJoin(setting.AppURL, string(m[1:])), m)), -1)
557589
}
558590

559-
rawBytes = renderShortLinks(rawBytes, urlPrefix, false)
591+
rawBytes = RenderShortLinks(rawBytes, urlPrefix, false, isUncycloMarkdown)
560592
rawBytes = RenderIssueIndexPattern(rawBytes, urlPrefix, metas)
561593
rawBytes = RenderCrossReferenceIssueIndexPattern(rawBytes, urlPrefix, metas)
562594
rawBytes = renderFullSha1Pattern(rawBytes, urlPrefix)
@@ -601,7 +633,7 @@ var noEndTags = []string{"img", "input", "br", "hr"}
601633

602634
// PostProcess treats different types of HTML differently,
603635
// and only renders special links for plain text blocks.
604-
func PostProcess(rawHTML []byte, urlPrefix string, metas map[string]string) []byte {
636+
func PostProcess(rawHTML []byte, urlPrefix string, metas map[string]string, isUncycloMarkdown bool) []byte {
605637
startTags := make([]string, 0, 5)
606638
var buf bytes.Buffer
607639
tokenizer := html.NewTokenizer(bytes.NewReader(rawHTML))
@@ -611,7 +643,7 @@ OUTER_LOOP:
611643
token := tokenizer.Token()
612644
switch token.Type {
613645
case html.TextToken:
614-
buf.Write(RenderSpecialLink([]byte(token.String()), urlPrefix, metas))
646+
buf.Write(RenderSpecialLink([]byte(token.String()), urlPrefix, metas, isUncycloMarkdown))
615647

616648
case html.StartTagToken:
617649
buf.WriteString(token.String())
@@ -623,7 +655,7 @@ OUTER_LOOP:
623655
token = tokenizer.Token()
624656

625657
// Copy the token to the output verbatim
626-
buf.Write(renderShortLinks([]byte(token.String()), urlPrefix, true))
658+
buf.Write(RenderShortLinks([]byte(token.String()), urlPrefix, true, isUncycloMarkdown))
627659

628660
if token.Type == html.StartTagToken {
629661
if !com.IsSliceContainsStr(noEndTags, token.Data) {
@@ -673,9 +705,9 @@ OUTER_LOOP:
673705

674706
// Render renders Markdown to HTML with all specific handling stuff.
675707
func render(rawBytes []byte, urlPrefix string, metas map[string]string, isUncycloMarkdown bool) []byte {
676-
urlPrefix = strings.Replace(urlPrefix, " ", "%20", -1)
708+
urlPrefix = strings.Replace(urlPrefix, " ", "+", -1)
677709
result := RenderRaw(rawBytes, urlPrefix, isUncycloMarkdown)
678-
result = PostProcess(result, urlPrefix, metas)
710+
result = PostProcess(result, urlPrefix, metas, isUncycloMarkdown)
679711
result = Sanitizer.SanitizeBytes(result)
680712
return result
681713
}

0 commit comments

Comments
 (0)