Skip to content

Commit d7e0983

Browse files
authored
Alternative fix for HTML diff entity split (#13425)
* Alternative fix for HTML diff entity split This commit both reverts PR #13357 and uses the exiting implementation alredy used for spans to fix the same issue. That PR duplicates most of logic that is already present elsewhere and still was failing for some cases. This should be simpler as it uses the existing logic that already works for <span>s being split apart. Added both test cases as well. * Update gitdiff_test.go * fmt * entity can have uppercase letter, also add detailed comment per @zeripath
1 parent dd882f6 commit d7e0983

File tree

2 files changed

+45
-114
lines changed

2 files changed

+45
-114
lines changed

services/gitdiff/gitdiff.go

Lines changed: 30 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ var (
182182
codeTagSuffix = []byte(`</span>`)
183183
)
184184
var trailingSpanRegex = regexp.MustCompile(`<span\s*[[:alpha:]="]*?[>]?$`)
185+
var entityRegex = regexp.MustCompile(`&[#]*?[0-9[:alpha:]]*$`)
185186

186187
// shouldWriteInline represents combinations where we manually write inline changes
187188
func shouldWriteInline(diff diffmatchpatch.Diff, lineType DiffLineType) bool {
@@ -205,14 +206,40 @@ func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineT
205206
match = ""
206207
}
207208
// Chroma HTML syntax highlighting is done before diffing individual lines in order to maintain consistency.
208-
// Since inline changes might split in the middle of a chroma span tag, make we manually put it back together
209-
// before writing so we don't try insert added/removed code spans in the middle of an existing chroma span
210-
// and create broken HTML.
209+
// Since inline changes might split in the middle of a chroma span tag or HTML entity, make we manually put it back together
210+
// before writing so we don't try insert added/removed code spans in the middle of one of those
211+
// and create broken HTML. This is done by moving incomplete HTML forward until it no longer matches our pattern of
212+
// a line ending with an incomplete HTML entity or partial/opening <span>.
213+
214+
// EX:
215+
// diffs[{Type: dmp.DiffDelete, Text: "language</span><span "},
216+
// {Type: dmp.DiffEqual, Text: "c"},
217+
// {Type: dmp.DiffDelete, Text: "lass="p">}]
218+
219+
// After first iteration
220+
// diffs[{Type: dmp.DiffDelete, Text: "language</span>"}, //write out
221+
// {Type: dmp.DiffEqual, Text: "<span c"},
222+
// {Type: dmp.DiffDelete, Text: "lass="p">,</span>}]
223+
224+
// After second iteration
225+
// {Type: dmp.DiffEqual, Text: ""}, // write out
226+
// {Type: dmp.DiffDelete, Text: "<span class="p">,</span>}]
227+
228+
// Final
229+
// {Type: dmp.DiffDelete, Text: "<span class="p">,</span>}]
230+
// end up writing <span class="removed-code"><span class="p">,</span></span>
231+
// Instead of <span class="removed-code">lass="p",</span></span>
232+
211233
m := trailingSpanRegex.FindStringSubmatchIndex(diff.Text)
212234
if m != nil {
213235
match = diff.Text[m[0]:m[1]]
214236
diff.Text = strings.TrimSuffix(diff.Text, match)
215237
}
238+
m = entityRegex.FindStringSubmatchIndex(diff.Text)
239+
if m != nil {
240+
match = diff.Text[m[0]:m[1]]
241+
diff.Text = strings.TrimSuffix(diff.Text, match)
242+
}
216243
// Print an existing closing span first before opening added/remove-code span so it doesn't unintentionally close it
217244
if strings.HasPrefix(diff.Text, "</span>") {
218245
buf.WriteString("</span>")
@@ -290,9 +317,6 @@ func init() {
290317
diffMatchPatch.DiffEditCost = 100
291318
}
292319

293-
var unterminatedEntityRE = regexp.MustCompile(`&[^ ;]*$`)
294-
var unstartedEntiyRE = regexp.MustCompile(`^[^ ;]*;`)
295-
296320
// GetComputedInlineDiffFor computes inline diff for the given line.
297321
func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) template.HTML {
298322
if setting.Git.DisableDiffHighlight {
@@ -333,89 +357,11 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) tem
333357
diffRecord := diffMatchPatch.DiffMain(highlight.Code(diffSection.FileName, diff1[1:]), highlight.Code(diffSection.FileName, diff2[1:]), true)
334358
diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
335359

336-
// Now we need to clean up the split entities
337-
diffRecord = unsplitEntities(diffRecord)
338360
diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
339361

340362
return diffToHTML(diffSection.FileName, diffRecord, diffLine.Type)
341363
}
342364

343-
// unsplitEntities looks for broken up html entities. It relies on records being presimplified and the data being passed in being valid html
344-
func unsplitEntities(records []diffmatchpatch.Diff) []diffmatchpatch.Diff {
345-
// Unsplitting entities is simple...
346-
//
347-
// Iterate through all be the last records because if we're the last record then there's nothing we can do
348-
for i := 0; i+1 < len(records); i++ {
349-
record := &records[i]
350-
351-
// Look for an unterminated entity at the end of the line
352-
unterminated := unterminatedEntityRE.FindString(record.Text)
353-
if len(unterminated) == 0 {
354-
continue
355-
}
356-
357-
switch record.Type {
358-
case diffmatchpatch.DiffEqual:
359-
// If we're an diff equal we want to give this unterminated entity to our next delete and insert
360-
record.Text = record.Text[0 : len(record.Text)-len(unterminated)]
361-
records[i+1].Text = unterminated + records[i+1].Text
362-
363-
nextType := records[i+1].Type
364-
365-
if nextType == diffmatchpatch.DiffEqual {
366-
continue
367-
}
368-
369-
// if the next in line is a delete then we will want the thing after that to be an insert and so on.
370-
oneAfterType := diffmatchpatch.DiffInsert
371-
if nextType == diffmatchpatch.DiffInsert {
372-
oneAfterType = diffmatchpatch.DiffDelete
373-
}
374-
375-
if i+2 < len(records) && records[i+2].Type == oneAfterType {
376-
records[i+2].Text = unterminated + records[i+2].Text
377-
} else {
378-
records = append(records[:i+2], append([]diffmatchpatch.Diff{
379-
{
380-
Type: oneAfterType,
381-
Text: unterminated,
382-
}}, records[i+2:]...)...)
383-
}
384-
case diffmatchpatch.DiffDelete:
385-
fallthrough
386-
case diffmatchpatch.DiffInsert:
387-
// if we're an insert or delete we want to claim the terminal bit of the entity from the next equal in line
388-
targetType := diffmatchpatch.DiffInsert
389-
if record.Type == diffmatchpatch.DiffInsert {
390-
targetType = diffmatchpatch.DiffDelete
391-
}
392-
next := &records[i+1]
393-
if next.Type == diffmatchpatch.DiffEqual {
394-
// if the next is an equal we need to snaffle the entity end off the start and add an delete/insert
395-
if terminal := unstartedEntiyRE.FindString(next.Text); len(terminal) > 0 {
396-
record.Text += terminal
397-
next.Text = next.Text[len(terminal):]
398-
records = append(records[:i+2], append([]diffmatchpatch.Diff{
399-
{
400-
Type: targetType,
401-
Text: unterminated,
402-
}}, records[i+2:]...)...)
403-
}
404-
} else if next.Type == targetType {
405-
// if the next is an insert we need to snaffle the entity end off the one after that and add it to both.
406-
if i+2 < len(records) && records[i+2].Type == diffmatchpatch.DiffEqual {
407-
if terminal := unstartedEntiyRE.FindString(records[i+2].Text); len(terminal) > 0 {
408-
record.Text += terminal
409-
next.Text += terminal
410-
records[i+2].Text = records[i+2].Text[len(terminal):]
411-
}
412-
}
413-
}
414-
}
415-
}
416-
return records
417-
}
418-
419365
// DiffFile represents a file diff.
420366
type DiffFile struct {
421367
Name string

services/gitdiff/gitdiff_test.go

Lines changed: 15 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ import (
1515
"code.gitea.io/gitea/models"
1616
"code.gitea.io/gitea/modules/git"
1717
"code.gitea.io/gitea/modules/setting"
18-
"github.com/sergi/go-diff/diffmatchpatch"
1918
dmp "github.com/sergi/go-diff/diffmatchpatch"
2019
"github.com/stretchr/testify/assert"
2120
"gopkg.in/ini.v1"
@@ -27,35 +26,6 @@ func assertEqual(t *testing.T, s1 string, s2 template.HTML) {
2726
}
2827
}
2928

30-
func TestUnsplitEntities(t *testing.T) {
31-
left := "sh &#34;useradd -u 111 jenkins&#34;"
32-
right := "sh &#39;useradd -u $(stat -c &#34;%u&#34; .gitignore) jenkins&#39;"
33-
diffRecord := diffMatchPatch.DiffMain(left, right, true)
34-
diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
35-
36-
// Now we need to clean up the split entities
37-
diffRecord = unsplitEntities(diffRecord)
38-
diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
39-
40-
leftRecombined := ""
41-
rightRecombined := ""
42-
for _, record := range diffRecord {
43-
assert.False(t, unterminatedEntityRE.MatchString(record.Text), "")
44-
switch record.Type {
45-
case diffmatchpatch.DiffDelete:
46-
leftRecombined += record.Text
47-
case diffmatchpatch.DiffInsert:
48-
rightRecombined += record.Text
49-
default:
50-
leftRecombined += record.Text
51-
rightRecombined += record.Text
52-
}
53-
}
54-
55-
assert.EqualValues(t, left, leftRecombined)
56-
assert.EqualValues(t, right, rightRecombined)
57-
}
58-
5929
func TestDiffToHTML(t *testing.T) {
6030
setting.Cfg = ini.Empty()
6131
assertEqual(t, "foo <span class=\"added-code\">bar</span> biz", diffToHTML("", []dmp.Diff{
@@ -113,6 +83,21 @@ func TestDiffToHTML(t *testing.T) {
11383
{Type: dmp.DiffEqual, Text: "<span class=\"sa\"></span><span class=\"s2\">&#34;</span><span class=\"s2\">// </span><span class=\"s2\">&#34;</span><span class=\"p\">,</span> <span class=\"n\">sys</span><span class=\"o\">.</span><span class=\"n\">argv</span>"},
11484
{Type: dmp.DiffInsert, Text: "<span class=\"p\">)</span>"},
11585
}, DiffLineAdd))
86+
87+
assertEqual(t, "sh <span class=\"added-code\">&#39;useradd -u $(stat -c &#34;%u&#34; .gitignore) jenkins</span>&#39;", diffToHTML("", []dmp.Diff{
88+
{Type: dmp.DiffEqual, Text: "sh &#3"},
89+
{Type: dmp.DiffDelete, Text: "4;useradd -u 111 jenkins&#34"},
90+
{Type: dmp.DiffInsert, Text: "9;useradd -u $(stat -c &#34;%u&#34; .gitignore) jenkins&#39"},
91+
{Type: dmp.DiffEqual, Text: ";"},
92+
}, DiffLineAdd))
93+
94+
assertEqual(t, "<span class=\"x\"> &lt;h<span class=\"added-code\">4 class=</span><span class=\"added-code\">&#34;release-list-title df ac&#34;</span>&gt;</span>", diffToHTML("", []dmp.Diff{
95+
{Type: dmp.DiffEqual, Text: "<span class=\"x\"> &lt;h"},
96+
{Type: dmp.DiffInsert, Text: "4 class=&#"},
97+
{Type: dmp.DiffEqual, Text: "3"},
98+
{Type: dmp.DiffInsert, Text: "4;release-list-title df ac&#34;"},
99+
{Type: dmp.DiffEqual, Text: "&gt;</span>"},
100+
}, DiffLineAdd))
116101
}
117102

118103
func TestParsePatch_singlefile(t *testing.T) {

0 commit comments

Comments
 (0)