Skip to content

Commit beb2058

Browse files
authored
Fix broken spans in diffs (#14678)
Gitea runs a diff on the highlighted code fragment for each line in order to provide code highlight diffs. Unfortunately, this diff algorithm is not aware that span tags and entities are atomic and cannot be split. The current fixup code makes some attempt to fix these broken tags; however, it cannot handle situations where a tag is split over multiple blocks. This PR provides a more algorithmic fixup mechanism whereby spans and entities are completely coalesced into their respective blocks. This may result in an incompletely reduced diff, but it will definitely prevent the broken entities and spans that are currently possible. As a result of this fixup, several inconsistencies were discovered in our testcases and these were also fixed. Fix #14231 Signed-off-by: Andrew Thornton <[email protected]>
1 parent f3847c9 commit beb2058

File tree

2 files changed

+228
-6
lines changed

2 files changed

+228
-6
lines changed

services/gitdiff/gitdiff.go

Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,8 @@ var (
182182
removedCodePrefix = []byte(`<span class="removed-code">`)
183183
codeTagSuffix = []byte(`</span>`)
184184
)
185+
186+
// unfinishedtagRegex matches a '<' that is followed only by non-'>' characters
// up to the end of the text, i.e. a tag that is opened but not closed before
// the text ends.
var unfinishedtagRegex = regexp.MustCompile(`<[^>]*$`)
185187
// trailingSpanRegex matches a "<span" at the end of the text, optionally
// followed by whitespace, a (lazy) run of letters, '=' and '"' characters, and
// an optional closing '>'.
var trailingSpanRegex = regexp.MustCompile(`<span\s*[[:alpha:]="]*?[>]?$`)
186188
// entityRegex matches an '&' followed by optional '#' characters and then only
// digits or letters up to the end of the text, i.e. an entity that has not yet
// been terminated by ';'.
var entityRegex = regexp.MustCompile(`&[#]*?[0-9[:alpha:]]*$`)
187189

@@ -196,10 +198,218 @@ func shouldWriteInline(diff diffmatchpatch.Diff, lineType DiffLineType) bool {
196198
return false
197199
}
198200

201+
func fixupBrokenSpans(diffs []diffmatchpatch.Diff) []diffmatchpatch.Diff {
202+
203+
// Create a new array to store our fixed up blocks
204+
fixedup := make([]diffmatchpatch.Diff, 0, len(diffs))
205+
206+
// semantically label some numbers
207+
const insert, delete, equal = 0, 1, 2
208+
209+
// record the positions of the last type of each block in the fixedup blocks
210+
last := []int{-1, -1, -1}
211+
operation := []diffmatchpatch.Operation{diffmatchpatch.DiffInsert, diffmatchpatch.DiffDelete, diffmatchpatch.DiffEqual}
212+
213+
// create a writer for insert and deletes
214+
toWrite := []strings.Builder{
215+
{},
216+
{},
217+
}
218+
219+
// make some flags for insert and delete
220+
unfinishedTag := []bool{false, false}
221+
unfinishedEnt := []bool{false, false}
222+
223+
// store stores the provided text in the writer for the typ
224+
store := func(text string, typ int) {
225+
(&(toWrite[typ])).WriteString(text)
226+
}
227+
228+
// hasStored returns true if there is stored content
229+
hasStored := func(typ int) bool {
230+
return (&toWrite[typ]).Len() > 0
231+
}
232+
233+
// stored will return that content
234+
stored := func(typ int) string {
235+
return (&toWrite[typ]).String()
236+
}
237+
238+
// empty will empty the stored content
239+
empty := func(typ int) {
240+
(&toWrite[typ]).Reset()
241+
}
242+
243+
// pop will remove the stored content appending to a diff block for that typ
244+
pop := func(typ int, fixedup []diffmatchpatch.Diff) []diffmatchpatch.Diff {
245+
if hasStored(typ) {
246+
if last[typ] > last[equal] {
247+
fixedup[last[typ]].Text += stored(typ)
248+
} else {
249+
fixedup = append(fixedup, diffmatchpatch.Diff{
250+
Type: operation[typ],
251+
Text: stored(typ),
252+
})
253+
}
254+
empty(typ)
255+
}
256+
return fixedup
257+
}
258+
259+
// Now we walk the provided diffs and check the type of each block in turn
260+
for _, diff := range diffs {
261+
262+
typ := delete // flag for handling insert or delete typs
263+
switch diff.Type {
264+
case diffmatchpatch.DiffEqual:
265+
// First check if there is anything stored
266+
if hasStored(insert) || hasStored(delete) {
267+
// There are two reasons for storing content:
268+
// 1. Unfinished Entity <- Could be more efficient here by not doing this if we're looking for a tag
269+
if unfinishedEnt[insert] || unfinishedEnt[delete] {
270+
// we look for a ';' to finish an entity
271+
idx := strings.IndexRune(diff.Text, ';')
272+
if idx >= 0 {
273+
// if we find a ';' store the preceding content to both insert and delete
274+
store(diff.Text[:idx+1], insert)
275+
store(diff.Text[:idx+1], delete)
276+
277+
// and remove it from this block
278+
diff.Text = diff.Text[idx+1:]
279+
280+
// reset the ent flags
281+
unfinishedEnt[insert] = false
282+
unfinishedEnt[delete] = false
283+
} else {
284+
// otherwise store it all on insert and delete
285+
store(diff.Text, insert)
286+
store(diff.Text, delete)
287+
// and empty this block
288+
diff.Text = ""
289+
}
290+
}
291+
// 2. Unfinished Tag
292+
if unfinishedTag[insert] || unfinishedTag[delete] {
293+
// we look for a '>' to finish a tag
294+
idx := strings.IndexRune(diff.Text, '>')
295+
if idx >= 0 {
296+
store(diff.Text[:idx+1], insert)
297+
store(diff.Text[:idx+1], delete)
298+
diff.Text = diff.Text[idx+1:]
299+
unfinishedTag[insert] = false
300+
unfinishedTag[delete] = false
301+
} else {
302+
store(diff.Text, insert)
303+
store(diff.Text, delete)
304+
diff.Text = ""
305+
}
306+
}
307+
308+
// If we've completed the required tag/entities
309+
if !(unfinishedTag[insert] || unfinishedTag[delete] || unfinishedEnt[insert] || unfinishedEnt[delete]) {
310+
// pop off the stack
311+
fixedup = pop(insert, fixedup)
312+
fixedup = pop(delete, fixedup)
313+
}
314+
315+
// If that has left this diff block empty then shortcut
316+
if len(diff.Text) == 0 {
317+
continue
318+
}
319+
}
320+
321+
// check if this block ends in an unfinished tag?
322+
idx := unfinishedtagRegex.FindStringIndex(diff.Text)
323+
if idx != nil {
324+
unfinishedTag[insert] = true
325+
unfinishedTag[delete] = true
326+
} else {
327+
// otherwise does it end in an unfinished entity?
328+
idx = entityRegex.FindStringIndex(diff.Text)
329+
if idx != nil {
330+
unfinishedEnt[insert] = true
331+
unfinishedEnt[delete] = true
332+
}
333+
}
334+
335+
// If there is an unfinished component
336+
if idx != nil {
337+
// Store the fragment
338+
store(diff.Text[idx[0]:], insert)
339+
store(diff.Text[idx[0]:], delete)
340+
// and remove it from this block
341+
diff.Text = diff.Text[:idx[0]]
342+
}
343+
344+
// If that hasn't left the block empty
345+
if len(diff.Text) > 0 {
346+
// store the position of the last equal block and store it in our diffs
347+
last[equal] = len(fixedup)
348+
fixedup = append(fixedup, diff)
349+
}
350+
continue
351+
case diffmatchpatch.DiffInsert:
352+
typ = insert
353+
fallthrough
354+
case diffmatchpatch.DiffDelete:
355+
// First check if there is anything stored for this type
356+
if hasStored(typ) {
357+
// if there is prepend it to this block, empty the storage and reset our flags
358+
diff.Text = stored(typ) + diff.Text
359+
empty(typ)
360+
unfinishedEnt[typ] = false
361+
unfinishedTag[typ] = false
362+
}
363+
364+
// check if this block ends in an unfinished tag
365+
idx := unfinishedtagRegex.FindStringIndex(diff.Text)
366+
if idx != nil {
367+
unfinishedTag[typ] = true
368+
} else {
369+
// otherwise does it end in an unfinished entity
370+
idx = entityRegex.FindStringIndex(diff.Text)
371+
if idx != nil {
372+
unfinishedEnt[typ] = true
373+
}
374+
}
375+
376+
// If there is an unfinished component
377+
if idx != nil {
378+
// Store the fragment
379+
store(diff.Text[idx[0]:], typ)
380+
// and remove it from this block
381+
diff.Text = diff.Text[:idx[0]]
382+
}
383+
384+
// If that hasn't left the block empty
385+
if len(diff.Text) > 0 {
386+
// if the last block of this type was after the last equal block
387+
if last[typ] > last[equal] {
388+
// store this blocks content on that block
389+
fixedup[last[typ]].Text += diff.Text
390+
} else {
391+
// otherwise store the position of the last block of this type and store the block
392+
last[typ] = len(fixedup)
393+
fixedup = append(fixedup, diff)
394+
}
395+
}
396+
continue
397+
}
398+
}
399+
400+
// pop off any remaining stored content
401+
fixedup = pop(insert, fixedup)
402+
fixedup = pop(delete, fixedup)
403+
404+
return fixedup
405+
}
406+
199407
func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineType) template.HTML {
200408
buf := bytes.NewBuffer(nil)
201409
match := ""
202410

411+
diffs = fixupBrokenSpans(diffs)
412+
203413
for _, diff := range diffs {
204414
if shouldWriteInline(diff, lineType) {
205415
if len(match) > 0 {

services/gitdiff/gitdiff_test.go

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515

1616
"code.gitea.io/gitea/models"
1717
"code.gitea.io/gitea/modules/git"
18+
"code.gitea.io/gitea/modules/highlight"
1819
"code.gitea.io/gitea/modules/setting"
1920
dmp "github.com/sergi/go-diff/diffmatchpatch"
2021
"github.com/stretchr/testify/assert"
@@ -23,7 +24,7 @@ import (
2324

2425
// assertEqual reports a test error on t when the rendered HTML s2 is not
// byte-for-byte equal to the expected string s1. The test continues running
// after a mismatch.
func assertEqual(t *testing.T, s1 string, s2 template.HTML) {
	// Mark this function as a helper so that a failure is reported at the
	// calling assertion rather than at the t.Errorf line below.
	t.Helper()
	if s1 != string(s2) {
		t.Errorf("Did not receive expected results:\nExpected: %s\nActual: %s", s1, s2)
	}
}
2930

@@ -61,22 +62,22 @@ func TestDiffToHTML(t *testing.T) {
6162
{Type: dmp.DiffEqual, Text: "</span><span class=\"p\">)</span>"},
6263
}, DiffLineDel))
6364

64-
assertEqual(t, "<span class=\"nx\">r</span><span class=\"p\">.</span><span class=\"nf\">WrapperRenderer</span><span class=\"p\">(</span><span class=\"nx\">w</span><span class=\"p\">,</span> <span class=\"removed-code\"><span class=\"nx\">language</span></span><span class=\"removed-code\"><span class=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs</span></span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>", diffToHTML("", []dmp.Diff{
65+
assertEqual(t, "<span class=\"nx\">r</span><span class=\"p\">.</span><span class=\"nf\">WrapperRenderer</span><span class=\"p\">(</span><span class=\"nx\">w</span><span class=\"p\">,</span> <span class=\"removed-code\"><span class=\"nx\">language</span><span class=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs</span></span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>", diffToHTML("", []dmp.Diff{
6566
{Type: dmp.DiffEqual, Text: "<span class=\"nx\">r</span><span class=\"p\">.</span><span class=\"nf\">WrapperRenderer</span><span class=\"p\">(</span><span class=\"nx\">w</span><span class=\"p\">,</span> <span class=\"nx\">"},
6667
{Type: dmp.DiffDelete, Text: "language</span><span "},
6768
{Type: dmp.DiffEqual, Text: "c"},
6869
{Type: dmp.DiffDelete, Text: "lass=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs"},
6970
{Type: dmp.DiffEqual, Text: "</span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>"},
7071
}, DiffLineDel))
7172

72-
assertEqual(t, "<span class=\"added-code\">language</span></span><span class=\"added-code\"><span class=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs</span></span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>", diffToHTML("", []dmp.Diff{
73+
assertEqual(t, "<span class=\"added-code\">language</span><span class=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs</span></span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>", diffToHTML("", []dmp.Diff{
7374
{Type: dmp.DiffInsert, Text: "language</span><span "},
7475
{Type: dmp.DiffEqual, Text: "c"},
7576
{Type: dmp.DiffInsert, Text: "lass=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs"},
7677
{Type: dmp.DiffEqual, Text: "</span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>"},
7778
}, DiffLineAdd))
7879

79-
assertEqual(t, "<span class=\"k\">print</span><span class=\"added-code\"></span><span class=\"added-code\"><span class=\"p\">(</span></span><span class=\"sa\"></span><span class=\"s2\">&#34;</span><span class=\"s2\">// </span><span class=\"s2\">&#34;</span><span class=\"p\">,</span> <span class=\"n\">sys</span><span class=\"o\">.</span><span class=\"n\">argv</span><span class=\"added-code\"><span class=\"p\">)</span></span>", diffToHTML("", []dmp.Diff{
80+
assertEqual(t, "<span class=\"k\">print</span><span class=\"added-code\"><span class=\"p\">(</span></span><span class=\"sa\"></span><span class=\"s2\">&#34;</span><span class=\"s2\">// </span><span class=\"s2\">&#34;</span><span class=\"p\">,</span> <span class=\"n\">sys</span><span class=\"o\">.</span><span class=\"n\">argv</span><span class=\"added-code\"><span class=\"p\">)</span></span>", diffToHTML("", []dmp.Diff{
8081
{Type: dmp.DiffEqual, Text: "<span class=\"k\">print</span>"},
8182
{Type: dmp.DiffInsert, Text: "<span"},
8283
{Type: dmp.DiffEqual, Text: " "},
@@ -85,14 +86,14 @@ func TestDiffToHTML(t *testing.T) {
8586
{Type: dmp.DiffInsert, Text: "<span class=\"p\">)</span>"},
8687
}, DiffLineAdd))
8788

88-
assertEqual(t, "sh <span class=\"added-code\">&#39;useradd -u $(stat -c &#34;%u&#34; .gitignore) jenkins</span>&#39;", diffToHTML("", []dmp.Diff{
89+
assertEqual(t, "sh <span class=\"added-code\">&#39;useradd -u $(stat -c &#34;%u&#34; .gitignore) jenkins&#39;</span>", diffToHTML("", []dmp.Diff{
8990
{Type: dmp.DiffEqual, Text: "sh &#3"},
9091
{Type: dmp.DiffDelete, Text: "4;useradd -u 111 jenkins&#34"},
9192
{Type: dmp.DiffInsert, Text: "9;useradd -u $(stat -c &#34;%u&#34; .gitignore) jenkins&#39"},
9293
{Type: dmp.DiffEqual, Text: ";"},
9394
}, DiffLineAdd))
9495

95-
assertEqual(t, "<span class=\"x\"> &lt;h<span class=\"added-code\">4 class=</span><span class=\"added-code\">&#34;release-list-title df ac&#34;</span>&gt;</span>", diffToHTML("", []dmp.Diff{
96+
assertEqual(t, "<span class=\"x\"> &lt;h<span class=\"added-code\">4 class=&#34;release-list-title df ac&#34;</span>&gt;</span>", diffToHTML("", []dmp.Diff{
9697
{Type: dmp.DiffEqual, Text: "<span class=\"x\"> &lt;h"},
9798
{Type: dmp.DiffInsert, Text: "4 class=&#"},
9899
{Type: dmp.DiffEqual, Text: "3"},
@@ -462,3 +463,14 @@ func TestGetDiffRangeWithWhitespaceBehavior(t *testing.T) {
462463
}
463464
}
464465
}
466+
467+
func TestDiffToHTML_14231(t *testing.T) {
468+
setting.Cfg = ini.Empty()
469+
diffRecord := diffMatchPatch.DiffMain(highlight.Code("main.v", " run()\n"), highlight.Code("main.v", " run(db)\n"), true)
470+
diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
471+
472+
expected := ` <span class="n">run</span><span class="added-code"><span class="o">(</span><span class="n">db</span></span><span class="o">)</span>`
473+
output := diffToHTML("main.v", diffRecord, DiffLineAdd)
474+
475+
assertEqual(t, expected, output)
476+
}

0 commit comments

Comments
 (0)