@@ -372,6 +372,101 @@ func (r *FileDiffReader) ReadExtendedHeaders() ([]string, error) {
372
372
}
373
373
}
374
374
375
// readQuotedFilename extracts a quoted filename from the beginning of a string,
// returning the unquoted filename and any remaining text after the filename.
//
// text must start with a double quote. The filename ends at the first double
// quote that is not escaped by a backslash; the quoted span is then decoded
// with strconv.Unquote. An error is returned (and value and remainder are
// empty) when text is empty or does not start with a quote, when no closing
// quote is found, or when the quoted span cannot be unquoted.
func readQuotedFilename(text string) (value string, remainder string, err error) {
	// Report malformed input as an error instead of panicking: this is parsing
	// code fed with external data, and an empty string would otherwise hit an
	// index-out-of-range panic.
	if text == "" || text[0] != '"' {
		return "", "", fmt.Errorf(`string must start with a '"': %s`, text)
	}

	// The end quote is the first quote NOT preceded by an odd number of
	// backslashes (an odd-length run means the quote itself is escaped).
	end := -1
	for i := 1; i < len(text); i++ {
		if text[i] != '"' {
			continue
		}
		// Walk backwards over the run of backslashes directly before this
		// quote. Index 0 holds the opening quote, so the walk stops there.
		j := i - 1
		for j > 0 && text[j] == '\\' {
			j--
		}
		numberOfBackslashes := i - j - 1
		if numberOfBackslashes%2 == 0 {
			end = i
			break
		}
	}
	if end == -1 {
		return "", "", fmt.Errorf(`end of string found while searching for '"': %s`, text)
	}

	value, err = strconv.Unquote(text[:end+1])
	if err != nil {
		// Do not hand back a partially populated result alongside an error.
		return "", "", err
	}
	return value, text[end+1:], nil
}
403
+
404
+ // parseDiffGitArgs extracts the two filenames from a 'diff --git' line.
405
+ func parseDiffGitArgs (diffArgs string ) (bool , string , string ) {
406
+ length := len (diffArgs )
407
+ if length < 3 {
408
+ return false , "" , ""
409
+ }
410
+
411
+ if diffArgs [0 ] != '"' && diffArgs [length - 1 ] != '"' {
412
+ // Both filenames are unquoted.
413
+ firstSpace := strings .IndexByte (diffArgs , ' ' )
414
+ if firstSpace <= 0 || firstSpace == length - 1 {
415
+ return false , "" , ""
416
+ }
417
+
418
+ secondSpace := strings .IndexByte (diffArgs [firstSpace + 1 :], ' ' )
419
+ if secondSpace == - 1 {
420
+ return true , diffArgs [:firstSpace ], diffArgs [firstSpace + 1 :]
421
+ }
422
+
423
+ // One or both filenames contain a space, but the names are
424
+ // unquoted. Here, the 'diff --git' syntax is ambiguous, and
425
+ // we have to obtain the filenames elsewhere (e.g. from the
426
+ // chunk headers or extended headers). HOWEVER, if the file
427
+ // is newly created and empty, there IS no other place to
428
+ // find the filename. In this case, the two filenames are
429
+ // identical (except for the leading 'a/' prefix), and we have
430
+ // to handle that case here.
431
+ first := diffArgs [:length / 2 ]
432
+ second := diffArgs [length / 2 + 1 :]
433
+ if len (first ) >= 3 && length % 2 == 1 && first [1 ] == '/' && first [1 :] == second [1 :] {
434
+ return true , first , second
435
+ }
436
+
437
+ // The syntax is (unfortunately) valid, but we could not extract
438
+ // the filenames.
439
+ return true , "" , ""
440
+ }
441
+
442
+ if diffArgs [0 ] == '"' {
443
+ first , remainder , err := readQuotedFilename (diffArgs )
444
+ if err != nil || len (remainder ) < 2 || remainder [0 ] != ' ' {
445
+ return false , "" , ""
446
+ }
447
+ if remainder [1 ] == '"' {
448
+ second , remainder , err := readQuotedFilename (remainder [1 :])
449
+ if remainder != "" || err != nil {
450
+ return false , "" , ""
451
+ }
452
+ return true , first , second
453
+ }
454
+ return true , first , remainder [1 :]
455
+ }
456
+
457
+ // In this case, second argument MUST be quoted (or it's a syntax error)
458
+ i := strings .IndexByte (diffArgs , '"' )
459
+ if i == - 1 || i + 2 >= length || diffArgs [i - 1 ] != ' ' {
460
+ return false , "" , ""
461
+ }
462
+
463
+ second , remainder , err := readQuotedFilename (diffArgs [i :])
464
+ if remainder != "" || err != nil {
465
+ return false , "" , ""
466
+ }
467
+ return true , diffArgs [:i - 1 ], second
468
+ }
469
+
375
470
// handleEmpty detects when FileDiff was an empty diff and will not have any hunks
376
471
// that follow. It updates fd fields from the parsed extended headers.
377
472
func handleEmpty (fd * FileDiff ) (wasEmpty bool ) {
@@ -388,6 +483,10 @@ func handleEmpty(fd *FileDiff) (wasEmpty bool) {
388
483
return lineHasPrefix (idx1 , prefix1 ) && lineHasPrefix (idx2 , prefix2 )
389
484
}
390
485
486
+ isCopy := (lineCount == 4 && linesHavePrefixes (2 , "copy from " , 3 , "copy to " )) ||
487
+ (lineCount == 6 && linesHavePrefixes (2 , "copy from " , 3 , "copy to " ) && lineHasPrefix (5 , "Binary files " )) ||
488
+ (lineCount == 6 && linesHavePrefixes (1 , "old mode " , 2 , "new mode " ) && linesHavePrefixes (4 , "copy from " , 5 , "copy to " ))
489
+
391
490
isRename := (lineCount == 4 && linesHavePrefixes (2 , "rename from " , 3 , "rename to " )) ||
392
491
(lineCount == 6 && linesHavePrefixes (2 , "rename from " , 3 , "rename to " ) && lineHasPrefix (5 , "Binary files " )) ||
393
492
(lineCount == 6 && linesHavePrefixes (1 , "old mode " , 2 , "new mode " ) && linesHavePrefixes (4 , "rename from " , 5 , "rename to " ))
@@ -402,22 +501,12 @@ func handleEmpty(fd *FileDiff) (wasEmpty bool) {
402
501
403
502
isBinaryPatch := lineCount == 3 && lineHasPrefix (2 , "Binary files " ) || lineCount > 3 && lineHasPrefix (2 , "GIT binary patch" )
404
503
405
- if ! isModeChange && ! isRename && ! isBinaryPatch && ! isNewFile && ! isDeletedFile {
504
+ if ! isModeChange && ! isCopy && ! isRename && ! isBinaryPatch && ! isNewFile && ! isDeletedFile {
406
505
return false
407
506
}
408
507
409
- names := strings .SplitN (fd .Extended [0 ][len ("diff --git " ):], " " , 2 )
410
-
411
- var err error
412
- fd .OrigName , err = strconv .Unquote (names [0 ])
413
- if err != nil {
414
- fd .OrigName = names [0 ]
415
- }
416
- fd .NewName , err = strconv .Unquote (names [1 ])
417
- if err != nil {
418
- fd .NewName = names [1 ]
419
- }
420
-
508
+ var success bool
509
+ success , fd .OrigName , fd .NewName = parseDiffGitArgs (fd .Extended [0 ][len ("diff --git " ):])
421
510
if isNewFile {
422
511
fd .OrigName = "/dev/null"
423
512
}
@@ -426,7 +515,39 @@ func handleEmpty(fd *FileDiff) (wasEmpty bool) {
426
515
fd .NewName = "/dev/null"
427
516
}
428
517
429
- return true
518
+ // For ambiguous 'diff --git' lines, try to reconstruct filenames using extended headers.
519
+ if success && (isCopy || isRename ) && fd .OrigName == "" && fd .NewName == "" {
520
+ diffArgs := fd .Extended [0 ][len ("diff --git " ):]
521
+
522
+ reconstruct := func (header string , prefix string , whichFile int , result * string ) bool {
523
+ if ! strings .HasPrefix (header , prefix ) {
524
+ return false
525
+ }
526
+ rawFilename := header [len (prefix ):]
527
+
528
+ // extract the filename prefix (e.g. "a/") from the 'diff --git' line.
529
+ var prefixLetterIndex int
530
+ if whichFile == 1 {
531
+ prefixLetterIndex = 0
532
+ } else if whichFile == 2 {
533
+ prefixLetterIndex = len (diffArgs ) - len (rawFilename ) - 2
534
+ }
535
+ if prefixLetterIndex < 0 || diffArgs [prefixLetterIndex + 1 ] != '/' {
536
+ return false
537
+ }
538
+
539
+ * result = diffArgs [prefixLetterIndex :prefixLetterIndex + 2 ] + rawFilename
540
+ return true
541
+ }
542
+
543
+ for _ , header := range fd .Extended {
544
+ _ = reconstruct (header , "copy from " , 1 , & fd .OrigName ) ||
545
+ reconstruct (header , "copy to " , 2 , & fd .NewName ) ||
546
+ reconstruct (header , "rename from " , 1 , & fd .OrigName ) ||
547
+ reconstruct (header , "rename to " , 2 , & fd .NewName )
548
+ }
549
+ }
550
+ return success
430
551
}
431
552
432
553
var (
0 commit comments