@@ -372,6 +372,102 @@ func (r *FileDiffReader) ReadExtendedHeaders() ([]string, error) {
372
372
}
373
373
}
374
374
375
// readQuotedFilename parses a double-quoted filename located at the very start
// of text. It returns the unquoted filename together with whatever text follows
// the closing quote.
//
// An error is returned when text does not begin with '"', when no closing quote
// is found, or when strconv.Unquote rejects the quoted section.
func readQuotedFilename(text string) (value string, remainder string, err error) {
	if len(text) == 0 || text[0] != '"' {
		return "", "", fmt.Errorf(`string must start with a '"': %s`, text)
	}

	// A quote closes the filename only when the run of backslashes directly in
	// front of it has even length (every pair is an escaped backslash). An odd
	// run means the quote itself is escaped and belongs to the filename.
	backslashRun := 0
	for pos := 1; pos < len(text); pos++ {
		switch text[pos] {
		case '"':
			if backslashRun%2 == 0 {
				end := pos + 1
				value, err = strconv.Unquote(text[:end])
				return value, text[end:], err
			}
			backslashRun = 0
		case '\\':
			backslashRun++
		default:
			backslashRun = 0
		}
	}
	return "", "", fmt.Errorf(`end of string found while searching for '"': %s`, text)
}
397
+
398
+ // parseDiffGitArgs extracts the two filenames from a 'diff --git' line.
399
+ // Returns false on syntax error, true if syntax is valid. Even with a
400
+ // valid syntax, it may be impossible to extract filenames; if so, the
401
+ // function returns ("", "", true).
402
+ func parseDiffGitArgs (diffArgs string ) (string , string , bool ) {
403
+ length := len (diffArgs )
404
+ if length < 3 {
405
+ return "" , "" , false
406
+ }
407
+
408
+ if diffArgs [0 ] != '"' && diffArgs [length - 1 ] != '"' {
409
+ // Both filenames are unquoted.
410
+ firstSpace := strings .IndexByte (diffArgs , ' ' )
411
+ if firstSpace <= 0 || firstSpace == length - 1 {
412
+ return "" , "" , false
413
+ }
414
+
415
+ secondSpace := strings .IndexByte (diffArgs [firstSpace + 1 :], ' ' )
416
+ if secondSpace == - 1 {
417
+ if diffArgs [firstSpace + 1 ] == '"' {
418
+ // The second filename begins with '"', but doesn't end with one.
419
+ return "" , "" , false
420
+ }
421
+ return diffArgs [:firstSpace ], diffArgs [firstSpace + 1 :], true
422
+ }
423
+
424
+ // One or both filenames contain a space, but the names are
425
+ // unquoted. Here, the 'diff --git' syntax is ambiguous, and
426
+ // we have to obtain the filenames elsewhere (e.g. from the
427
+ // hunk headers or extended headers). HOWEVER, if the file
428
+ // is newly created and empty, there IS no other place to
429
+ // find the filename. In this case, the two filenames are
430
+ // identical (except for the leading 'a/' prefix), and we have
431
+ // to handle that case here.
432
+ first := diffArgs [:length / 2 ]
433
+ second := diffArgs [length / 2 + 1 :]
434
+ if len (first ) >= 3 && length % 2 == 1 && first [1 ] == '/' && first [1 :] == second [1 :] {
435
+ return first , second , true
436
+ }
437
+
438
+ // The syntax is (unfortunately) valid, but we could not extract
439
+ // the filenames.
440
+ return "" , "" , true
441
+ }
442
+
443
+ if diffArgs [0 ] == '"' {
444
+ first , remainder , err := readQuotedFilename (diffArgs )
445
+ if err != nil || len (remainder ) < 2 || remainder [0 ] != ' ' {
446
+ return "" , "" , false
447
+ }
448
+ if remainder [1 ] == '"' {
449
+ second , remainder , err := readQuotedFilename (remainder [1 :])
450
+ if remainder != "" || err != nil {
451
+ return "" , "" , false
452
+ }
453
+ return first , second , true
454
+ }
455
+ return first , remainder [1 :], true
456
+ }
457
+
458
+ // In this case, second argument MUST be quoted (or it's a syntax error)
459
+ i := strings .IndexByte (diffArgs , '"' )
460
+ if i == - 1 || i + 2 >= length || diffArgs [i - 1 ] != ' ' {
461
+ return "" , "" , false
462
+ }
463
+
464
+ second , remainder , err := readQuotedFilename (diffArgs [i :])
465
+ if remainder != "" || err != nil {
466
+ return "" , "" , false
467
+ }
468
+ return diffArgs [:i - 1 ], second , true
469
+ }
470
+
375
471
// handleEmpty detects when FileDiff was an empty diff and will not have any hunks
376
472
// that follow. It updates fd fields from the parsed extended headers.
377
473
func handleEmpty (fd * FileDiff ) (wasEmpty bool ) {
@@ -388,6 +484,10 @@ func handleEmpty(fd *FileDiff) (wasEmpty bool) {
388
484
return lineHasPrefix (idx1 , prefix1 ) && lineHasPrefix (idx2 , prefix2 )
389
485
}
390
486
487
+ isCopy := (lineCount == 4 && linesHavePrefixes (2 , "copy from " , 3 , "copy to " )) ||
488
+ (lineCount == 6 && linesHavePrefixes (2 , "copy from " , 3 , "copy to " ) && lineHasPrefix (5 , "Binary files " )) ||
489
+ (lineCount == 6 && linesHavePrefixes (1 , "old mode " , 2 , "new mode " ) && linesHavePrefixes (4 , "copy from " , 5 , "copy to " ))
490
+
391
491
isRename := (lineCount == 4 && linesHavePrefixes (2 , "rename from " , 3 , "rename to " )) ||
392
492
(lineCount == 6 && linesHavePrefixes (2 , "rename from " , 3 , "rename to " ) && lineHasPrefix (5 , "Binary files " )) ||
393
493
(lineCount == 6 && linesHavePrefixes (1 , "old mode " , 2 , "new mode " ) && linesHavePrefixes (4 , "rename from " , 5 , "rename to " ))
@@ -402,22 +502,12 @@ func handleEmpty(fd *FileDiff) (wasEmpty bool) {
402
502
403
503
isBinaryPatch := lineCount == 3 && lineHasPrefix (2 , "Binary files " ) || lineCount > 3 && lineHasPrefix (2 , "GIT binary patch" )
404
504
405
- if ! isModeChange && ! isRename && ! isBinaryPatch && ! isNewFile && ! isDeletedFile {
505
+ if ! isModeChange && ! isCopy && ! isRename && ! isBinaryPatch && ! isNewFile && ! isDeletedFile {
406
506
return false
407
507
}
408
508
409
- names := strings .SplitN (fd .Extended [0 ][len ("diff --git " ):], " " , 2 )
410
-
411
- var err error
412
- fd .OrigName , err = strconv .Unquote (names [0 ])
413
- if err != nil {
414
- fd .OrigName = names [0 ]
415
- }
416
- fd .NewName , err = strconv .Unquote (names [1 ])
417
- if err != nil {
418
- fd .NewName = names [1 ]
419
- }
420
-
509
+ var success bool
510
+ fd .OrigName , fd .NewName , success = parseDiffGitArgs (fd .Extended [0 ][len ("diff --git " ):])
421
511
if isNewFile {
422
512
fd .OrigName = "/dev/null"
423
513
}
@@ -426,7 +516,38 @@ func handleEmpty(fd *FileDiff) (wasEmpty bool) {
426
516
fd .NewName = "/dev/null"
427
517
}
428
518
429
- return true
519
+ // For ambiguous 'diff --git' lines, try to reconstruct filenames using extended headers.
520
+ if success && (isCopy || isRename ) && fd .OrigName == "" && fd .NewName == "" {
521
+ diffArgs := fd .Extended [0 ][len ("diff --git " ):]
522
+
523
+ tryReconstruct := func (header string , prefix string , whichFile int , result * string ) {
524
+ if ! strings .HasPrefix (header , prefix ) {
525
+ return
526
+ }
527
+ rawFilename := header [len (prefix ):]
528
+
529
+ // extract the filename prefix (e.g. "a/") from the 'diff --git' line.
530
+ var prefixLetterIndex int
531
+ if whichFile == 1 {
532
+ prefixLetterIndex = 0
533
+ } else if whichFile == 2 {
534
+ prefixLetterIndex = len (diffArgs ) - len (rawFilename ) - 2
535
+ }
536
+ if prefixLetterIndex < 0 || diffArgs [prefixLetterIndex + 1 ] != '/' {
537
+ return
538
+ }
539
+
540
+ * result = diffArgs [prefixLetterIndex :prefixLetterIndex + 2 ] + rawFilename
541
+ }
542
+
543
+ for _ , header := range fd .Extended {
544
+ tryReconstruct (header , "copy from " , 1 , & fd .OrigName )
545
+ tryReconstruct (header , "copy to " , 2 , & fd .NewName )
546
+ tryReconstruct (header , "rename from " , 1 , & fd .OrigName )
547
+ tryReconstruct (header , "rename to " , 2 , & fd .NewName )
548
+ }
549
+ }
550
+ return success
430
551
}
431
552
432
553
var (
0 commit comments