From 2ee63c7c9d6c31034eb9fe2d3e33c2318c2988d4 Mon Sep 17 00:00:00 2001 From: JJ Graham Date: Thu, 17 Oct 2019 22:15:45 -0500 Subject: [PATCH 1/5] Adding diff support for copied files, still working on test --- AUTHORS | 1 + git/diff.py | 29 +++++++++++++++++++------- git/test/fixtures/diff_copied_mode | 4 ++++ git/test/fixtures/diff_copied_mode_raw | 1 + git/test/test_diff.py | 21 +++++++++++++++++++ 5 files changed, 48 insertions(+), 8 deletions(-) create mode 100644 git/test/fixtures/diff_copied_mode create mode 100644 git/test/fixtures/diff_copied_mode_raw diff --git a/AUTHORS b/AUTHORS index e91fd78b1..a7a19d549 100644 --- a/AUTHORS +++ b/AUTHORS @@ -34,5 +34,6 @@ Contributors are: -Stefan Stancu -César Izurieta -Arthur Milchior +-JJ Graham Portions derived from other open source works and are clearly marked. diff --git a/git/diff.py b/git/diff.py index 10cb9f02c..5a63d44cf 100644 --- a/git/diff.py +++ b/git/diff.py @@ -193,6 +193,8 @@ def iter_change_type(self, change_type): yield diff elif change_type == "D" and diff.deleted_file: yield diff + elif change_type == "C" and diff.copied_file: + yield diff elif change_type == "R" and diff.renamed: yield diff elif change_type == "M" and diff.a_blob and diff.b_blob and diff.a_blob != diff.b_blob: @@ -243,6 +245,7 @@ class Diff(object): ^rename[ ]to[ ](?P.*)(?:\n|$))? (?:^new[ ]file[ ]mode[ ](?P.+)(?:\n|$))? (?:^deleted[ ]file[ ]mode[ ](?P.+)(?:\n|$))? + (?:^copied[ ]file[ ]mode[ ](?P.+)(?:\n|$))? (?:^index[ ](?P[0-9A-Fa-f]+) \.\.(?P[0-9A-Fa-f]+)[ ]?(?P.+)?(?:\n|$))? (?:^---[ ](?P[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))? @@ -253,11 +256,11 @@ class Diff(object): NULL_BIN_SHA = b"\0" * 20 __slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "a_rawpath", "b_rawpath", - "new_file", "deleted_file", "raw_rename_from", "raw_rename_to", - "diff", "change_type", "score") + "new_file", "deleted_file", "copied_file", "raw_rename_from", + "raw_rename_to", "diff", "change_type", "score") def __init__(self, repo, a_rawpath, b_rawpath, a_blob_id, b_blob_id, a_mode, - b_mode, new_file, deleted_file, raw_rename_from, + b_mode, new_file, deleted_file, copied_file, raw_rename_from, raw_rename_to, diff, change_type, score): self.a_mode = a_mode @@ -285,6 +288,7 @@ def __init__(self, repo, a_rawpath, b_rawpath, a_blob_id, b_blob_id, a_mode, self.new_file = new_file self.deleted_file = deleted_file + self.copied_file = copied_file # be clear and use None instead of empty strings assert raw_rename_from is None or isinstance(raw_rename_from, binary_type) @@ -336,6 +340,8 @@ def __str__(self): msg += '\nfile deleted in rhs' if self.new_file: msg += '\nfile added in rhs' + if self.copied_file: + msg += '\nfile %r copied from %r' % (self.b_path, self.a_path) if self.rename_from: msg += '\nfile renamed from %r' % self.rename_from if self.rename_to: @@ -419,11 +425,12 @@ def _index_from_patch_format(cls, repo, proc): a_path_fallback, b_path_fallback, \ old_mode, new_mode, \ rename_from, rename_to, \ - new_file_mode, deleted_file_mode, \ + new_file_mode, deleted_file_mode, copied_file_mode, \ a_blob_id, b_blob_id, b_mode, \ a_path, b_path = header.groups() - new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode) + new_file, deleted_file, copied_file = \ + bool(new_file_mode), bool(deleted_file_mode), bool(copied_file_mode) a_path = cls._pick_best_path(a_path, rename_from, a_path_fallback) b_path = cls._pick_best_path(b_path, rename_to, b_path_fallback) @@ -445,7 +452,7 @@ def _index_from_patch_format(cls, repo, proc): b_blob_id and b_blob_id.decode(defenc), a_mode and a_mode.decode(defenc), b_mode and b_mode.decode(defenc), - new_file, deleted_file, + new_file, deleted_file, copied_file, rename_from, rename_to, None, None, None)) @@ -485,6 +492,7 @@ def handle_diff_line(line): b_path = path.encode(defenc) deleted_file = False new_file = False + copied_file = False rename_from = None rename_to = None @@ -496,6 +504,11 @@ def handle_diff_line(line): elif change_type == 'A': a_blob_id = None new_file = True + elif change_type == 'C': + copied_file = True + a_path, b_path = path.split('\t', 1) + a_path = a_path.encode(defenc) + b_path = b_path.encode(defenc) elif change_type == 'R': a_path, b_path = path.split('\t', 1) a_path = a_path.encode(defenc) @@ -507,8 +520,8 @@ def handle_diff_line(line): # END add/remove handling diff = Diff(repo, a_path, b_path, a_blob_id, b_blob_id, old_mode, new_mode, - new_file, deleted_file, rename_from, rename_to, '', - change_type, score) + new_file, deleted_file, copied_file, rename_from, rename_to, + '', change_type, score) index.append(diff) handle_process_output(proc, handle_diff_line, None, finalize_process, decode_streams=False) diff --git a/git/test/fixtures/diff_copied_mode b/git/test/fixtures/diff_copied_mode new file mode 100644 index 000000000..60707afc8 --- /dev/null +++ b/git/test/fixtures/diff_copied_mode @@ -0,0 +1,4 @@ +diff --git a/test1.txt b/test2.txt +similarity index 100% +copy from test1.txt +copy to test2.txt diff --git a/git/test/fixtures/diff_copied_mode_raw b/git/test/fixtures/diff_copied_mode_raw new file mode 100644 index 000000000..7f414d815 --- /dev/null +++ b/git/test/fixtures/diff_copied_mode_raw @@ -0,0 +1 @@ +:100644 100644 cfe9dea cfe9dea C100 test1.txt test2.txt diff --git a/git/test/test_diff.py b/git/test/test_diff.py index e47b93317..079f8bea0 100644 --- a/git/test/test_diff.py +++ b/git/test/test_diff.py @@ -112,6 +112,27 @@ def test_diff_with_rename(self): self.assertEqual(diff.score, 100) self.assertEqual(len(list(diffs.iter_change_type('R'))), 1) + def test_diff_with_copied_file(self): + output = StringProcessAdapter(fixture('diff_copied_mode')) + diffs = Diff._index_from_patch_format(self.rorepo, output) + self._assert_diff_format(diffs) + + assert_equal(1, len(diffs)) + + diff = diffs[0] + print(diff) + assert_true(diff.copied_file) + assert isinstance(str(diff), str) + + output = StringProcessAdapter(fixture('diff_copied_mode_raw')) + diffs = Diff._index_from_raw_format(self.rorepo, output) + self.assertEqual(len(diffs), 1) + diff = diffs[0] + self.assertEqual(diff.change_type, 'C') + self.assertEqual(diff.score, 100) + self.assertEqual(len(list(diffs.iter_change_type('C'))), 1) + + def test_diff_with_change_in_type(self): output = StringProcessAdapter(fixture('diff_change_in_type')) diffs = Diff._index_from_patch_format(self.rorepo, output) From 908a7550d70b5cbdb60f6dd98040992bd5ecf530 Mon Sep 17 00:00:00 2001 From: JJ Graham Date: Thu, 17 Oct 2019 23:04:22 -0500 Subject: [PATCH 2/5] Fixed new test for copied files --- git/diff.py | 6 ++++-- git/test/fixtures/diff_copied_mode_raw | 2 +- git/test/test_diff.py | 1 - 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/git/diff.py b/git/diff.py index 5a63d44cf..8bb0f8396 100644 --- a/git/diff.py +++ b/git/diff.py @@ -167,7 +167,7 @@ class DiffIndex(list): # R = Renamed # M = Modified # T = Changed in the type - change_type = ("A", "D", "R", "M", "T") + change_type = ("A", "C", "D", "R", "M", "T") def iter_change_type(self, change_type): """ @@ -245,7 +245,9 @@ class Diff(object): ^rename[ ]to[ ](?P.*)(?:\n|$))? (?:^new[ ]file[ ]mode[ ](?P.+)(?:\n|$))? (?:^deleted[ ]file[ ]mode[ ](?P.+)(?:\n|$))? - (?:^copied[ ]file[ ]mode[ ](?P.+)(?:\n|$))? + (?:^similarity[ ]index[ ]\d+%\n + ^copy[ ]from[ ].*\n + ^copy[ ]to[ ](?P.*)(?:\n|$))? (?:^index[ ](?P[0-9A-Fa-f]+) \.\.(?P[0-9A-Fa-f]+)[ ]?(?P.+)?(?:\n|$))? (?:^---[ ](?P[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))? diff --git a/git/test/fixtures/diff_copied_mode_raw b/git/test/fixtures/diff_copied_mode_raw index 7f414d815..7640f3ab7 100644 --- a/git/test/fixtures/diff_copied_mode_raw +++ b/git/test/fixtures/diff_copied_mode_raw @@ -1 +1 @@ -:100644 100644 cfe9dea cfe9dea C100 test1.txt test2.txt +:100644 100644 cfe9deac6e10683917e80f877566b58644aa21df cfe9deac6e10683917e80f877566b58644aa21df C100 test1.txt test2.txt diff --git a/git/test/test_diff.py b/git/test/test_diff.py index 079f8bea0..4d71443f9 100644 --- a/git/test/test_diff.py +++ b/git/test/test_diff.py @@ -120,7 +120,6 @@ def test_diff_with_copied_file(self): assert_equal(1, len(diffs)) diff = diffs[0] - print(diff) assert_true(diff.copied_file) assert isinstance(str(diff), str) From 4b3e87b79a13248329a4ea401f248857eaa825e0 Mon Sep 17 00:00:00 2001 From: JJ Graham Date: Fri, 18 Oct 2019 10:27:38 -0500 Subject: [PATCH 3/5] Expanded new test for copied file --- git/test/test_diff.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/git/test/test_diff.py b/git/test/test_diff.py index 4d71443f9..e1b345b59 100644 --- a/git/test/test_diff.py +++ b/git/test/test_diff.py @@ -121,6 +121,8 @@ def test_diff_with_copied_file(self): diff = diffs[0] assert_true(diff.copied_file) + assert_true(diff.a_path, u'test1.txt') + assert_true(diff.b_path, u'test2.txt') assert isinstance(str(diff), str) output = StringProcessAdapter(fixture('diff_copied_mode_raw')) @@ -129,9 +131,10 @@ def test_diff_with_copied_file(self): diff = diffs[0] self.assertEqual(diff.change_type, 'C') self.assertEqual(diff.score, 100) + self.assertEqual(diff.a_path, u'test1.txt') + self.assertEqual(diff.b_path, u'test2.txt') self.assertEqual(len(list(diffs.iter_change_type('C'))), 1) - def test_diff_with_change_in_type(self): output = StringProcessAdapter(fixture('diff_change_in_type')) diffs = Diff._index_from_patch_format(self.rorepo, output) From fd1d9f5f963271be48c20dccafa72965d869dee4 Mon Sep 17 00:00:00 2001 From: JJ Graham Date: Fri, 18 Oct 2019 10:32:30 -0500 Subject: [PATCH 4/5] Updating variable name to more accurately reflect contents --- git/diff.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/git/diff.py b/git/diff.py index 8bb0f8396..80e0be6ac 100644 --- a/git/diff.py +++ b/git/diff.py @@ -247,7 +247,7 @@ class Diff(object): (?:^deleted[ ]file[ ]mode[ ](?P.+)(?:\n|$))? (?:^similarity[ ]index[ ]\d+%\n ^copy[ ]from[ ].*\n - ^copy[ ]to[ ](?P.*)(?:\n|$))? + ^copy[ ]to[ ](?P.*)(?:\n|$))? (?:^index[ ](?P[0-9A-Fa-f]+) \.\.(?P[0-9A-Fa-f]+)[ ]?(?P.+)?(?:\n|$))? (?:^---[ ](?P[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))? @@ -427,12 +427,12 @@ def _index_from_patch_format(cls, repo, proc): a_path_fallback, b_path_fallback, \ old_mode, new_mode, \ rename_from, rename_to, \ - new_file_mode, deleted_file_mode, copied_file_mode, \ + new_file_mode, deleted_file_mode, copied_file_name, \ a_blob_id, b_blob_id, b_mode, \ a_path, b_path = header.groups() new_file, deleted_file, copied_file = \ - bool(new_file_mode), bool(deleted_file_mode), bool(copied_file_mode) + bool(new_file_mode), bool(deleted_file_mode), bool(copied_file_name) a_path = cls._pick_best_path(a_path, rename_from, a_path_fallback) b_path = cls._pick_best_path(b_path, rename_to, b_path_fallback) From a6e133c76220f5a4e546f99f38e7aabb22e35d13 Mon Sep 17 00:00:00 2001 From: JJ Graham Date: Fri, 18 Oct 2019 14:49:06 -0500 Subject: [PATCH 5/5] Satisfying flake8 --- git/diff.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/git/diff.py b/git/diff.py index 80e0be6ac..0150d6755 100644 --- a/git/diff.py +++ b/git/diff.py @@ -258,7 +258,7 @@ class Diff(object): NULL_BIN_SHA = b"\0" * 20 __slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "a_rawpath", "b_rawpath", - "new_file", "deleted_file", "copied_file", "raw_rename_from", + "new_file", "deleted_file", "copied_file", "raw_rename_from", "raw_rename_to", "diff", "change_type", "score") def __init__(self, repo, a_rawpath, b_rawpath, a_blob_id, b_blob_id, a_mode, @@ -432,7 +432,7 @@ def _index_from_patch_format(cls, repo, proc): a_path, b_path = header.groups() new_file, deleted_file, copied_file = \ - bool(new_file_mode), bool(deleted_file_mode), bool(copied_file_name) + bool(new_file_mode), bool(deleted_file_mode), bool(copied_file_name) a_path = cls._pick_best_path(a_path, rename_from, a_path_fallback) b_path = cls._pick_best_path(b_path, rename_to, b_path_fallback)