diff --git a/doc/source/changes.rst b/doc/source/changes.rst index 3f5b8c500..492d1055e 100644 --- a/doc/source/changes.rst +++ b/doc/source/changes.rst @@ -5,6 +5,9 @@ Changelog 2.0.6 - Fixes and Features ========================== +* API: Diffs now have `a_rawpath`, `b_rawpath`, `raw_rename_from`, + `raw_rename_to` properties, which are the raw-bytes equivalents of their + unicode path counterparts. * Fix: TypeError about passing keyword argument to string decode() on Python 2.6. * Feature: `setUrl API on Remotes `_ diff --git a/git/compat.py b/git/compat.py index 5b46255cc..b35724749 100644 --- a/git/compat.py +++ b/git/compat.py @@ -35,6 +35,7 @@ def mviter(d): return d.values() range = xrange unicode = str + binary_type = bytes else: FileType = file # usually, this is just ascii, which might not enough for our encoding needs @@ -44,6 +45,7 @@ def mviter(d): byte_ord = ord bchr = chr unicode = unicode + binary_type = str range = xrange def mviter(d): return d.itervalues() diff --git a/git/diff.py b/git/diff.py index aeaa67d53..06193920d 100644 --- a/git/diff.py +++ b/git/diff.py @@ -7,6 +7,7 @@ from gitdb.util import hex_to_bin +from .compat import binary_type from .objects.blob import Blob from .objects.util import mode_str_to_int @@ -245,18 +246,20 @@ class Diff(object): NULL_HEX_SHA = "0" * 40 NULL_BIN_SHA = b"\0" * 20 - __slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "a_path", "b_path", - "new_file", "deleted_file", "rename_from", "rename_to", "diff") + __slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "a_rawpath", "b_rawpath", + "new_file", "deleted_file", "raw_rename_from", "raw_rename_to", "diff") - def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode, - b_mode, new_file, deleted_file, rename_from, - rename_to, diff): + def __init__(self, repo, a_rawpath, b_rawpath, a_blob_id, b_blob_id, a_mode, + b_mode, new_file, deleted_file, raw_rename_from, + raw_rename_to, diff): self.a_mode = a_mode self.b_mode = b_mode - self.a_path = a_path - self.b_path = b_path + assert a_rawpath is None or isinstance(a_rawpath, binary_type) + assert b_rawpath is None or isinstance(b_rawpath, binary_type) + self.a_rawpath = a_rawpath + self.b_rawpath = b_rawpath if self.a_mode: self.a_mode = mode_str_to_int(self.a_mode) @@ -266,19 +269,21 @@ def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode, if a_blob_id is None or a_blob_id == self.NULL_HEX_SHA: self.a_blob = None else: - self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=a_path) + self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=self.a_path) if b_blob_id is None or b_blob_id == self.NULL_HEX_SHA: self.b_blob = None else: - self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=b_path) + self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=self.b_path) self.new_file = new_file self.deleted_file = deleted_file # be clear and use None instead of empty strings - self.rename_from = rename_from or None - self.rename_to = rename_to or None + assert raw_rename_from is None or isinstance(raw_rename_from, binary_type) + assert raw_rename_to is None or isinstance(raw_rename_to, binary_type) + self.raw_rename_from = raw_rename_from or None + self.raw_rename_to = raw_rename_to or None self.diff = diff @@ -344,6 +349,22 @@ def __str__(self): # end return res + @property + def a_path(self): + return self.a_rawpath.decode(defenc, 'replace') if self.a_rawpath else None + + @property + def b_path(self): + return self.b_rawpath.decode(defenc, 'replace') if self.b_rawpath else None + + @property + def rename_from(self): + return self.raw_rename_from.decode(defenc, 'replace') if self.raw_rename_from else None + + @property + def rename_to(self): + return self.raw_rename_to.decode(defenc, 'replace') if self.raw_rename_to else None + @property def renamed(self): """:returns: True if the blob of our diff has been renamed @@ -388,6 +409,7 @@ def _index_from_patch_format(cls, repo, stream): new_file_mode, deleted_file_mode, \ a_blob_id, b_blob_id, b_mode, \ a_path, b_path = header.groups() + new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode) a_path = cls._pick_best_path(a_path, rename_from, a_path_fallback) @@ -404,15 +426,15 @@ def _index_from_patch_format(cls, repo, stream): a_mode = old_mode or deleted_file_mode or (a_path and (b_mode or new_mode or new_file_mode)) b_mode = b_mode or new_mode or new_file_mode or (b_path and a_mode) index.append(Diff(repo, - a_path and a_path.decode(defenc, 'replace'), - b_path and b_path.decode(defenc, 'replace'), + a_path, + b_path, a_blob_id and a_blob_id.decode(defenc), b_blob_id and b_blob_id.decode(defenc), a_mode and a_mode.decode(defenc), b_mode and b_mode.decode(defenc), new_file, deleted_file, - rename_from and rename_from.decode(defenc, 'replace'), - rename_to and rename_to.decode(defenc, 'replace'), + rename_from, + rename_to, None)) previous_header = header @@ -438,8 +460,8 @@ def _index_from_raw_format(cls, repo, stream): meta, _, path = line[1:].partition('\t') old_mode, new_mode, a_blob_id, b_blob_id, change_type = meta.split(None, 4) path = path.strip() - a_path = path - b_path = path + a_path = path.encode(defenc) + b_path = path.encode(defenc) deleted_file = False new_file = False rename_from = None @@ -455,6 +477,8 @@ def _index_from_raw_format(cls, repo, stream): new_file = True elif change_type[0] == 'R': # parses RXXX, where XXX is a confidence value a_path, b_path = path.split('\t', 1) + a_path = a_path.encode(defenc) + b_path = b_path.encode(defenc) rename_from, rename_to = a_path, b_path # END add/remove handling diff --git a/git/test/test_diff.py b/git/test/test_diff.py index 8d189b121..ba0d2d13f 100644 --- a/git/test/test_diff.py +++ b/git/test/test_diff.py @@ -90,6 +90,8 @@ def test_diff_with_rename(self): assert_true(diff.renamed) assert_equal(diff.rename_from, u'Jérôme') assert_equal(diff.rename_to, u'müller') + assert_equal(diff.raw_rename_from, b'J\xc3\xa9r\xc3\xb4me') + assert_equal(diff.raw_rename_to, b'm\xc3\xbcller') assert isinstance(str(diff), str) output = StringProcessAdapter(fixture('diff_rename_raw')) @@ -129,7 +131,7 @@ def test_diff_index_raw_format(self): output = StringProcessAdapter(fixture('diff_index_raw')) res = Diff._index_from_raw_format(None, output.stdout) assert res[0].deleted_file - assert res[0].b_path == '' + assert res[0].b_path is None def test_diff_initial_commit(self): initial_commit = self.rorepo.commit('33ebe7acec14b25c5f84f35a664803fcab2f7781') @@ -162,7 +164,9 @@ def test_diff_unsafe_paths(self): self.assertEqual(res[7].b_path, u'path/with-question-mark?') self.assertEqual(res[8].b_path, u'path/¯\\_(ツ)_|¯') self.assertEqual(res[9].b_path, u'path/💩.txt') + self.assertEqual(res[9].b_rawpath, b'path/\xf0\x9f\x92\xa9.txt') self.assertEqual(res[10].b_path, u'path/�-invalid-unicode-path.txt') + self.assertEqual(res[10].b_rawpath, b'path/\x80-invalid-unicode-path.txt') # The "Moves" # NOTE: The path prefixes a/ and b/ here are legit! We're actually