Skip to content

Store raw path bytes in Diff instances #474

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 20, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/source/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ Changelog
2.0.6 - Fixes and Features
==========================

* API: Diffs now have `a_rawpath`, `b_rawpath`, `raw_rename_from`,
`raw_rename_to` properties, which are the raw-bytes equivalents of their
unicode path counterparts.
* Fix: TypeError about passing keyword argument to string decode() on
Python 2.6.
* Feature: `setUrl API on Remotes <https://github.com/gitpython-developers/GitPython/pull/446#issuecomment-224670539>`_
Expand Down
2 changes: 2 additions & 0 deletions git/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def mviter(d):
return d.values()
range = xrange
unicode = str
binary_type = bytes
else:
FileType = file
# usually, this is just ascii, which might not enough for our encoding needs
Expand All @@ -44,6 +45,7 @@ def mviter(d):
byte_ord = ord
bchr = chr
unicode = unicode
binary_type = str
range = xrange
def mviter(d):
return d.itervalues()
Expand Down
58 changes: 41 additions & 17 deletions git/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from gitdb.util import hex_to_bin

from .compat import binary_type
from .objects.blob import Blob
from .objects.util import mode_str_to_int

Expand Down Expand Up @@ -245,18 +246,20 @@ class Diff(object):
NULL_HEX_SHA = "0" * 40
NULL_BIN_SHA = b"\0" * 20

__slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "a_path", "b_path",
"new_file", "deleted_file", "rename_from", "rename_to", "diff")
__slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "a_rawpath", "b_rawpath",
"new_file", "deleted_file", "raw_rename_from", "raw_rename_to", "diff")

def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode,
b_mode, new_file, deleted_file, rename_from,
rename_to, diff):
def __init__(self, repo, a_rawpath, b_rawpath, a_blob_id, b_blob_id, a_mode,
b_mode, new_file, deleted_file, raw_rename_from,
raw_rename_to, diff):

self.a_mode = a_mode
self.b_mode = b_mode

self.a_path = a_path
self.b_path = b_path
assert a_rawpath is None or isinstance(a_rawpath, binary_type)
assert b_rawpath is None or isinstance(b_rawpath, binary_type)
self.a_rawpath = a_rawpath
self.b_rawpath = b_rawpath

if self.a_mode:
self.a_mode = mode_str_to_int(self.a_mode)
Expand All @@ -266,19 +269,21 @@ def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode,
if a_blob_id is None or a_blob_id == self.NULL_HEX_SHA:
self.a_blob = None
else:
self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=a_path)
self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=self.a_path)

if b_blob_id is None or b_blob_id == self.NULL_HEX_SHA:
self.b_blob = None
else:
self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=b_path)
self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=self.b_path)

self.new_file = new_file
self.deleted_file = deleted_file

# be clear and use None instead of empty strings
self.rename_from = rename_from or None
self.rename_to = rename_to or None
assert raw_rename_from is None or isinstance(raw_rename_from, binary_type)
assert raw_rename_to is None or isinstance(raw_rename_to, binary_type)
self.raw_rename_from = raw_rename_from or None
self.raw_rename_to = raw_rename_to or None

self.diff = diff

Expand Down Expand Up @@ -344,6 +349,22 @@ def __str__(self):
# end
return res

@property
def a_path(self):
return self.a_rawpath.decode(defenc, 'replace') if self.a_rawpath else None

@property
def b_path(self):
return self.b_rawpath.decode(defenc, 'replace') if self.b_rawpath else None

@property
def rename_from(self):
return self.raw_rename_from.decode(defenc, 'replace') if self.raw_rename_from else None

@property
def rename_to(self):
return self.raw_rename_to.decode(defenc, 'replace') if self.raw_rename_to else None

@property
def renamed(self):
""":returns: True if the blob of our diff has been renamed
Expand Down Expand Up @@ -388,6 +409,7 @@ def _index_from_patch_format(cls, repo, stream):
new_file_mode, deleted_file_mode, \
a_blob_id, b_blob_id, b_mode, \
a_path, b_path = header.groups()

new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode)

a_path = cls._pick_best_path(a_path, rename_from, a_path_fallback)
Expand All @@ -404,15 +426,15 @@ def _index_from_patch_format(cls, repo, stream):
a_mode = old_mode or deleted_file_mode or (a_path and (b_mode or new_mode or new_file_mode))
b_mode = b_mode or new_mode or new_file_mode or (b_path and a_mode)
index.append(Diff(repo,
a_path and a_path.decode(defenc, 'replace'),
b_path and b_path.decode(defenc, 'replace'),
a_path,
b_path,
a_blob_id and a_blob_id.decode(defenc),
b_blob_id and b_blob_id.decode(defenc),
a_mode and a_mode.decode(defenc),
b_mode and b_mode.decode(defenc),
new_file, deleted_file,
rename_from and rename_from.decode(defenc, 'replace'),
rename_to and rename_to.decode(defenc, 'replace'),
rename_from,
rename_to,
None))

previous_header = header
Expand All @@ -438,8 +460,8 @@ def _index_from_raw_format(cls, repo, stream):
meta, _, path = line[1:].partition('\t')
old_mode, new_mode, a_blob_id, b_blob_id, change_type = meta.split(None, 4)
path = path.strip()
a_path = path
b_path = path
a_path = path.encode(defenc)
b_path = path.encode(defenc)
deleted_file = False
new_file = False
rename_from = None
Expand All @@ -455,6 +477,8 @@ def _index_from_raw_format(cls, repo, stream):
new_file = True
elif change_type[0] == 'R': # parses RXXX, where XXX is a confidence value
a_path, b_path = path.split('\t', 1)
a_path = a_path.encode(defenc)
b_path = b_path.encode(defenc)
rename_from, rename_to = a_path, b_path
# END add/remove handling

Expand Down
6 changes: 5 additions & 1 deletion git/test/test_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ def test_diff_with_rename(self):
assert_true(diff.renamed)
assert_equal(diff.rename_from, u'Jérôme')
assert_equal(diff.rename_to, u'müller')
assert_equal(diff.raw_rename_from, b'J\xc3\xa9r\xc3\xb4me')
assert_equal(diff.raw_rename_to, b'm\xc3\xbcller')
assert isinstance(str(diff), str)

output = StringProcessAdapter(fixture('diff_rename_raw'))
Expand Down Expand Up @@ -129,7 +131,7 @@ def test_diff_index_raw_format(self):
output = StringProcessAdapter(fixture('diff_index_raw'))
res = Diff._index_from_raw_format(None, output.stdout)
assert res[0].deleted_file
assert res[0].b_path == ''
assert res[0].b_path is None

def test_diff_initial_commit(self):
initial_commit = self.rorepo.commit('33ebe7acec14b25c5f84f35a664803fcab2f7781')
Expand Down Expand Up @@ -162,7 +164,9 @@ def test_diff_unsafe_paths(self):
self.assertEqual(res[7].b_path, u'path/with-question-mark?')
self.assertEqual(res[8].b_path, u'path/¯\\_(ツ)_|¯')
self.assertEqual(res[9].b_path, u'path/💩.txt')
self.assertEqual(res[9].b_rawpath, b'path/\xf0\x9f\x92\xa9.txt')
self.assertEqual(res[10].b_path, u'path/�-invalid-unicode-path.txt')
self.assertEqual(res[10].b_rawpath, b'path/\x80-invalid-unicode-path.txt')

# The "Moves"
# NOTE: The path prefixes a/ and b/ here are legit! We're actually
Expand Down