Skip to content

Commit 8bbf1a3

Browse files
committed
Merge pull request #412 from nvie/fix-diff-patch-parsing
Fix diff patch parsing
2 parents 0d7a40f + 1445b59 commit 8bbf1a3

File tree

5 files changed

+58
-19
lines changed

5 files changed

+58
-19
lines changed

doc/source/changes.rst

+17-3
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,23 @@ Changelog
55
1.0.3 - Fixes
66
=============
77

8-
* `Commit.diff()` now supports diffing the root commit via `Commit.diff(NULL_TREE)`.
9-
* `Repo.blame()` now respects `incremental=True`, supporting incremental blames. Incremental blames are slightly faster since they don't include the file's contents in them.
10-
* IMPORTANT: This release drops support for python 2.6, which is officially deprecated by the python maintainers.
8+
* `Commit.diff()` now supports diffing the root commit via
9+
`Commit.diff(NULL_TREE)`.
10+
* `Repo.blame()` now respects `incremental=True`, supporting incremental
11+
blames. Incremental blames are slightly faster since they don't include
12+
the file's contents in them.
13+
* Fix: `Diff` objects created with patch output will now have their
14+
`a_path` and `b_path` properties parsed out correctly. Previously, some
15+
values may have been populated incorrectly when a file was added or
16+
deleted.
17+
* IMPORTANT: This release drops support for python 2.6, which is
18+
officially deprecated by the python maintainers.
19+
* CRITICAL: `Diff` objects created with patch output will now not carry
20+
the --- and +++ header lines anymore. All diffs now start with the
21+
@@ header line directly. Users that rely on the old behaviour can now
22+
(reliably) read this information from the a_path and b_path properties
23+
without having to parse these lines manually.
24+
1125

1226
1.0.2 - Fixes
1327
=============

git/diff.py

+24-11
Original file line numberDiff line numberDiff line change
@@ -198,16 +198,18 @@ class Diff(object):
198198
# precompiled regex
199199
re_header = re.compile(r"""
200200
^diff[ ]--git
201-
[ ](?:a/)?(?P<a_path>.+?)[ ](?:b/)?(?P<b_path>.+?)\n
202-
(?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
203-
^rename[ ]from[ ](?P<rename_from>\S+)\n
204-
^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
201+
[ ](?:a/)?(?P<a_path_fallback>.+?)[ ](?:b/)?(?P<b_path_fallback>.+?)\n
205202
(?:^old[ ]mode[ ](?P<old_mode>\d+)\n
206203
^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
204+
(?:^similarity[ ]index[ ]\d+%\n
205+
^rename[ ]from[ ](?P<rename_from>.*)\n
206+
^rename[ ]to[ ](?P<rename_to>.*)(?:\n|$))?
207207
(?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
208208
(?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
209209
(?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
210210
\.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
211+
(?:^---[ ](?:a/)?(?P<a_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
212+
(?:^\+\+\+[ ](?:b/)?(?P<b_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
211213
""".encode('ascii'), re.VERBOSE | re.MULTILINE)
212214
# can be used for comparisons
213215
NULL_HEX_SHA = "0" * 40
@@ -231,15 +233,14 @@ def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode,
231233
if self.b_mode:
232234
self.b_mode = mode_str_to_int(self.b_mode)
233235

234-
if a_blob_id is None:
236+
if a_blob_id is None or a_blob_id == self.NULL_HEX_SHA:
235237
self.a_blob = None
236238
else:
237-
assert self.a_mode is not None
238239
self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=a_path)
239-
if b_blob_id is None:
240+
241+
if b_blob_id is None or b_blob_id == self.NULL_HEX_SHA:
240242
self.b_blob = None
241243
else:
242-
assert self.b_mode is not None
243244
self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=b_path)
244245

245246
self.new_file = new_file
@@ -329,11 +330,23 @@ def _index_from_patch_format(cls, repo, stream):
329330
index = DiffIndex()
330331
previous_header = None
331332
for header in cls.re_header.finditer(text):
332-
a_path, b_path, similarity_index, rename_from, rename_to, \
333-
old_mode, new_mode, new_file_mode, deleted_file_mode, \
334-
a_blob_id, b_blob_id, b_mode = header.groups()
333+
a_path_fallback, b_path_fallback, \
334+
old_mode, new_mode, \
335+
rename_from, rename_to, \
336+
new_file_mode, deleted_file_mode, \
337+
a_blob_id, b_blob_id, b_mode, \
338+
a_path, b_path = header.groups()
335339
new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode)
336340

341+
a_path = a_path or rename_from or a_path_fallback
342+
b_path = b_path or rename_to or b_path_fallback
343+
344+
if a_path == b'/dev/null':
345+
a_path = None
346+
347+
if b_path == b'/dev/null':
348+
b_path = None
349+
337350
# Our only means to find the actual text is to see what has not been matched by our regex,
338351
# and then retro-actively assin it to our index
339352
if previous_header is not None:
+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
diff --git a/file with spaces b/file with spaces
2+
new file mode 100644
3+
index 0000000000000000000000000000000000000000..75c620d7b0d3b0100415421a97f553c979d75174
4+
--- /dev/null
5+
+++ b/file with spaces
6+
@@ -0,0 +1 @@
7+
+ohai

git/test/fixtures/diff_initial

-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
--- /dev/null
2-
+++ b/CHANGES
31
@@ -0,0 +1,7 @@
42
+=======
53
+CHANGES

git/test/test_diff.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def test_list_from_string_new_mode(self):
7676
self._assert_diff_format(diffs)
7777

7878
assert_equal(1, len(diffs))
79-
assert_equal(10, len(diffs[0].diff.splitlines()))
79+
assert_equal(8, len(diffs[0].diff.splitlines()))
8080

8181
def test_diff_with_rename(self):
8282
output = StringProcessAdapter(fixture('diff_rename'))
@@ -116,7 +116,7 @@ def test_diff_index(self):
116116
res = Diff._index_from_patch_format(None, output.stdout)
117117
assert len(res) == 6
118118
for dr in res:
119-
assert dr.diff
119+
assert dr.diff.startswith(b'@@')
120120
assert str(dr), "Diff to string conversion should be possible"
121121
# end for each diff
122122

@@ -140,7 +140,8 @@ def test_diff_initial_commit(self):
140140

141141
# ...and with creating a patch
142142
diff_index = initial_commit.diff(NULL_TREE, create_patch=True)
143-
assert diff_index[0].b_path == 'CHANGES'
143+
assert diff_index[0].a_path is None, repr(diff_index[0].a_path)
144+
assert diff_index[0].b_path == 'CHANGES', repr(diff_index[0].b_path)
144145
assert diff_index[0].new_file
145146
assert diff_index[0].diff == fixture('diff_initial')
146147

@@ -156,6 +157,12 @@ def test_diff_patch_format(self):
156157
Diff._index_from_patch_format(self.rorepo, diff_proc.stdout)
157158
# END for each fixture
158159

160+
def test_diff_with_spaces(self):
161+
data = StringProcessAdapter(fixture('diff_file_with_spaces'))
162+
diff_index = Diff._index_from_patch_format(self.rorepo, data.stdout)
163+
assert diff_index[0].a_path is None, repr(diff_index[0].a_path)
164+
assert diff_index[0].b_path == u'file with spaces', repr(diff_index[0].b_path)
165+
159166
def test_diff_interface(self):
160167
# test a few variations of the main diff routine
161168
assertion_map = dict()

0 commit comments

Comments
 (0)