Skip to content

Fix diff patch parser for paths with unsafe chars #415

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 20, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 37 additions & 13 deletions git/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,23 @@
NULL_TREE = object()


# Single-character C-style escapes that git may emit inside a double-quoted
# path, keyed by the byte that follows the backslash.
_QUOTED_PATH_ESCAPES = {
    b'a': b'\a',
    b'b': b'\b',
    b'f': b'\f',
    b'n': b'\n',
    b'r': b'\r',
    b't': b'\t',
    b'v': b'\v',
    b'"': b'"',
    b'\\': b'\\',
}

# One escape sequence: a backslash followed by either a three-digit octal byte
# value (\000-\377) or one of the single-character escapes listed above.
_QUOTED_PATH_ESCAPE_RE = re.compile(br'\\([0-3][0-7]{2}|[abfnrtv"\\])')


def _unescape_quoted_path_match(match):
    """Return the raw byte for one escape sequence matched in a quoted path."""
    token = match.group(1)
    if len(token) == 3:
        # Octal escape, e.g. \303 -> b'\xc3'. Going through bytearray yields a
        # one-byte bytes object on both Python 2 and Python 3.
        return bytes(bytearray((int(token, 8),)))
    return _QUOTED_PATH_ESCAPES[token]


def decode_path(path, has_ab_prefix=True):
    """Turn a path as written in a git patch header into the raw path bytes.

    :param path: bytes of the path exactly as it appears in the patch, either
        bare or wrapped in double quotes with C-style escapes
    :param has_ab_prefix: if True, the (unquoted) path must start with ``a/``
        or ``b/`` and that two-byte prefix is stripped from the result
    :return: the decoded path as bytes, or None if ``path`` is ``/dev/null``
    :raise AssertionError: if ``has_ab_prefix`` is True but the prefix is
        missing (only checked when assertions are enabled)
    """
    if path == b'/dev/null':
        return None

    if path.startswith(b'"') and path.endswith(b'"'):
        # Decode all escapes in a single left-to-right pass. Chained
        # str.replace() calls would mis-read an escaped backslash followed by
        # 'n' or 't' (e.g. \\n) as a newline/tab escape.
        path = _QUOTED_PATH_ESCAPE_RE.sub(_unescape_quoted_path_match, path[1:-1])

    if has_ab_prefix:
        assert path.startswith((b'a/', b'b/')), "expected a/ or b/ prefix"
        path = path[2:]

    return path


class Diffable(object):

"""Common interface for all object that can be diffed against another object of compatible type.
Expand Down Expand Up @@ -196,9 +213,9 @@ class Diff(object):
be different to the version in the index or tree, and hence has been modified."""

# precompiled regex
re_header = re.compile(r"""
re_header = re.compile(br"""
^diff[ ]--git
[ ](?:a/)?(?P<a_path_fallback>.+?)[ ](?:b/)?(?P<b_path_fallback>.+?)\n
[ ](?P<a_path_fallback>"?a/.+?"?)[ ](?P<b_path_fallback>"?b/.+?"?)\n
(?:^old[ ]mode[ ](?P<old_mode>\d+)\n
^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
(?:^similarity[ ]index[ ]\d+%\n
Expand All @@ -208,9 +225,9 @@ class Diff(object):
(?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
(?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
\.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
(?:^---[ ](?:a/)?(?P<a_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
(?:^\+\+\+[ ](?:b/)?(?P<b_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
""".encode('ascii'), re.VERBOSE | re.MULTILINE)
(?:^---[ ](?P<a_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
(?:^\+\+\+[ ](?P<b_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
""", re.VERBOSE | re.MULTILINE)
# can be used for comparisons
NULL_HEX_SHA = "0" * 40
NULL_BIN_SHA = b"\0" * 20
Expand Down Expand Up @@ -319,6 +336,19 @@ def renamed(self):
""":returns: True if the blob of our diff has been renamed"""
return self.rename_from != self.rename_to

@classmethod
def _pick_best_path(cls, path_match, rename_match, path_fallback_match):
    """Decode the most trustworthy of the path candidates found in a patch header.

    Candidates are tried in priority order: the ``---``/``+++`` line path,
    then the ``rename from``/``rename to`` path, then the path taken from the
    ``diff --git`` line. The first non-empty candidate is decoded and returned.

    :return: the decoded path, or None if every candidate is empty"""
    # Each candidate is paired with whether it carries an a/ or b/ prefix;
    # rename lines are the only ones written without it.
    prioritized = (
        (path_match, True),
        (rename_match, False),
        (path_fallback_match, True),
    )
    for candidate, prefixed in prioritized:
        if candidate:
            return decode_path(candidate, has_ab_prefix=prefixed)

    return None

@classmethod
def _index_from_patch_format(cls, repo, stream):
"""Create a new DiffIndex from the given text which must be in patch format
Expand All @@ -338,14 +368,8 @@ def _index_from_patch_format(cls, repo, stream):
a_path, b_path = header.groups()
new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode)

a_path = a_path or rename_from or a_path_fallback
b_path = b_path or rename_to or b_path_fallback

if a_path == b'/dev/null':
a_path = None

if b_path == b'/dev/null':
b_path = None
a_path = cls._pick_best_path(a_path, rename_from, a_path_fallback)
b_path = cls._pick_best_path(b_path, rename_to, b_path_fallback)

# Our only means to find the actual text is to see what has not been matched by our regex,
# and then retro-actively assign it to our index
Expand Down
75 changes: 75 additions & 0 deletions git/test/fixtures/diff_patch_unsafe_paths
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
diff --git a/path/ starting with a space b/path/ starting with a space
new file mode 100644
index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
--- /dev/null
+++ b/path/ starting with a space
@@ -0,0 +1 @@
+dummy content
diff --git "a/path/\"with-quotes\"" "b/path/\"with-quotes\""
new file mode 100644
index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
--- /dev/null
+++ "b/path/\"with-quotes\""
@@ -0,0 +1 @@
+dummy content
diff --git a/path/'with-single-quotes' b/path/'with-single-quotes'
new file mode 100644
index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
--- /dev/null
+++ b/path/'with-single-quotes'
@@ -0,0 +1 @@
+dummy content
diff --git a/path/ending in a space b/path/ending in a space
new file mode 100644
index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
--- /dev/null
+++ b/path/ending in a space
@@ -0,0 +1 @@
+dummy content
diff --git "a/path/with\ttab" "b/path/with\ttab"
new file mode 100644
index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
--- /dev/null
+++ "b/path/with\ttab"
@@ -0,0 +1 @@
+dummy content
diff --git "a/path/with\nnewline" "b/path/with\nnewline"
new file mode 100644
index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
--- /dev/null
+++ "b/path/with\nnewline"
@@ -0,0 +1 @@
+dummy content
diff --git a/path/with spaces b/path/with spaces
new file mode 100644
index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
--- /dev/null
+++ b/path/with spaces
@@ -0,0 +1 @@
+dummy content
diff --git a/path/with-question-mark? b/path/with-question-mark?
new file mode 100644
index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
--- /dev/null
+++ b/path/with-question-mark?
@@ -0,0 +1 @@
+dummy content
diff --git "a/path/¯\\_(ツ)_|¯" "b/path/¯\\_(ツ)_|¯"
new file mode 100644
index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
--- /dev/null
+++ "b/path/¯\\_(ツ)_|¯"
@@ -0,0 +1 @@
+dummy content
diff --git a/a/with spaces b/b/with some spaces
similarity index 100%
rename from a/with spaces
rename to b/with some spaces
diff --git a/a/ending in a space b/b/ending with space
similarity index 100%
rename from a/ending in a space
rename to b/ending with space
diff --git "a/a/\"with-quotes\"" "b/b/\"with even more quotes\""
similarity index 100%
rename from "a/\"with-quotes\""
rename to "b/\"with even more quotes\""
29 changes: 27 additions & 2 deletions git/test/test_diff.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#-*-coding:utf-8-*-
# coding: utf-8
# test_diff.py
# Copyright (C) 2008, 2009 Michael Trier ([email protected]) and contributors
#
Expand Down Expand Up @@ -145,12 +145,37 @@ def test_diff_initial_commit(self):
assert diff_index[0].new_file
assert diff_index[0].diff == fixture('diff_initial')

def test_diff_unsafe_paths(self):
    """Parse the unsafe-paths fixture and check every decoded path."""
    output = StringProcessAdapter(fixture('diff_patch_unsafe_paths'))
    res = Diff._index_from_patch_format(None, output.stdout)

    # The "Additions": expected b_path of each entry, in fixture order.
    added_paths = (
        u'path/ starting with a space',
        u'path/"with-quotes"',
        u"path/'with-single-quotes'",
        u'path/ending in a space ',
        u'path/with\ttab',
        u'path/with\nnewline',
        u'path/with spaces',
        u'path/with-question-mark?',
        u'path/¯\\_(ツ)_|¯',
    )
    for index, expected_b_path in enumerate(added_paths):
        self.assertEqual(res[index].b_path, expected_b_path)

    # The "Moves"
    # NOTE: The path prefixes a/ and b/ here are legit! We're actually
    # verifying that it's not "a/a/" that shows up, see the fixture data.
    moved_paths = (
        (9, u'a/with spaces', u'b/with some spaces'),
        (10, u'a/ending in a space ', u'b/ending with space '),
        (11, u'a/"with-quotes"', u'b/"with even more quotes"'),
    )
    for index, expected_a_path, expected_b_path in moved_paths:
        self.assertEqual(res[index].a_path, expected_a_path)
        self.assertEqual(res[index].b_path, expected_b_path)

def test_diff_patch_format(self):
# test all of the 'old' format diffs for completeness - it should at least
# be able to deal with it
fixtures = ("diff_2", "diff_2f", "diff_f", "diff_i", "diff_mode_only",
"diff_new_mode", "diff_numstat", "diff_p", "diff_rename",
"diff_tree_numstat_root")
"diff_tree_numstat_root", "diff_patch_unsafe_paths")

for fixture_name in fixtures:
diff_proc = StringProcessAdapter(fixture(fixture_name))
Expand Down