# Single-character escapes git emits inside C-style quoted paths, keyed by the
# byte that follows the backslash.
_QUOTED_PATH_ESCAPES = {
    b'a': b'\a',
    b'b': b'\b',
    b'f': b'\f',
    b'n': b'\n',
    b'r': b'\r',
    b't': b'\t',
    b'v': b'\v',
    b'"': b'"',
    b'\\': b'\\',
}

# A backslash followed by either a three-digit octal byte value (at most
# \377, so it always fits in one byte) or any other single byte.
_quoted_path_escape_re = re.compile(br'\\(?:([0-3][0-7]{2})|(.))', re.DOTALL)


def _unescape_quoted_path_byte(match):
    """:return: the raw bytes denoted by one backslash escape of a quoted path"""
    octal_digits, escaped_char = match.groups()
    if octal_digits is not None:
        # bytes(bytearray(...)) yields a single byte on both Python 2 and 3
        return bytes(bytearray((int(octal_digits, 8),)))
    # Unknown escapes are kept verbatim rather than guessed at
    return _QUOTED_PATH_ESCAPES.get(escaped_char, match.group(0))


def decode_path(path, has_ab_prefix=True):
    """Convert a path as it appears in patch output into the actual path.

    :param path: bytes of the path as found in the patch. It may be wrapped in
        double quotes, in which case backslash escapes (``\\n``, ``\\t``,
        ``\\"``, ``\\\\``, three-digit octal ``\\ooo``, ...) are decoded.
    :param has_ab_prefix: if True, the (unquoted) path must start with ``a/``
        or ``b/``, and that prefix is removed from the result.
    :return: the decoded path as bytes, or None if path is ``/dev/null``
    :raise AssertionError: if has_ab_prefix is True but the prefix is missing"""
    if path == b'/dev/null':
        return None

    # A lone '"' is not a quoted path, hence the length check.
    if len(path) >= 2 and path.startswith(b'"') and path.endswith(b'"'):
        # Decode every escape in one left-to-right pass. Chained replace()
        # calls would mis-read an escaped backslash followed by 'n' or 't'
        # as a newline or tab escape.
        path = _quoted_path_escape_re.sub(_unescape_quoted_path_byte, path[1:-1])

    if has_ab_prefix:
        # Raised explicitly (instead of a bare assert) so the check still runs
        # under -O rather than silently chopping off two arbitrary bytes.
        if not path.startswith((b'a/', b'b/')):
            raise AssertionError("expected an a/ or b/ prefix in diff path %r" % (path,))
        path = path[2:]

    return path
@classmethod
def _pick_best_path(cls, path_match, rename_match, path_fallback_match):
    """Decode the first non-empty candidate path.

    Candidates are tried in the order path_match, rename_match,
    path_fallback_match. The rename candidate is decoded without stripping
    an a/ or b/ prefix; the other two are decoded with prefix stripping.

    :return: the decoded path, or None if every candidate is empty"""
    candidates = (
        (path_match, True),
        (rename_match, False),
        (path_fallback_match, True),
    )
    for raw_path, strip_prefix in candidates:
        if raw_path:
            return decode_path(raw_path, has_ab_prefix=strip_prefix)
    return None
0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54 +--- /dev/null ++++ "b/path/\"with-quotes\"" +@@ -0,0 +1 @@ ++dummy content +diff --git a/path/'with-single-quotes' b/path/'with-single-quotes' +new file mode 100644 +index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54 +--- /dev/null ++++ b/path/'with-single-quotes' +@@ -0,0 +1 @@ ++dummy content +diff --git a/path/ending in a space b/path/ending in a space +new file mode 100644 +index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54 +--- /dev/null ++++ b/path/ending in a space +@@ -0,0 +1 @@ ++dummy content +diff --git "a/path/with\ttab" "b/path/with\ttab" +new file mode 100644 +index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54 +--- /dev/null ++++ "b/path/with\ttab" +@@ -0,0 +1 @@ ++dummy content +diff --git "a/path/with\nnewline" "b/path/with\nnewline" +new file mode 100644 +index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54 +--- /dev/null ++++ "b/path/with\nnewline" +@@ -0,0 +1 @@ ++dummy content +diff --git a/path/with spaces b/path/with spaces +new file mode 100644 +index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54 +--- /dev/null ++++ b/path/with spaces +@@ -0,0 +1 @@ ++dummy content +diff --git a/path/with-question-mark? b/path/with-question-mark? +new file mode 100644 +index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54 +--- /dev/null ++++ b/path/with-question-mark? 
def test_diff_unsafe_paths(self):
    """Parse the diff_patch_unsafe_paths fixture and check every decoded path."""
    adapter = StringProcessAdapter(fixture('diff_patch_unsafe_paths'))
    res = Diff._index_from_patch_format(None, adapter.stdout)

    # The "Additions": one expected b_path per entry, in fixture order
    added_paths = (
        u'path/ starting with a space',
        u'path/"with-quotes"',
        u"path/'with-single-quotes'",
        u'path/ending in a space ',
        u'path/with\ttab',
        u'path/with\nnewline',
        u'path/with spaces',
        u'path/with-question-mark?',
        u'path/¯\\_(ツ)_|¯',
    )
    for position, expected in enumerate(added_paths):
        self.assertEqual(res[position].b_path, expected)

    # The "Moves": (a_path, b_path) pairs following the additions.
    # NOTE: The leading a/ and b/ in these expectations are real directory
    # names from the fixture; the check is that no "a/a/" or "b/b/" shows up.
    moved_paths = (
        (u'a/with spaces', u'b/with some spaces'),
        (u'a/ending in a space ', u'b/ending with space '),
        (u'a/"with-quotes"', u'b/"with even more quotes"'),
    )
    for position, (old_path, new_path) in enumerate(moved_paths, len(added_paths)):
        self.assertEqual(res[position].a_path, old_path)
        self.assertEqual(res[position].b_path, new_path)