From 65044284c27d9e0520260b258d046d8c3f24ab37 Mon Sep 17 00:00:00 2001 From: Ket3r Date: Wed, 29 Sep 2021 21:04:14 +0200 Subject: [PATCH 1/4] Fix broken test requirements The ddt package changed the function signature in version 1.4.3 from idata(iterable) to idata(iterable, index_len). Hopefully this was just a mistake and the new argument will be optional in future versions (see issue datadriventests/ddt#97) --- test-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test-requirements.txt b/test-requirements.txt index deaafe214..d5d2346a0 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,4 +1,4 @@ -ddt>=1.1.1 +ddt>=1.1.1, !=1.4.3 mypy flake8 From 9cf0c74abf5c5622d4b25d25bc9d9cc8102d3655 Mon Sep 17 00:00:00 2001 From: Peter Kempter Date: Wed, 29 Sep 2021 12:08:30 +0200 Subject: [PATCH 2/4] Add failing unit test --- test/test_commit.py | 46 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/test/test_commit.py b/test/test_commit.py index 67dc7d732..5aeef2e6c 100644 --- a/test/test_commit.py +++ b/test/test_commit.py @@ -4,6 +4,7 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php +import copy from datetime import datetime from io import BytesIO import re @@ -429,3 +430,48 @@ def test_datetimes(self): datetime(2009, 10, 8, 20, 22, 51, tzinfo=tzoffset(-7200))) self.assertEqual(commit.committed_datetime, datetime(2009, 10, 8, 18, 22, 51, tzinfo=utc), commit.committed_datetime) + + def test_trailers(self): + KEY_1 = "Hello" + VALUE_1 = "World" + KEY_2 = "Key" + VALUE_2 = "Value" + + # Check if KEY 1 & 2 with Value 1 & 2 is extracted from multiple msg variations + msgs = [] + msgs.append(f"Subject\n\n{KEY_1}: {VALUE_1}\n{KEY_2}: {VALUE_2}\n") + msgs.append(f"Subject\n \nSome body of a function\n \n{KEY_1}: {VALUE_1}\n{KEY_2}: {VALUE_2}\n") + msgs.append(f"Subject\n \nSome body of a function\n\nnon-key: non-value\n\n{KEY_1}: {VALUE_1}\n{KEY_2}: {VALUE_2}\n") + msgs.append(f"Subject\n \nSome multiline\n body of a function\n\nnon-key: non-value\n\n{KEY_1}: {VALUE_1}\n{KEY_2}: {VALUE_2}\n") + + for msg in msgs: + commit = self.rorepo.commit('master') + commit = copy.copy(commit) + commit.message = msg + assert KEY_1 in commit.trailers.keys() + assert KEY_2 in commit.trailers.keys() + assert commit.trailers[KEY_1] == VALUE_1 + assert commit.trailers[KEY_2] == VALUE_2 + + # Check that trailer stays empty for multiple msg combinations + msgs = [] + msgs.append(f"Subject\n") + msgs.append(f"Subject\n\nBody with some\nText\n") + msgs.append(f"Subject\n\nBody with\nText\n\nContinuation but\n doesn't contain colon\n") + msgs.append(f"Subject\n\nBody with\nText\n\nContinuation but\n only contains one :\n") + msgs.append(f"Subject\n\nBody with\nText\n\nKey: Value\nLine without colon\n") + msgs.append(f"Subject\n\nBody with\nText\n\nLine without colon\nKey: Value\n") + + for msg in msgs: + commit = self.rorepo.commit('master') + commit = copy.copy(commit) + commit.message = msg + assert len(commit.trailers.keys()) == 0 + + # check that only the last key value paragraph is evaluated + commit = self.rorepo.commit('master') + commit = copy.copy(commit) + commit.message = f"Subject\n\nMultiline\nBody\n\n{KEY_1}: {VALUE_1}\n\n{KEY_2}: {VALUE_2}\n" + assert KEY_1 not in commit.trailers.keys() + assert KEY_2 in commit.trailers.keys() + assert commit.trailers[KEY_2] == VALUE_2 From acdc05eb2e4c0344237b6e8cea9d2c27dca261ed Mon Sep 17 00:00:00 2001 From: Peter Kempter Date: Wed, 29 Sep 2021 12:08:56 +0200 Subject: [PATCH 3/4] Add trailer as commit property With the command `git interpret-trailers` git provides a way to interact with trailer lines in the commit messages that look similar to RFC 822 e-mail headers (see: https://git-scm.com/docs/git-interpret-trailers). The new property returns those parsed trailer lines from the message as dictionary. --- git/objects/commit.py | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/git/objects/commit.py b/git/objects/commit.py index b36cd46d2..780461a0c 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -4,6 +4,7 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php import datetime +import re from subprocess import Popen from gitdb import IStream from git.util import ( @@ -39,7 +40,7 @@ # typing ------------------------------------------------------------------ -from typing import Any, IO, Iterator, List, Sequence, Tuple, Union, TYPE_CHECKING, cast +from typing import Any, IO, Iterator, List, Sequence, Tuple, Union, TYPE_CHECKING, cast, Dict from git.types import PathLike, Literal @@ -315,6 +316,44 @@ def stats(self) -> Stats: text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, '--', numstat=True) return Stats._list_from_string(self.repo, text) + @property + def trailers(self) -> Dict: + """Get the trailers of the message as dictionary + + Git messages can contain trailer information that are similar to RFC 822 + e-mail headers (see: https://git-scm.com/docs/git-interpret-trailers). + + The trailer is thereby the last paragraph (seperated by a empty line + from the subject/body). This trailer paragraph must contain a ``:`` as + seperator for key and value in every line. + + Valid message with trailer: + + .. code-block:: + + Subject line + + some body information + + another information + + key1: value1 + key2: value2 + + :return: Dictionary containing whitespace stripped trailer information + """ + d: Dict[str, str] = {} + match = re.search(r".+^\s*$\n([\w\n\s:]+?)\s*\Z", str(self.message), re.MULTILINE | re.DOTALL) + if match is None: + return d + last_paragraph = match.group(1) + if not all(':' in line for line in last_paragraph.split('\n')): + return d + for line in last_paragraph.split('\n'): + key, value = line.split(':', 1) + d[key.strip()] = value.strip() + return d + @ classmethod def _iter_from_process_or_stream(cls, repo: 'Repo', proc_or_stream: Union[Popen, IO]) -> Iterator['Commit']: """Parse out commit information into a list of Commit objects From 3f799cef9b2a20d2791229dfc01249921a0eea21 Mon Sep 17 00:00:00 2001 From: Ket3r Date: Thu, 30 Sep 2021 16:07:05 +0200 Subject: [PATCH 4/4] Use git interpret-trailers for trailers property The whitespace handling and trailer selection isn't very trivial or good documented. It therefore seemed easier and less error prone to just call git to parse the message for the trailers section and remove superfluos whitespaces. --- git/objects/commit.py | 43 ++++++++++++++++++++++++++----------------- test/test_commit.py | 4 ++-- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/git/objects/commit.py b/git/objects/commit.py index 780461a0c..bbd485da8 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -4,8 +4,7 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php import datetime -import re -from subprocess import Popen +from subprocess import Popen, PIPE from gitdb import IStream from git.util import ( hex_to_bin, @@ -14,6 +13,7 @@ finalize_process ) from git.diff import Diffable +from git.cmd import Git from .tree import Tree from . import base @@ -322,10 +322,10 @@ def trailers(self) -> Dict: Git messages can contain trailer information that are similar to RFC 822 e-mail headers (see: https://git-scm.com/docs/git-interpret-trailers). - - The trailer is thereby the last paragraph (seperated by a empty line - from the subject/body). This trailer paragraph must contain a ``:`` as - seperator for key and value in every line. + + This funcions calls ``git interpret-trailers --parse`` onto the message + to extract the trailer information. The key value pairs are stripped of + leading and trailing whitespaces before they get saved into a dictionary. Valid message with trailer: @@ -338,20 +338,29 @@ def trailers(self) -> Dict: another information key1: value1 - key2: value2 + key2 : value 2 with inner spaces + + dictionary will look like this: + .. code-block:: + + { + "key1": "value1", + "key2": "value 2 with inner spaces" + } :return: Dictionary containing whitespace stripped trailer information + """ - d: Dict[str, str] = {} - match = re.search(r".+^\s*$\n([\w\n\s:]+?)\s*\Z", str(self.message), re.MULTILINE | re.DOTALL) - if match is None: - return d - last_paragraph = match.group(1) - if not all(':' in line for line in last_paragraph.split('\n')): - return d - for line in last_paragraph.split('\n'): - key, value = line.split(':', 1) - d[key.strip()] = value.strip() + d = {} + cmd = ['git', 'interpret-trailers', '--parse'] + proc: Git.AutoInterrupt = self.repo.git.execute(cmd, as_process=True, istream=PIPE) # type: ignore + trailer: str = proc.communicate(str(self.message).encode())[0].decode() + if trailer.endswith('\n'): + trailer = trailer[0:-1] + if trailer != '': + for line in trailer.split('\n'): + key, value = line.split(':', 1) + d[key.strip()] = value.strip() return d @ classmethod diff --git a/test/test_commit.py b/test/test_commit.py index 5aeef2e6c..40cf7dd26 100644 --- a/test/test_commit.py +++ b/test/test_commit.py @@ -435,14 +435,14 @@ def test_trailers(self): KEY_1 = "Hello" VALUE_1 = "World" KEY_2 = "Key" - VALUE_2 = "Value" + VALUE_2 = "Value with inner spaces" # Check if KEY 1 & 2 with Value 1 & 2 is extracted from multiple msg variations msgs = [] msgs.append(f"Subject\n\n{KEY_1}: {VALUE_1}\n{KEY_2}: {VALUE_2}\n") msgs.append(f"Subject\n \nSome body of a function\n \n{KEY_1}: {VALUE_1}\n{KEY_2}: {VALUE_2}\n") msgs.append(f"Subject\n \nSome body of a function\n\nnon-key: non-value\n\n{KEY_1}: {VALUE_1}\n{KEY_2}: {VALUE_2}\n") - msgs.append(f"Subject\n \nSome multiline\n body of a function\n\nnon-key: non-value\n\n{KEY_1}: {VALUE_1}\n{KEY_2}: {VALUE_2}\n") + msgs.append(f"Subject\n \nSome multiline\n body of a function\n\nnon-key: non-value\n\n{KEY_1}: {VALUE_1}\n{KEY_2} : {VALUE_2}\n") for msg in msgs: commit = self.rorepo.commit('master')