From ce8a69a4141d2149bac2cbf56ea7d4b1f2ed7257 Mon Sep 17 00:00:00 2001
From: Jonas Scharpf <jonas.scharpf@checkmk.com>
Date: Wed, 17 Jul 2024 11:01:09 +0200
Subject: [PATCH] Add type of change to files_dict of a commit

This makes it possible to get not only the total, inserted, and deleted
line counts for each changed file, but also the type of change, such as
Added (A), Copied (C), Deleted (D), Modified (M), Renamed (R),
type changed (T), Unmerged (U), Unknown (X), or pairing Broken (B).
---
 AUTHORS                    |  1 +
 git/objects/commit.py      | 24 +++++++++++++++++-------
 git/types.py               |  1 +
 git/util.py                |  4 +++-
 test/fixtures/diff_numstat |  5 +++--
 test/test_commit.py        |  9 ++++++---
 test/test_stats.py         | 12 +++++++++---
 7 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/AUTHORS b/AUTHORS
index 9311b3962..45b14c961 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -54,5 +54,6 @@ Contributors are:
 -Wenhan Zhu <wzhu.cosmos _at_ gmail.com>
 -Eliah Kagan <eliah.kagan _at_ gmail.com>
 -Ethan Lin <et.repositories _at_ gmail.com>
+-Jonas Scharpf <jonas.scharpf _at_ checkmk.com>
 
 Portions derived from other open source works and are clearly marked.
diff --git a/git/objects/commit.py b/git/objects/commit.py
index d957c9051..0ceb46609 100644
--- a/git/objects/commit.py
+++ b/git/objects/commit.py
@@ -377,15 +377,25 @@ def stats(self) -> Stats:
         :return:
             :class:`Stats`
         """
-        if not self.parents:
-            text = self.repo.git.diff_tree(self.hexsha, "--", numstat=True, no_renames=True, root=True)
-            text2 = ""
-            for line in text.splitlines()[1:]:
+
+        def process_lines(lines: List[str]) -> str:
+            text = ""
+            for file_info, line in zip(lines, lines[len(lines) // 2 :]):
+                change_type = file_info.split("\t")[0][-1]
                 (insertions, deletions, filename) = line.split("\t")
-                text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename)
-            text = text2
+                text += "%s\t%s\t%s\t%s\n" % (change_type, insertions, deletions, filename)
+            return text
+
+        if not self.parents:
+            lines = self.repo.git.diff_tree(
+                self.hexsha, "--", numstat=True, no_renames=True, root=True, raw=True
+            ).splitlines()[1:]
+            text = process_lines(lines)
         else:
-            text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, "--", numstat=True, no_renames=True)
+            lines = self.repo.git.diff(
+                self.parents[0].hexsha, self.hexsha, "--", numstat=True, no_renames=True, raw=True
+            ).splitlines()
+            text = process_lines(lines)
         return Stats._list_from_string(self.repo, text)
 
     @property
diff --git a/git/types.py b/git/types.py
index 584450146..cce184530 100644
--- a/git/types.py
+++ b/git/types.py
@@ -248,6 +248,7 @@ class Files_TD(TypedDict):
     insertions: int
     deletions: int
     lines: int
+    change_type: str
 
 
 class Total_TD(TypedDict):
diff --git a/git/util.py b/git/util.py
index 11f963e02..9e8ac821d 100644
--- a/git/util.py
+++ b/git/util.py
@@ -910,6 +910,7 @@ class Stats:
       deletions = number of deleted lines as int
       insertions = number of inserted lines as int
       lines = total number of lines changed as int, or deletions + insertions
+      change_type = type of change as str, A|C|D|M|R|T|U|X|B
 
     ``full-stat-dict``
 
@@ -938,7 +939,7 @@ def _list_from_string(cls, repo: "Repo", text: str) -> "Stats":
             "files": {},
         }
         for line in text.splitlines():
-            (raw_insertions, raw_deletions, filename) = line.split("\t")
+            (change_type, raw_insertions, raw_deletions, filename) = line.split("\t")
             insertions = raw_insertions != "-" and int(raw_insertions) or 0
             deletions = raw_deletions != "-" and int(raw_deletions) or 0
             hsh["total"]["insertions"] += insertions
@@ -949,6 +950,7 @@ def _list_from_string(cls, repo: "Repo", text: str) -> "Stats":
                 "insertions": insertions,
                 "deletions": deletions,
                 "lines": insertions + deletions,
+                "change_type": change_type,
             }
             hsh["files"][filename.strip()] = files_dict
         return Stats(hsh["total"], hsh["files"])
diff --git a/test/fixtures/diff_numstat b/test/fixtures/diff_numstat
index 44c6ca2d5..b76e467eb 100644
--- a/test/fixtures/diff_numstat
+++ b/test/fixtures/diff_numstat
@@ -1,2 +1,3 @@
-29	18	a.txt
-0	5	b.txt
+M	29	18	a.txt
+M	0	5	b.txt
+A	7	0	c.txt
\ No newline at end of file
diff --git a/test/test_commit.py b/test/test_commit.py
index 5832258de..37c66e3e7 100644
--- a/test/test_commit.py
+++ b/test/test_commit.py
@@ -135,9 +135,12 @@ def test_stats(self):
         commit = self.rorepo.commit("33ebe7acec14b25c5f84f35a664803fcab2f7781")
         stats = commit.stats
 
-        def check_entries(d):
+        def check_entries(d, has_change_type=False):
             assert isinstance(d, dict)
-            for key in ("insertions", "deletions", "lines"):
+            keys = ("insertions", "deletions", "lines")
+            if has_change_type:
+                keys += ("change_type",)
+            for key in keys:
                 assert key in d
 
         # END assertion helper
@@ -148,7 +151,7 @@ def check_entries(d):
         assert "files" in stats.total
 
         for _filepath, d in stats.files.items():
-            check_entries(d)
+            check_entries(d, True)
         # END for each stated file
 
         # Check that data is parsed properly.
diff --git a/test/test_stats.py b/test/test_stats.py
index eec73c802..91d2cf6ae 100644
--- a/test/test_stats.py
+++ b/test/test_stats.py
@@ -14,13 +14,19 @@ def test_list_from_string(self):
         output = fixture("diff_numstat").decode(defenc)
         stats = Stats._list_from_string(self.rorepo, output)
 
-        self.assertEqual(2, stats.total["files"])
-        self.assertEqual(52, stats.total["lines"])
-        self.assertEqual(29, stats.total["insertions"])
+        self.assertEqual(3, stats.total["files"])
+        self.assertEqual(59, stats.total["lines"])
+        self.assertEqual(36, stats.total["insertions"])
         self.assertEqual(23, stats.total["deletions"])
 
         self.assertEqual(29, stats.files["a.txt"]["insertions"])
         self.assertEqual(18, stats.files["a.txt"]["deletions"])
+        self.assertEqual("M", stats.files["a.txt"]["change_type"])
 
         self.assertEqual(0, stats.files["b.txt"]["insertions"])
         self.assertEqual(5, stats.files["b.txt"]["deletions"])
+        self.assertEqual("M", stats.files["b.txt"]["change_type"])
+
+        self.assertEqual(7, stats.files["c.txt"]["insertions"])
+        self.assertEqual(0, stats.files["c.txt"]["deletions"])
+        self.assertEqual("A", stats.files["c.txt"]["change_type"])