From c84e643c6aa177f364ebe28e4c7bab1e37fb0242 Mon Sep 17 00:00:00 2001
From: David Lakin <github@themoderndev.com>
Date: Sun, 28 Apr 2024 22:41:11 -0400
Subject: [PATCH] Replace the suboptimal fuzz_tree harness with a better
 alternative

As discussed in the initial fuzzing integration PR[^1], `fuzz_tree.py`'s
implementation was not ideal in terms of coverage and its reading/writing to
hard-coded paths inside `/tmp` was problematic as (among other concerns), it
causes intermittent crashes on ClusterFuzz[^2] when multiple workers execute
the test at the same time on the same machine.

The changes here replace `fuzz_tree.py` completely with a completely new
`fuzz_repo.py` fuzz target which:

- Uses `tempfile.TemporaryDirectory()` to safely manage tmpdir creation and
  tear down, including during multi-worker execution runs.
- Retains the same feature coverage as `fuzz_tree.py`, but it also adds
  considerably more from much smaller data inputs and with less memory consumed
  (and it doesn't even have a seed corpus or target specific dictionary yet.)
- Can likely be improved further in the future by exercising additional features
  of `Repo` to the harness.

Because `fuzz_tree.py` was removed and `fuzz_repo.py` was not derived from it,
the Apache License call outs in the docs were also updated as they only apply to
the singe `fuzz_config.py` file now.

[^1]: https://github.com/gitpython-developers/GitPython/pull/1901#discussion_r1565001609
[^2]: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=68355
---
 README.md                           |  4 +-
 fuzzing/README.md                   | 16 +++----
 fuzzing/dictionaries/fuzz_tree.dict | 13 ------
 fuzzing/fuzz-targets/fuzz_repo.py   | 47 ++++++++++++++++++++
 fuzzing/fuzz-targets/fuzz_tree.py   | 67 -----------------------------
 5 files changed, 57 insertions(+), 90 deletions(-)
 delete mode 100644 fuzzing/dictionaries/fuzz_tree.dict
 create mode 100644 fuzzing/fuzz-targets/fuzz_repo.py
 delete mode 100644 fuzzing/fuzz-targets/fuzz_tree.py

diff --git a/README.md b/README.md
index 987e40e6c..d365a6584 100644
--- a/README.md
+++ b/README.md
@@ -240,8 +240,8 @@ Please have a look at the [contributions file][contributing].
 
 [3-Clause BSD License](https://opensource.org/license/bsd-3-clause/), also known as the New BSD License. See the [LICENSE file][license].
 
-Two files exclusively used for fuzz testing are subject to [a separate license, detailed here](./fuzzing/README.md#license).
-These files are not included in the wheel or sdist packages published by the maintainers of GitPython.
+One file exclusively used for fuzz testing is subject to [a separate license, detailed here](./fuzzing/README.md#license).
+This file is not included in the wheel or sdist packages published by the maintainers of GitPython.
 
 [contributing]: https://github.com/gitpython-developers/GitPython/blob/main/CONTRIBUTING.md
 [license]: https://github.com/gitpython-developers/GitPython/blob/main/LICENSE
diff --git a/fuzzing/README.md b/fuzzing/README.md
index 0a62b4c85..9d02bf72f 100644
--- a/fuzzing/README.md
+++ b/fuzzing/README.md
@@ -225,14 +225,14 @@ to [the official OSS-Fuzz documentation][oss-fuzz-docs].
 ## LICENSE
 
 All files located within the `fuzzing/` directory are subject to [the same license](../LICENSE)
-as [the other files in this repository](../README.md#license) with two exceptions:
-
-Two files located in this directory, [`fuzz_config.py`](./fuzz-targets/fuzz_config.py)
-and [`fuzz_tree.py`](./fuzz-targets/fuzz_tree.py), have been migrated here from the OSS-Fuzz project repository where
-they were originally created. As such, these two files retain their original license and copyright notice (Apache
-License, Version 2.0 and Copyright 2023 Google LLC respectively.) Each file includes a notice in their respective header
-comments stating that they have been modified. [LICENSE-APACHE](./LICENSE-APACHE) contains the original license used by
-the OSS-Fuzz project repository at the time they were migrated.
+as [the other files in this repository](../README.md#license) with one exception:
+
+[`fuzz_config.py`](./fuzz-targets/fuzz_config.py) was migrated to this repository from the OSS-Fuzz project's repository
+where it was originally created. As such, [`fuzz_config.py`](./fuzz-targets/fuzz_config.py) retains its original license
+and copyright notice (Apache License, Version 2.0 and Copyright 2023 Google LLC respectively) as in a header
+comment, followed by a notice stating that it has have been modified contributors to GitPython.
+[LICENSE-APACHE](./LICENSE-APACHE) contains the original license used by the OSS-Fuzz project repository at the time the
+file was migrated.
 
 [oss-fuzz-repo]: https://github.com/google/oss-fuzz
 
diff --git a/fuzzing/dictionaries/fuzz_tree.dict b/fuzzing/dictionaries/fuzz_tree.dict
deleted file mode 100644
index 3ebe52b7f..000000000
--- a/fuzzing/dictionaries/fuzz_tree.dict
+++ /dev/null
@@ -1,13 +0,0 @@
-"\\001\\000\\000\\000"
-"_join_multiline_va"
-"setdef"
-"1\\000\\000\\000\\000\\000\\000\\000"
-"\\000\\000\\000\\000\\000\\000\\000\\020"
-"\\377\\377\\377\\377\\377\\377\\377r"
-"\\001\\000\\000\\000\\000\\000\\000\\001"
-"\\000\\000\\000\\000\\000\\000\\000\\014"
-"\\000\\000\\000\\000\\000\\000\\000\\003"
-"\\001\\000"
-"\\032\\000\\000\\000\\000\\000\\000\\000"
-"-\\000\\000\\000\\000\\000\\000\\000"
-"__format"
diff --git a/fuzzing/fuzz-targets/fuzz_repo.py b/fuzzing/fuzz-targets/fuzz_repo.py
new file mode 100644
index 000000000..7bd82c120
--- /dev/null
+++ b/fuzzing/fuzz-targets/fuzz_repo.py
@@ -0,0 +1,47 @@
+import atheris
+import io
+import sys
+import os
+import tempfile
+
+if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"):
+    path_to_bundled_git_binary = os.path.abspath(os.path.join(os.path.dirname(__file__), "git"))
+    os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = path_to_bundled_git_binary
+
+with atheris.instrument_imports():
+    import git
+
+
+def TestOneInput(data):
+    fdp = atheris.FuzzedDataProvider(data)
+
+    with tempfile.TemporaryDirectory() as temp_dir:
+        repo = git.Repo.init(path=temp_dir)
+
+        # Generate a minimal set of files based on fuzz data to minimize I/O operations.
+        file_paths = [os.path.join(temp_dir, f"File{i}") for i in range(min(3, fdp.ConsumeIntInRange(1, 3)))]
+        for file_path in file_paths:
+            with open(file_path, "wb") as f:
+                # The chosen upperbound for count of bytes we consume by writing to these
+                # files is somewhat arbitrary and may be worth experimenting with if the
+                # fuzzer coverage plateaus.
+                f.write(fdp.ConsumeBytes(fdp.ConsumeIntInRange(1, 512)))
+
+        repo.index.add(file_paths)
+        repo.index.commit(fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 80)))
+
+        fuzz_tree = git.Tree(repo, git.Tree.NULL_BIN_SHA, 0, "")
+
+        try:
+            fuzz_tree._deserialize(io.BytesIO(data))
+        except IndexError:
+            return -1
+
+
+def main():
+    atheris.Setup(sys.argv, TestOneInput)
+    atheris.Fuzz()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/fuzzing/fuzz-targets/fuzz_tree.py b/fuzzing/fuzz-targets/fuzz_tree.py
deleted file mode 100644
index 4e2038add..000000000
--- a/fuzzing/fuzz-targets/fuzz_tree.py
+++ /dev/null
@@ -1,67 +0,0 @@
-# Copyright 2023 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-###############################################################################
-# Note: This file has been modified by contributors to GitPython.
-# The original state of this file may be referenced here:
-# https://github.com/google/oss-fuzz/commit/f26f254558fc48f3c9bc130b10507386b94522da
-###############################################################################
-import atheris
-import io
-import sys
-import os
-import shutil
-
-if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"):
-    path_to_bundled_git_binary = os.path.abspath(os.path.join(os.path.dirname(__file__), "git"))
-    os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = path_to_bundled_git_binary
-
-with atheris.instrument_imports():
-    import git
-
-
-def TestOneInput(data):
-    fdp = atheris.FuzzedDataProvider(data)
-    git_dir = "/tmp/.git"
-    head_file = os.path.join(git_dir, "HEAD")
-    refs_dir = os.path.join(git_dir, "refs")
-    common_dir = os.path.join(git_dir, "commondir")
-    objects_dir = os.path.join(git_dir, "objects")
-
-    if os.path.isdir(git_dir):
-        shutil.rmtree(git_dir)
-
-    os.mkdir(git_dir)
-    with open(head_file, "w") as f:
-        f.write(fdp.ConsumeUnicodeNoSurrogates(1024))
-    os.mkdir(refs_dir)
-    os.mkdir(common_dir)
-    os.mkdir(objects_dir)
-
-    _repo = git.Repo("/tmp/")
-
-    fuzz_tree = git.Tree(_repo, git.Tree.NULL_BIN_SHA, 0, "")
-    try:
-        fuzz_tree._deserialize(io.BytesIO(data))
-    except IndexError:
-        return -1
-
-
-def main():
-    atheris.Setup(sys.argv, TestOneInput)
-    atheris.Fuzz()
-
-
-if __name__ == "__main__":
-    main()