From 3a4fc6abfb3b39237f557372262ac79f45b6a9fa Mon Sep 17 00:00:00 2001 From: Michael Mercier Date: Thu, 11 Mar 2021 18:46:34 +0100 Subject: [PATCH 1/8] Replace password in URI by stars if present + test --- git/repo/base.py | 8 +++++++- test/test_repo.py | 11 +++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/git/repo/base.py b/git/repo/base.py index 8f1ef0a6e..44e3f8599 100644 --- a/git/repo/base.py +++ b/git/repo/base.py @@ -969,7 +969,13 @@ def _clone(cls, git, url, path, odb_default_type, progress, multi_options=None, handle_process_output(proc, None, progress.new_message_handler(), finalize_process, decode_streams=False) else: (stdout, stderr) = proc.communicate() - log.debug("Cmd(%s)'s unused stdout: %s", getattr(proc, 'args', ''), stdout) + cmdline = getattr(proc, 'args', '') + uri = cmdline[-2] + if "://" in uri and "@" in uri: + cred = uri.split("://")[1].split("@")[0].split(":") + if len(cred) == 2: + cmdline[-2] = uri.replace(cred[1], "******") + log.debug("Cmd(%s)'s unused stdout: %s", cmdline, stdout) finalize_process(proc, stderr=stderr) # our git command could have a different working dir than our actual diff --git a/test/test_repo.py b/test/test_repo.py index d5ea8664a..30e4f2cbd 100644 --- a/test/test_repo.py +++ b/test/test_repo.py @@ -238,6 +238,17 @@ def test_clone_from_with_path_contains_unicode(self): except UnicodeEncodeError: self.fail('Raised UnicodeEncodeError') + @with_rw_directory + def test_leaking_password_in_clone_logs(self, rw_dir): + """Check that the password is not printed on the logs""" + password = "fakepassword1234" + try: + Repo.clone_from( + url=f"https://fakeuser:{password}@fakerepo.example.com/testrepo", + to_path=rw_dir) + except GitCommandError as err: + assert password not in str(err) + @with_rw_repo('HEAD') def test_max_chunk_size(self, repo): class TestOutputStream(TestBase): From 1d43a751e578e859e03350f198bca77244ba53b5 Mon Sep 17 00:00:00 2001 From: Michael Mercier Date: Fri, 12 Mar 2021 08:42:15 +0100 Subject: [PATCH 2/8] Use format instead of f-string --- test/test_repo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_repo.py b/test/test_repo.py index 30e4f2cbd..65e69c520 100644 --- a/test/test_repo.py +++ b/test/test_repo.py @@ -244,7 +244,7 @@ def test_leaking_password_in_clone_logs(self, rw_dir): password = "fakepassword1234" try: Repo.clone_from( - url=f"https://fakeuser:{password}@fakerepo.example.com/testrepo", + url="https://fakeuser:{}@fakerepo.example.com/testrepo".format(password), to_path=rw_dir) except GitCommandError as err: assert password not in str(err) From b650c4f28bda658d1f3471882520698ef7fb3af6 Mon Sep 17 00:00:00 2001 From: Michael Mercier Date: Fri, 12 Mar 2021 09:05:39 +0100 Subject: [PATCH 3/8] Better assert message --- test/test_repo.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/test_repo.py b/test/test_repo.py index 65e69c520..ac4c6660f 100644 --- a/test/test_repo.py +++ b/test/test_repo.py @@ -244,10 +244,11 @@ def test_leaking_password_in_clone_logs(self, rw_dir): password = "fakepassword1234" try: Repo.clone_from( - url="https://fakeuser:{}@fakerepo.example.com/testrepo".format(password), + url="https://fakeuser:{}@fakerepo.example.com/testrepo".format( + password), to_path=rw_dir) except GitCommandError as err: - assert password not in str(err) + assert password not in str(err), "The error message '%s' should not contain the password" % err @with_rw_repo('HEAD') def test_max_chunk_size(self, repo): From f7180d50f785ec28963e12e647d269650ad89b31 Mon Sep 17 00:00:00 2001 From: Michael Mercier Date: Mon, 15 Mar 2021 09:51:14 +0100 Subject: [PATCH 4/8] Use urllib instead of custom parsing --- git/repo/base.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/git/repo/base.py b/git/repo/base.py index 44e3f8599..9cb84054e 100644 --- a/git/repo/base.py +++ b/git/repo/base.py @@ -9,6 +9,7 @@ import os import re import warnings +from urllib.parse import urlsplit, urlunsplit from git.cmd import ( Git, @@ -971,10 +972,15 @@ def _clone(cls, git, url, path, odb_default_type, progress, multi_options=None, (stdout, stderr) = proc.communicate() cmdline = getattr(proc, 'args', '') uri = cmdline[-2] - if "://" in uri and "@" in uri: - cred = uri.split("://")[1].split("@")[0].split(":") - if len(cred) == 2: - cmdline[-2] = uri.replace(cred[1], "******") + try: + url = urlsplit(uri) + # Remove password from the URL if present + if url.password: + edited_url = url._replace( + netloc=url.netloc.replace(url.password, "****")) + cmdline[-2] = urlunsplit(edited_url) + except ValueError: + log.debug("Unable to parse the URL %s", url) log.debug("Cmd(%s)'s unused stdout: %s", cmdline, stdout) finalize_process(proc, stderr=stderr) From f7968d136276607115907267b3be89c3ff9acd03 Mon Sep 17 00:00:00 2001 From: Michael Mercier Date: Mon, 15 Mar 2021 17:54:48 +0100 Subject: [PATCH 5/8] Put remove password in the utils and use it also in cmd.execute --- git/cmd.py | 6 ++++-- git/repo/base.py | 15 +++------------ git/util.py | 28 ++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 14 deletions(-) diff --git a/git/cmd.py b/git/cmd.py index 050efaedf..222a2408a 100644 --- a/git/cmd.py +++ b/git/cmd.py @@ -28,7 +28,7 @@ is_win, ) from git.exc import CommandError -from git.util import is_cygwin_git, cygpath, expand_path +from git.util import is_cygwin_git, cygpath, expand_path, remove_password_if_present from .exc import ( GitCommandError, @@ -682,8 +682,10 @@ def execute(self, command, :note: If you add additional keyword arguments to the signature of this method, you must update the execute_kwargs tuple housed in this module.""" + # Remove password for the command if present + redacted_command = remove_password_if_present(command) if self.GIT_PYTHON_TRACE and (self.GIT_PYTHON_TRACE != 'full' or as_process): - log.info(' '.join(command)) + log.info(' '.join(redacted_command)) # Allow the user to have the command executed in their working dir. cwd = self._working_dir or os.getcwd() diff --git a/git/repo/base.py b/git/repo/base.py index 9cb84054e..e68bc0779 100644 --- a/git/repo/base.py +++ b/git/repo/base.py @@ -9,7 +9,6 @@ import os import re import warnings -from urllib.parse import urlsplit, urlunsplit from git.cmd import ( Git, @@ -27,7 +26,7 @@ from git.objects import Submodule, RootModule, Commit from git.refs import HEAD, Head, Reference, TagReference from git.remote import Remote, add_progress, to_progress_instance -from git.util import Actor, finalize_process, decygpath, hex_to_bin, expand_path +from git.util import Actor, finalize_process, decygpath, hex_to_bin, expand_path, remove_password_if_present import os.path as osp from .fun import rev_parse, is_git_dir, find_submodule_git_dir, touch, find_worktree_git_dir @@ -971,16 +970,8 @@ def _clone(cls, git, url, path, odb_default_type, progress, multi_options=None, else: (stdout, stderr) = proc.communicate() cmdline = getattr(proc, 'args', '') - uri = cmdline[-2] - try: - url = urlsplit(uri) - # Remove password from the URL if present - if url.password: - edited_url = url._replace( - netloc=url.netloc.replace(url.password, "****")) - cmdline[-2] = urlunsplit(edited_url) - except ValueError: - log.debug("Unable to parse the URL %s", url) + cmdline = remove_password_if_present(cmdline) + log.debug("Cmd(%s)'s unused stdout: %s", cmdline, stdout) finalize_process(proc, stderr=stderr) diff --git a/git/util.py b/git/util.py index 04c967891..e9d183d9e 100644 --- a/git/util.py +++ b/git/util.py @@ -16,6 +16,7 @@ from sys import maxsize import time from unittest import SkipTest +from urllib.parse import urlsplit, urlunsplit from gitdb.util import (# NOQA @IgnorePep8 make_sha, @@ -338,6 +339,33 @@ def expand_path(p, expand_vars=True): except Exception: return None + +def remove_password_if_present(cmdline): + """ + Parse any command line argument and if on of the element is an URL with a + password, replace it by stars. If nothing found just returns a copy of the + command line as-is. + + This should be used for every log line that print a command line. + """ + redacted_cmdline = [] + for to_parse in cmdline: + try: + url = urlsplit(to_parse) + # Remove password from the URL if present + if url.password is None: + raise ValueError() + + edited_url = url._replace( + netloc=url.netloc.replace(url.password, "*****")) + redacted_cmdline.append(urlunsplit(edited_url)) + except ValueError: + redacted_cmdline.append(to_parse) + # This is not a valid URL + pass + return redacted_cmdline + + #} END utilities #{ Classes From 50cbafc690e5692a16148dbde9de680be70ddbd1 Mon Sep 17 00:00:00 2001 From: Michael Mercier Date: Mon, 15 Mar 2021 18:39:26 +0100 Subject: [PATCH 6/8] Add more test and remove password also from error logs --- git/cmd.py | 4 ++-- git/util.py | 13 ++++++------- test/test_util.py | 17 ++++++++++++++++- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/git/cmd.py b/git/cmd.py index 222a2408a..c571c4862 100644 --- a/git/cmd.py +++ b/git/cmd.py @@ -82,8 +82,8 @@ def pump_stream(cmdline, name, stream, is_decode, handler): line = line.decode(defenc) handler(line) except Exception as ex: - log.error("Pumping %r of cmd(%s) failed due to: %r", name, cmdline, ex) - raise CommandError(['<%s-pump>' % name] + cmdline, ex) from ex + log.error("Pumping %r of cmd(%s) failed due to: %r", name, remove_password_if_present(cmdline), ex) + raise CommandError(['<%s-pump>' % name] + remove_password_if_present(cmdline), ex) from ex finally: stream.close() diff --git a/git/util.py b/git/util.py index e9d183d9e..80985df49 100644 --- a/git/util.py +++ b/git/util.py @@ -343,13 +343,13 @@ def expand_path(p, expand_vars=True): def remove_password_if_present(cmdline): """ Parse any command line argument and if on of the element is an URL with a - password, replace it by stars. If nothing found just returns a copy of the - command line as-is. + password, replace it by stars (in-place). + + If nothing found just returns the command line as-is. This should be used for every log line that print a command line. """ - redacted_cmdline = [] - for to_parse in cmdline: + for index, to_parse in enumerate(cmdline): try: url = urlsplit(to_parse) # Remove password from the URL if present @@ -358,12 +358,11 @@ def remove_password_if_present(cmdline): edited_url = url._replace( netloc=url.netloc.replace(url.password, "*****")) - redacted_cmdline.append(urlunsplit(edited_url)) + cmdline[index] = urlunsplit(edited_url) except ValueError: - redacted_cmdline.append(to_parse) # This is not a valid URL pass - return redacted_cmdline + return cmdline #} END utilities diff --git a/test/test_util.py b/test/test_util.py index 5eba6c500..a49632647 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -30,7 +30,8 @@ Actor, IterableList, cygpath, - decygpath + decygpath, + remove_password_if_present, ) @@ -322,3 +323,17 @@ def test_pickle_tzoffset(self): t2 = pickle.loads(pickle.dumps(t1)) self.assertEqual(t1._offset, t2._offset) self.assertEqual(t1._name, t2._name) + + def test_remove_password_from_command_line(self): + """Check that the password is not printed on the logs""" + password = "fakepassword1234" + url_with_pass = "https://fakeuser:{}@fakerepo.example.com/testrepo".format(password) + url_without_pass = "https://fakerepo.example.com/testrepo" + + cmd_1 = ["git", "clone", "-v", url_with_pass] + cmd_2 = ["git", "clone", "-v", url_without_pass] + cmd_3 = ["no", "url", "in", "this", "one"] + + assert password not in remove_password_if_present(cmd_1) + assert cmd_2 == remove_password_if_present(cmd_2) + assert cmd_3 == remove_password_if_present(cmd_3) From ffddedf5467df993b7a42fbd15afacb901bca6d7 Mon Sep 17 00:00:00 2001 From: Michael Mercier Date: Tue, 16 Mar 2021 10:00:51 +0100 Subject: [PATCH 7/8] Use copy and not inplace remove password + working case test --- git/cmd.py | 16 ++++++++-------- git/util.py | 6 ++++-- test/test_repo.py | 5 ++++- test/test_util.py | 7 +++++-- 4 files changed, 21 insertions(+), 13 deletions(-) diff --git a/git/cmd.py b/git/cmd.py index c571c4862..796066072 100644 --- a/git/cmd.py +++ b/git/cmd.py @@ -405,7 +405,7 @@ def read_all_from_possibly_closed_stream(stream): if status != 0: errstr = read_all_from_possibly_closed_stream(self.proc.stderr) log.debug('AutoInterrupt wait stderr: %r' % (errstr,)) - raise GitCommandError(self.args, status, errstr) + raise GitCommandError(remove_password_if_present(self.args), status, errstr) # END status handling return status # END auto interrupt @@ -638,7 +638,7 @@ def execute(self, command, :param env: A dictionary of environment variables to be passed to `subprocess.Popen`. - + :param max_chunk_size: Maximum number of bytes in one chunk of data passed to the output_stream in one invocation of write() method. If the given number is not positive then @@ -706,7 +706,7 @@ def execute(self, command, if is_win: cmd_not_found_exception = OSError if kill_after_timeout: - raise GitCommandError(command, '"kill_after_timeout" feature is not supported on Windows.') + raise GitCommandError(redacted_command, '"kill_after_timeout" feature is not supported on Windows.') else: if sys.version_info[0] > 2: cmd_not_found_exception = FileNotFoundError # NOQA # exists, flake8 unknown @UndefinedVariable @@ -721,7 +721,7 @@ def execute(self, command, if istream: istream_ok = "" log.debug("Popen(%s, cwd=%s, universal_newlines=%s, shell=%s, istream=%s)", - command, cwd, universal_newlines, shell, istream_ok) + redacted_command, cwd, universal_newlines, shell, istream_ok) try: proc = Popen(command, env=env, @@ -737,7 +737,7 @@ def execute(self, command, **subprocess_kwargs ) except cmd_not_found_exception as err: - raise GitCommandNotFound(command, err) from err + raise GitCommandNotFound(redacted_command, err) from err if as_process: return self.AutoInterrupt(proc, command) @@ -787,7 +787,7 @@ def _kill_process(pid): watchdog.cancel() if kill_check.isSet(): stderr_value = ('Timeout: the command "%s" did not complete in %d ' - 'secs.' % (" ".join(command), kill_after_timeout)) + 'secs.' % (" ".join(redacted_command), kill_after_timeout)) if not universal_newlines: stderr_value = stderr_value.encode(defenc) # strip trailing "\n" @@ -811,7 +811,7 @@ def _kill_process(pid): proc.stderr.close() if self.GIT_PYTHON_TRACE == 'full': - cmdstr = " ".join(command) + cmdstr = " ".join(redacted_command) def as_text(stdout_value): return not output_stream and safe_decode(stdout_value) or '' @@ -827,7 +827,7 @@ def as_text(stdout_value): # END handle debug printing if with_exceptions and status != 0: - raise GitCommandError(command, status, stderr_value, stdout_value) + raise GitCommandError(redacted_command, status, stderr_value, stdout_value) if isinstance(stdout_value, bytes) and stdout_as_string: # could also be output_stream stdout_value = safe_decode(stdout_value) diff --git a/git/util.py b/git/util.py index 80985df49..907c6998c 100644 --- a/git/util.py +++ b/git/util.py @@ -349,7 +349,9 @@ def remove_password_if_present(cmdline): This should be used for every log line that print a command line. """ + new_cmdline = [] for index, to_parse in enumerate(cmdline): + new_cmdline.append(to_parse) try: url = urlsplit(to_parse) # Remove password from the URL if present @@ -358,11 +360,11 @@ def remove_password_if_present(cmdline): edited_url = url._replace( netloc=url.netloc.replace(url.password, "*****")) - cmdline[index] = urlunsplit(edited_url) + new_cmdline[index] = urlunsplit(edited_url) except ValueError: # This is not a valid URL pass - return cmdline + return new_cmdline #} END utilities diff --git a/test/test_repo.py b/test/test_repo.py index ac4c6660f..8dc178337 100644 --- a/test/test_repo.py +++ b/test/test_repo.py @@ -240,7 +240,6 @@ def test_clone_from_with_path_contains_unicode(self): @with_rw_directory def test_leaking_password_in_clone_logs(self, rw_dir): - """Check that the password is not printed on the logs""" password = "fakepassword1234" try: Repo.clone_from( @@ -249,6 +248,10 @@ def test_leaking_password_in_clone_logs(self, rw_dir): to_path=rw_dir) except GitCommandError as err: assert password not in str(err), "The error message '%s' should not contain the password" % err + # Working example from a blank private project + Repo.clone_from( + url="https://gitlab+deploy-token-392045:mLWhVus7bjLsy8xj8q2V@gitlab.com/mercierm/test_git_python", + to_path=rw_dir) @with_rw_repo('HEAD') def test_max_chunk_size(self, repo): diff --git a/test/test_util.py b/test/test_util.py index a49632647..ddc5f628f 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -325,7 +325,6 @@ def test_pickle_tzoffset(self): self.assertEqual(t1._name, t2._name) def test_remove_password_from_command_line(self): - """Check that the password is not printed on the logs""" password = "fakepassword1234" url_with_pass = "https://fakeuser:{}@fakerepo.example.com/testrepo".format(password) url_without_pass = "https://fakerepo.example.com/testrepo" @@ -334,6 +333,10 @@ def test_remove_password_from_command_line(self): cmd_2 = ["git", "clone", "-v", url_without_pass] cmd_3 = ["no", "url", "in", "this", "one"] - assert password not in remove_password_if_present(cmd_1) + redacted_cmd_1 = remove_password_if_present(cmd_1) + assert password not in " ".join(redacted_cmd_1) + # Check that we use a copy + assert cmd_1 is not redacted_cmd_1 + assert password in " ".join(cmd_1) assert cmd_2 == remove_password_if_present(cmd_2) assert cmd_3 == remove_password_if_present(cmd_3) From d283c83c43f5e52a1a14e55b35ffe85a780615d8 Mon Sep 17 00:00:00 2001 From: Michael Mercier Date: Thu, 18 Mar 2021 17:01:43 +0100 Subject: [PATCH 8/8] Use continue instead of raising error --- git/util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/git/util.py b/git/util.py index 907c6998c..d0aec5ae9 100644 --- a/git/util.py +++ b/git/util.py @@ -356,14 +356,14 @@ def remove_password_if_present(cmdline): url = urlsplit(to_parse) # Remove password from the URL if present if url.password is None: - raise ValueError() + continue edited_url = url._replace( netloc=url.netloc.replace(url.password, "*****")) new_cmdline[index] = urlunsplit(edited_url) except ValueError: # This is not a valid URL - pass + continue return new_cmdline