Skip to content

Replace password in URI by stars if present to avoid leaking secrets in logs #1198

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
26 changes: 14 additions & 12 deletions git/cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
is_win,
)
from git.exc import CommandError
from git.util import is_cygwin_git, cygpath, expand_path
from git.util import is_cygwin_git, cygpath, expand_path, remove_password_if_present

from .exc import (
GitCommandError,
Expand Down Expand Up @@ -82,8 +82,8 @@ def pump_stream(cmdline, name, stream, is_decode, handler):
line = line.decode(defenc)
handler(line)
except Exception as ex:
log.error("Pumping %r of cmd(%s) failed due to: %r", name, cmdline, ex)
raise CommandError(['<%s-pump>' % name] + cmdline, ex) from ex
log.error("Pumping %r of cmd(%s) failed due to: %r", name, remove_password_if_present(cmdline), ex)
raise CommandError(['<%s-pump>' % name] + remove_password_if_present(cmdline), ex) from ex
finally:
stream.close()

Expand Down Expand Up @@ -405,7 +405,7 @@ def read_all_from_possibly_closed_stream(stream):
if status != 0:
errstr = read_all_from_possibly_closed_stream(self.proc.stderr)
log.debug('AutoInterrupt wait stderr: %r' % (errstr,))
raise GitCommandError(self.args, status, errstr)
raise GitCommandError(remove_password_if_present(self.args), status, errstr)
# END status handling
return status
# END auto interrupt
Expand Down Expand Up @@ -638,7 +638,7 @@ def execute(self, command,

:param env:
A dictionary of environment variables to be passed to `subprocess.Popen`.

:param max_chunk_size:
Maximum number of bytes in one chunk of data passed to the output_stream in
one invocation of write() method. If the given number is not positive then
Expand Down Expand Up @@ -682,8 +682,10 @@ def execute(self, command,
:note:
If you add additional keyword arguments to the signature of this method,
you must update the execute_kwargs tuple housed in this module."""
# Remove password for the command if present
redacted_command = remove_password_if_present(command)
if self.GIT_PYTHON_TRACE and (self.GIT_PYTHON_TRACE != 'full' or as_process):
log.info(' '.join(command))
log.info(' '.join(redacted_command))

# Allow the user to have the command executed in their working dir.
cwd = self._working_dir or os.getcwd()
Expand All @@ -704,7 +706,7 @@ def execute(self, command,
if is_win:
cmd_not_found_exception = OSError
if kill_after_timeout:
raise GitCommandError(command, '"kill_after_timeout" feature is not supported on Windows.')
raise GitCommandError(redacted_command, '"kill_after_timeout" feature is not supported on Windows.')
else:
if sys.version_info[0] > 2:
cmd_not_found_exception = FileNotFoundError # NOQA # exists, flake8 unknown @UndefinedVariable
Expand All @@ -719,7 +721,7 @@ def execute(self, command,
if istream:
istream_ok = "<valid stream>"
log.debug("Popen(%s, cwd=%s, universal_newlines=%s, shell=%s, istream=%s)",
command, cwd, universal_newlines, shell, istream_ok)
redacted_command, cwd, universal_newlines, shell, istream_ok)
try:
proc = Popen(command,
env=env,
Expand All @@ -735,7 +737,7 @@ def execute(self, command,
**subprocess_kwargs
)
except cmd_not_found_exception as err:
raise GitCommandNotFound(command, err) from err
raise GitCommandNotFound(redacted_command, err) from err

if as_process:
return self.AutoInterrupt(proc, command)
Expand Down Expand Up @@ -785,7 +787,7 @@ def _kill_process(pid):
watchdog.cancel()
if kill_check.isSet():
stderr_value = ('Timeout: the command "%s" did not complete in %d '
'secs.' % (" ".join(command), kill_after_timeout))
'secs.' % (" ".join(redacted_command), kill_after_timeout))
if not universal_newlines:
stderr_value = stderr_value.encode(defenc)
# strip trailing "\n"
Expand All @@ -809,7 +811,7 @@ def _kill_process(pid):
proc.stderr.close()

if self.GIT_PYTHON_TRACE == 'full':
cmdstr = " ".join(command)
cmdstr = " ".join(redacted_command)

def as_text(stdout_value):
return not output_stream and safe_decode(stdout_value) or '<OUTPUT_STREAM>'
Expand All @@ -825,7 +827,7 @@ def as_text(stdout_value):
# END handle debug printing

if with_exceptions and status != 0:
raise GitCommandError(command, status, stderr_value, stdout_value)
raise GitCommandError(redacted_command, status, stderr_value, stdout_value)

if isinstance(stdout_value, bytes) and stdout_as_string: # could also be output_stream
stdout_value = safe_decode(stdout_value)
Expand Down
7 changes: 5 additions & 2 deletions git/repo/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from git.objects import Submodule, RootModule, Commit
from git.refs import HEAD, Head, Reference, TagReference
from git.remote import Remote, add_progress, to_progress_instance
from git.util import Actor, finalize_process, decygpath, hex_to_bin, expand_path
from git.util import Actor, finalize_process, decygpath, hex_to_bin, expand_path, remove_password_if_present
import os.path as osp

from .fun import rev_parse, is_git_dir, find_submodule_git_dir, touch, find_worktree_git_dir
Expand Down Expand Up @@ -969,7 +969,10 @@ def _clone(cls, git, url, path, odb_default_type, progress, multi_options=None,
handle_process_output(proc, None, progress.new_message_handler(), finalize_process, decode_streams=False)
else:
(stdout, stderr) = proc.communicate()
log.debug("Cmd(%s)'s unused stdout: %s", getattr(proc, 'args', ''), stdout)
cmdline = getattr(proc, 'args', '')
cmdline = remove_password_if_present(cmdline)

log.debug("Cmd(%s)'s unused stdout: %s", cmdline, stdout)
finalize_process(proc, stderr=stderr)

# our git command could have a different working dir than our actual
Expand Down
29 changes: 29 additions & 0 deletions git/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from sys import maxsize
import time
from unittest import SkipTest
from urllib.parse import urlsplit, urlunsplit

from gitdb.util import (# NOQA @IgnorePep8
make_sha,
Expand Down Expand Up @@ -338,6 +339,34 @@ def expand_path(p, expand_vars=True):
except Exception:
return None


def remove_password_if_present(cmdline):
"""
Parse any command line argument and if on of the element is an URL with a
password, replace it by stars (in-place).

If nothing found just returns the command line as-is.

This should be used for every log line that print a command line.
"""
new_cmdline = []
for index, to_parse in enumerate(cmdline):
new_cmdline.append(to_parse)
try:
url = urlsplit(to_parse)
# Remove password from the URL if present
if url.password is None:
raise ValueError()

edited_url = url._replace(
netloc=url.netloc.replace(url.password, "*****"))
new_cmdline[index] = urlunsplit(edited_url)
except ValueError:
# This is not a valid URL
pass
return new_cmdline


#} END utilities

#{ Classes
Expand Down
15 changes: 15 additions & 0 deletions test/test_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,21 @@ def test_clone_from_with_path_contains_unicode(self):
except UnicodeEncodeError:
self.fail('Raised UnicodeEncodeError')

@with_rw_directory
def test_leaking_password_in_clone_logs(self, rw_dir):
password = "fakepassword1234"
try:
Repo.clone_from(
url="https://fakeuser:{}@fakerepo.example.com/testrepo".format(
password),
to_path=rw_dir)
except GitCommandError as err:
assert password not in str(err), "The error message '%s' should not contain the password" % err
# Working example from a blank private project
Repo.clone_from(
url="https://gitlab+deploy-token-392045:[email protected]/mercierm/test_git_python",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might be a danger as it breaks the self-isolation even more (after all, GitPythons test rely on its own repository already), but I think it's OK to go with it and fix it when it breaks.
Maybe it never does.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree! I simply did not find another way to test the working use case. Maybe we can use a deploy token with the GitPython repository on Github but I think it is not available for public repo. Maybe you can create a private repo inside the gitpython-developers account in order to keep the responsibility in the same place?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought about that too and will do that when it breaks. It's certainly the lazy way of doing things and it's basically a mine that is primed to blow sometime in the future, but… it's going to be okay ;).

While writing this I thought to myself that I don't want to be that lazy and took a look at the github ways of doing this. It turns out it only supports repository deploy keys, which indeed are full blown ssh keys that I don't want to deal with right now. This makes your account one of the pillars of the happiness of GitPython's CI, something not everyone can say about themselves :D.

to_path=rw_dir)

@with_rw_repo('HEAD')
def test_max_chunk_size(self, repo):
class TestOutputStream(TestBase):
Expand Down
20 changes: 19 additions & 1 deletion test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
Actor,
IterableList,
cygpath,
decygpath
decygpath,
remove_password_if_present,
)


Expand Down Expand Up @@ -322,3 +323,20 @@ def test_pickle_tzoffset(self):
t2 = pickle.loads(pickle.dumps(t1))
self.assertEqual(t1._offset, t2._offset)
self.assertEqual(t1._name, t2._name)

def test_remove_password_from_command_line(self):
password = "fakepassword1234"
url_with_pass = "https://fakeuser:{}@fakerepo.example.com/testrepo".format(password)
url_without_pass = "https://fakerepo.example.com/testrepo"

cmd_1 = ["git", "clone", "-v", url_with_pass]
cmd_2 = ["git", "clone", "-v", url_without_pass]
cmd_3 = ["no", "url", "in", "this", "one"]

redacted_cmd_1 = remove_password_if_present(cmd_1)
assert password not in " ".join(redacted_cmd_1)
# Check that we use a copy
assert cmd_1 is not redacted_cmd_1
assert password in " ".join(cmd_1)
assert cmd_2 == remove_password_if_present(cmd_2)
assert cmd_3 == remove_password_if_present(cmd_3)