Skip to content

Build: replace GitPython with git commands #10594

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Aug 9, 2023
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 30 additions & 21 deletions readthedocs/doc_builder/environments.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,43 +166,52 @@ def run(self):
env=environment,
)
cmd_stdout, cmd_stderr = proc.communicate()
self.output = self.sanitize_output(cmd_stdout)
self.error = self.sanitize_output(cmd_stderr)
self.output = self.decode_output(cmd_stdout)
self.error = self.decode_output(cmd_stderr)
self.exit_code = proc.returncode
except OSError:
log.exception("Operating system error.")
self.exit_code = -1
finally:
self.end_time = datetime.utcnow()

def sanitize_output(self, output):
def decode_output(self, output: bytes) -> str:
"""Decode bytes output to a UTF-8 string."""
decoded = ""
try:
decoded = output.decode("utf-8", "replace")
except (TypeError, AttributeError):
pass
Comment on lines +183 to +184
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good to do log.warning here, so we can easily realize if there is a problem from NR.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can add a log here, but we will probably see a spike in logs, since we weren't logging these before.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think we have lot of command's outputs that are not possible to decode? That shouldn't be normal, I'd say.

return decoded

def sanitize_output(self, output: str) -> str:
r"""
Sanitize ``output`` to be saved into the DB.

1. Decodes to UTF-8

2. Replaces NULL (\x00) characters with ``''`` (empty string) to
1. Replaces NULL (\x00) characters with ``''`` (empty string) to
avoid PostgreSQL db to fail:
https://code.djangoproject.com/ticket/28201

3. Chunk at around ``DATA_UPLOAD_MAX_MEMORY_SIZE`` bytes to be sent
2. Chunk at around ``DATA_UPLOAD_MAX_MEMORY_SIZE`` bytes to be sent
over the API call request

:param output: stdout/stderr to be sanitized
:type output: bytes

:returns: sanitized output as string or ``None`` if it fails
:returns: sanitized output as string
"""
sanitized = ""
try:
sanitized = output.decode('utf-8', 'replace')
# Replace NULL (\x00) character to avoid PostgreSQL db to fail
# https://code.djangoproject.com/ticket/28201
sanitized = sanitized.replace('\x00', '')
sanitized = output.replace("\x00", "")
except (TypeError, AttributeError):
sanitized = ""
pass

# Chunk the output data to be less than ``DATA_UPLOAD_MAX_MEMORY_SIZE``
output_length = len(output) if output else 0
# The length is calculated in bytes, so we need to encode the string first.
# TODO: we are calculating the length in bytes, but truncating the string
# in characters. We should use bytes or characters, but not both.
output_length = len(sanitized.encode("utf-8"))
# Left some extra space for the rest of the request data
threshold = 512 * 1024 # 512Kb
allowed_length = settings.DATA_UPLOAD_MAX_MEMORY_SIZE - threshold
Expand Down Expand Up @@ -236,12 +245,12 @@ def save(self, api_client):
self.exit_code = 0

data = {
'build': self.build_env.build.get('id'),
'command': self.get_command(),
'output': self.output,
'exit_code': self.exit_code,
'start_time': self.start_time,
'end_time': self.end_time,
"build": self.build_env.build.get("id"),
"command": self.get_command(),
"output": self.sanitize_output(self.output),
"exit_code": self.exit_code,
"start_time": self.start_time,
"end_time": self.end_time,
}

if self.build_env.project.has_feature(Feature.API_LARGE_DATA):
Expand Down Expand Up @@ -320,8 +329,8 @@ def run(self):
cmd_stdout, cmd_stderr = out
else:
cmd_stdout = out
self.output = self.sanitize_output(cmd_stdout)
self.error = self.sanitize_output(cmd_stderr)
self.output = self.decode_output(cmd_stdout)
self.error = self.decode_output(cmd_stderr)
cmd_ret = client.exec_inspect(exec_id=exec_cmd['Id'])
self.exit_code = cmd_ret['ExitCode']

Expand Down
16 changes: 14 additions & 2 deletions readthedocs/projects/tests/test_build_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -713,7 +713,16 @@ def test_build_commands_executed(
mock.call(
"git", "clone", "--no-single-branch", "--depth", "50", mock.ANY, "."
),
mock.call("git", "checkout", "--force", "a1b2c3"),
mock.call(
"git",
"show-ref",
"--verify",
"--quiet",
"--",
"refs/remotes/origin/a1b2c3",
record=False,
),
mock.call("git", "checkout", "--force", "origin/a1b2c3"),
mock.call("git", "clean", "-d", "-f", "-f"),
]
)
Expand Down Expand Up @@ -1662,7 +1671,9 @@ def test_submodules_include(self, load_yaml_config, value, expected):
self.mocker.mocks["git.Backend.run"].assert_has_calls(
[
mock.call("git", "submodule", "sync"),
mock.call("git", "submodule", "update", "--init", "--force", *expected),
mock.call(
"git", "submodule", "update", "--init", "--force", "--", *expected
),
]
)

Expand All @@ -1687,6 +1698,7 @@ def test_submodules_exclude(self, load_yaml_config):
"--init",
"--force",
"--recursive",
"--",
"two",
"three",
),
Expand Down
60 changes: 54 additions & 6 deletions readthedocs/rtd_tests/tests/test_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,12 +328,14 @@ def test_invalid_submodule_is_ignored(self):
repo.checkout('submodule')
gitmodules_path = os.path.join(repo.working_dir, '.gitmodules')

with open(gitmodules_path, 'a') as f:
content = textwrap.dedent("""
with open(gitmodules_path, "a") as f:
content = textwrap.dedent(
"""
[submodule "not-valid-path"]
path = not-valid-path
url = https://github.com/readthedocs/readthedocs.org
""")
url =
"""
)
f.write(content)

valid, submodules = repo.validate_submodules(self.dummy_conf)
Expand Down Expand Up @@ -492,15 +494,61 @@ def test_invalid_submodule_is_ignored(self):
"""
[submodule "not-valid-path"]
path = not-valid-path
url = https://github.com/readthedocs/readthedocs.org
"""
url =
"""
)
f.write(content)

valid, submodules = repo.validate_submodules(self.dummy_conf)
self.assertTrue(valid)
self.assertEqual(list(submodules), ["foobar"])

def test_parse_submodules(self):
repo = self.project.vcs_repo(
environment=self.build_environment,
version_type=BRANCH,
version_identifier="submodule",
)
repo.update()
repo.checkout("submodule")
gitmodules_path = os.path.join(repo.working_dir, ".gitmodules")

with open(gitmodules_path, "a") as f:
content = textwrap.dedent(
"""
[submodule "not-valid-path"]
path = not-valid-path
url =

[submodule "path with spaces"]
path = path with spaces
url = https://github.com

[submodule "another-submodule"]
url = https://github.com
path = another-submodule

[ssubmodule "invalid-submodule-key"]
url = https://github.com
path = invalid-submodule-key

[submodule "invalid-path-key"]
url = https://github.com
paths = invalid-submodule-key

[submodule "invalid-url-key"]
uurl = https://github.com
path = invalid-submodule-key
"""
)
f.write(content)

valid, submodules = repo.validate_submodules(self.dummy_conf)
self.assertTrue(valid)
self.assertEqual(
list(submodules), ["foobar", "path with spaces", "another-submodule"]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where does the foobar submodule comes from? I don't see it on the file written in disk.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's defined in the submodule branch of the test git repo.

)

def test_skip_submodule_checkout(self):
"""Test that a submodule is listed as available."""
repo = self.project.vcs_repo(
Expand Down
39 changes: 32 additions & 7 deletions readthedocs/rtd_tests/tests/test_doc_building.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,12 @@ def test_record_command_as_success(self):
)

with build_env:
build_env.run('false', record_as_success=True)
build_env.run(
"false",
record_as_success=True,
# Use a directory that exists so the command doesn't fail.
cwd="/tmp",
)
self.assertEqual(len(build_env.commands), 1)

command = build_env.commands[0]
Expand Down Expand Up @@ -239,18 +244,38 @@ def test_missing_command(self):

def test_output(self):
"""Test output command."""
cmd = BuildCommand(['/bin/bash', '-c', 'echo -n FOOBAR'])
project = get(Project)
api_client = mock.MagicMock()
build_env = LocalBuildEnvironment(
project=project,
build={
"id": 1,
},
api_client=api_client,
)
cmd = BuildCommand(["/bin/bash", "-c", "echo -n FOOBAR"], build_env=build_env)

# Mock BuildCommand.sanitized_output just to count the amount of calls,
# but use the original method to behaves as real
original_sanitized_output = cmd.sanitize_output
with patch('readthedocs.doc_builder.environments.BuildCommand.sanitize_output') as sanitize_output: # noqa
sanitize_output.side_effect = original_sanitized_output
cmd.run()
self.assertEqual(cmd.output, 'FOOBAR')
cmd.save(api_client=api_client)
self.assertEqual(cmd.output, "FOOBAR")
api_client.command.post.assert_called_once_with(
{
"build": mock.ANY,
"command": "/bin/bash -c echo -n FOOBAR",
"output": "FOOBAR",
"exit_code": 0,
"start_time": mock.ANY,
"end_time": mock.ANY,
}
)

# Check that we sanitize the output
self.assertEqual(sanitize_output.call_count, 2)
self.assertEqual(sanitize_output.call_count, 1)

def test_error_output(self):
"""Test error output from command."""
Expand All @@ -262,9 +287,9 @@ def test_error_output(self):
def test_sanitize_output(self):
cmd = BuildCommand(['/bin/bash', '-c', 'echo'])
checks = (
(b'Hola', 'Hola'),
(b'H\x00i', 'Hi'),
(b'H\x00i \x00\x00\x00You!\x00', 'Hi You!'),
("Hola", "Hola"),
("H\x00i", "Hi"),
("H\x00i \x00\x00\x00You!\x00", "Hi You!"),
)
for output, sanitized in checks:
self.assertEqual(cmd.sanitize_output(output), sanitized)
Expand Down
Loading