Skip to content

Commit fd9559b

Browse files
stsewdhumitos
andauthored
Build: replace GitPython with git commands (#10594)
* Build: replace GitPython with git commands * Cleanup * Fix tests * Black * Format * Test * Updates from review * Black * Apply suggestions from code review --------- Co-authored-by: Manuel Kaufmann <[email protected]>
1 parent 18889f0 commit fd9559b

File tree

5 files changed

+249
-56
lines changed

5 files changed

+249
-56
lines changed

readthedocs/doc_builder/environments.py

+30-21
Original file line numberDiff line numberDiff line change
@@ -166,43 +166,52 @@ def run(self):
166166
env=environment,
167167
)
168168
cmd_stdout, cmd_stderr = proc.communicate()
169-
self.output = self.sanitize_output(cmd_stdout)
170-
self.error = self.sanitize_output(cmd_stderr)
169+
self.output = self.decode_output(cmd_stdout)
170+
self.error = self.decode_output(cmd_stderr)
171171
self.exit_code = proc.returncode
172172
except OSError:
173173
log.exception("Operating system error.")
174174
self.exit_code = -1
175175
finally:
176176
self.end_time = datetime.utcnow()
177177

178-
def sanitize_output(self, output):
178+
def decode_output(self, output: bytes) -> str:
179+
"""Decode bytes output to a UTF-8 string."""
180+
decoded = ""
181+
try:
182+
decoded = output.decode("utf-8", "replace")
183+
except (TypeError, AttributeError):
184+
pass
185+
return decoded
186+
187+
def sanitize_output(self, output: str) -> str:
179188
r"""
180189
Sanitize ``output`` to be saved into the DB.
181190
182-
1. Decodes to UTF-8
183-
184-
2. Replaces NULL (\x00) characters with ``''`` (empty string) to
191+
1. Replaces NULL (\x00) characters with ``''`` (empty string) to
185192
avoid PostgreSQL db to fail:
186193
https://code.djangoproject.com/ticket/28201
187194
188-
3. Chunk at around ``DATA_UPLOAD_MAX_MEMORY_SIZE`` bytes to be sent
195+
2. Chunk at around ``DATA_UPLOAD_MAX_MEMORY_SIZE`` bytes to be sent
189196
over the API call request
190197
191198
:param output: stdout/stderr to be sanitized
192-
:type output: bytes
193199
194-
:returns: sanitized output as string or ``None`` if it fails
200+
:returns: sanitized output as string
195201
"""
202+
sanitized = ""
196203
try:
197-
sanitized = output.decode('utf-8', 'replace')
198204
# Replace NULL (\x00) character to avoid PostgreSQL db to fail
199205
# https://code.djangoproject.com/ticket/28201
200-
sanitized = sanitized.replace('\x00', '')
206+
sanitized = output.replace("\x00", "")
201207
except (TypeError, AttributeError):
202-
sanitized = ""
208+
pass
203209

204210
# Chunk the output data to be less than ``DATA_UPLOAD_MAX_MEMORY_SIZE``
205-
output_length = len(output) if output else 0
211+
# The length is calculated in bytes, so we need to encode the string first.
212+
# TODO: we are calculating the length in bytes, but truncating the string
213+
# in characters. We should use bytes or characters, but not both.
214+
output_length = len(sanitized.encode("utf-8"))
206215
# Left some extra space for the rest of the request data
207216
threshold = 512 * 1024 # 512Kb
208217
allowed_length = settings.DATA_UPLOAD_MAX_MEMORY_SIZE - threshold
@@ -236,12 +245,12 @@ def save(self, api_client):
236245
self.exit_code = 0
237246

238247
data = {
239-
'build': self.build_env.build.get('id'),
240-
'command': self.get_command(),
241-
'output': self.output,
242-
'exit_code': self.exit_code,
243-
'start_time': self.start_time,
244-
'end_time': self.end_time,
248+
"build": self.build_env.build.get("id"),
249+
"command": self.get_command(),
250+
"output": self.sanitize_output(self.output),
251+
"exit_code": self.exit_code,
252+
"start_time": self.start_time,
253+
"end_time": self.end_time,
245254
}
246255

247256
if self.build_env.project.has_feature(Feature.API_LARGE_DATA):
@@ -320,8 +329,8 @@ def run(self):
320329
cmd_stdout, cmd_stderr = out
321330
else:
322331
cmd_stdout = out
323-
self.output = self.sanitize_output(cmd_stdout)
324-
self.error = self.sanitize_output(cmd_stderr)
332+
self.output = self.decode_output(cmd_stdout)
333+
self.error = self.decode_output(cmd_stderr)
325334
cmd_ret = client.exec_inspect(exec_id=exec_cmd['Id'])
326335
self.exit_code = cmd_ret['ExitCode']
327336

readthedocs/projects/tests/test_build_tasks.py

+14-2
Original file line numberDiff line numberDiff line change
@@ -713,7 +713,16 @@ def test_build_commands_executed(
713713
mock.call(
714714
"git", "clone", "--no-single-branch", "--depth", "50", mock.ANY, "."
715715
),
716-
mock.call("git", "checkout", "--force", "a1b2c3"),
716+
mock.call(
717+
"git",
718+
"show-ref",
719+
"--verify",
720+
"--quiet",
721+
"--",
722+
"refs/remotes/origin/a1b2c3",
723+
record=False,
724+
),
725+
mock.call("git", "checkout", "--force", "origin/a1b2c3"),
717726
mock.call("git", "clean", "-d", "-f", "-f"),
718727
]
719728
)
@@ -1662,7 +1671,9 @@ def test_submodules_include(self, load_yaml_config, value, expected):
16621671
self.mocker.mocks["git.Backend.run"].assert_has_calls(
16631672
[
16641673
mock.call("git", "submodule", "sync"),
1665-
mock.call("git", "submodule", "update", "--init", "--force", *expected),
1674+
mock.call(
1675+
"git", "submodule", "update", "--init", "--force", "--", *expected
1676+
),
16661677
]
16671678
)
16681679

@@ -1687,6 +1698,7 @@ def test_submodules_exclude(self, load_yaml_config):
16871698
"--init",
16881699
"--force",
16891700
"--recursive",
1701+
"--",
16901702
"two",
16911703
"three",
16921704
),

readthedocs/rtd_tests/tests/test_backend.py

+54-6
Original file line numberDiff line numberDiff line change
@@ -328,12 +328,14 @@ def test_invalid_submodule_is_ignored(self):
328328
repo.checkout('submodule')
329329
gitmodules_path = os.path.join(repo.working_dir, '.gitmodules')
330330

331-
with open(gitmodules_path, 'a') as f:
332-
content = textwrap.dedent("""
331+
with open(gitmodules_path, "a") as f:
332+
content = textwrap.dedent(
333+
"""
333334
[submodule "not-valid-path"]
334335
path = not-valid-path
335-
url = https://github.com/readthedocs/readthedocs.org
336-
""")
336+
url =
337+
"""
338+
)
337339
f.write(content)
338340

339341
valid, submodules = repo.validate_submodules(self.dummy_conf)
@@ -492,15 +494,61 @@ def test_invalid_submodule_is_ignored(self):
492494
"""
493495
[submodule "not-valid-path"]
494496
path = not-valid-path
495-
url = https://github.com/readthedocs/readthedocs.org
496-
"""
497+
url =
498+
"""
497499
)
498500
f.write(content)
499501

500502
valid, submodules = repo.validate_submodules(self.dummy_conf)
501503
self.assertTrue(valid)
502504
self.assertEqual(list(submodules), ["foobar"])
503505

506+
def test_parse_submodules(self):
507+
repo = self.project.vcs_repo(
508+
environment=self.build_environment,
509+
version_type=BRANCH,
510+
version_identifier="submodule",
511+
)
512+
repo.update()
513+
repo.checkout("submodule")
514+
gitmodules_path = os.path.join(repo.working_dir, ".gitmodules")
515+
516+
with open(gitmodules_path, "a") as f:
517+
content = textwrap.dedent(
518+
"""
519+
[submodule "not-valid-path"]
520+
path = not-valid-path
521+
url =
522+
523+
[submodule "path with spaces"]
524+
path = path with spaces
525+
url = https://github.com
526+
527+
[submodule "another-submodule"]
528+
url = https://github.com
529+
path = another-submodule
530+
531+
[ssubmodule "invalid-submodule-key"]
532+
url = https://github.com
533+
path = invalid-submodule-key
534+
535+
[submodule "invalid-path-key"]
536+
url = https://github.com
537+
paths = invalid-submodule-key
538+
539+
[submodule "invalid-url-key"]
540+
uurl = https://github.com
541+
path = invalid-submodule-key
542+
"""
543+
)
544+
f.write(content)
545+
546+
valid, submodules = repo.validate_submodules(self.dummy_conf)
547+
self.assertTrue(valid)
548+
self.assertEqual(
549+
list(submodules), ["foobar", "path with spaces", "another-submodule"]
550+
)
551+
504552
def test_skip_submodule_checkout(self):
505553
"""Test that a submodule is listed as available."""
506554
repo = self.project.vcs_repo(

readthedocs/rtd_tests/tests/test_doc_building.py

+32-7
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,12 @@ def test_record_command_as_success(self):
5656
)
5757

5858
with build_env:
59-
build_env.run('false', record_as_success=True)
59+
build_env.run(
60+
"false",
61+
record_as_success=True,
62+
# Use a directory that exists so the command doesn't fail.
63+
cwd="/tmp",
64+
)
6065
self.assertEqual(len(build_env.commands), 1)
6166

6267
command = build_env.commands[0]
@@ -239,18 +244,38 @@ def test_missing_command(self):
239244

240245
def test_output(self):
241246
"""Test output command."""
242-
cmd = BuildCommand(['/bin/bash', '-c', 'echo -n FOOBAR'])
247+
project = get(Project)
248+
api_client = mock.MagicMock()
249+
build_env = LocalBuildEnvironment(
250+
project=project,
251+
build={
252+
"id": 1,
253+
},
254+
api_client=api_client,
255+
)
256+
cmd = BuildCommand(["/bin/bash", "-c", "echo -n FOOBAR"], build_env=build_env)
243257

244258
# Mock BuildCommand.sanitized_output just to count the amount of calls,
245259
# but use the original method to behaves as real
246260
original_sanitized_output = cmd.sanitize_output
247261
with patch('readthedocs.doc_builder.environments.BuildCommand.sanitize_output') as sanitize_output: # noqa
248262
sanitize_output.side_effect = original_sanitized_output
249263
cmd.run()
250-
self.assertEqual(cmd.output, 'FOOBAR')
264+
cmd.save(api_client=api_client)
265+
self.assertEqual(cmd.output, "FOOBAR")
266+
api_client.command.post.assert_called_once_with(
267+
{
268+
"build": mock.ANY,
269+
"command": "/bin/bash -c echo -n FOOBAR",
270+
"output": "FOOBAR",
271+
"exit_code": 0,
272+
"start_time": mock.ANY,
273+
"end_time": mock.ANY,
274+
}
275+
)
251276

252277
# Check that we sanitize the output
253-
self.assertEqual(sanitize_output.call_count, 2)
278+
self.assertEqual(sanitize_output.call_count, 1)
254279

255280
def test_error_output(self):
256281
"""Test error output from command."""
@@ -262,9 +287,9 @@ def test_error_output(self):
262287
def test_sanitize_output(self):
263288
cmd = BuildCommand(['/bin/bash', '-c', 'echo'])
264289
checks = (
265-
(b'Hola', 'Hola'),
266-
(b'H\x00i', 'Hi'),
267-
(b'H\x00i \x00\x00\x00You!\x00', 'Hi You!'),
290+
("Hola", "Hola"),
291+
("H\x00i", "Hi"),
292+
("H\x00i \x00\x00\x00You!\x00", "Hi You!"),
268293
)
269294
for output, sanitized in checks:
270295
self.assertEqual(cmd.sanitize_output(output), sanitized)

0 commit comments

Comments
 (0)