Skip to content

Commit c4be596

Browse files
humitosagjohnson
authored andcommitted
Sanitize BuildCommand.output by removing NULL characters (#4552)
* Sanitize BuildCommand.output by removing NULL characters PostgreSQL doesn't support NULL (\x00) characters on TextFields. Django 2.0 introduces a new validator that doesn't allow NULL characters to reach the database: https://code.djangoproject.com/ticket/28201 This commit just replaces the NULL characters of the stdout and stder from the command ran by '' (empty string) to avoid conflicts when saving them into the database. * Put the logic to sanitize stdout/stderr under a method * Fix lint
1 parent a59c87f commit c4be596

File tree

2 files changed

+47
-10
lines changed

2 files changed

+47
-10
lines changed

readthedocs/doc_builder/environments.py

+26-8
Original file line numberDiff line numberDiff line change
@@ -164,14 +164,8 @@ def run(self):
164164
cmd_input_bytes = cmd_input
165165
cmd_output = proc.communicate(input=cmd_input_bytes)
166166
(cmd_stdout, cmd_stderr) = cmd_output
167-
try:
168-
self.output = cmd_stdout.decode('utf-8', 'replace')
169-
except (TypeError, AttributeError):
170-
self.output = None
171-
try:
172-
self.error = cmd_stderr.decode('utf-8', 'replace')
173-
except (TypeError, AttributeError):
174-
self.error = None
167+
self.output = self.sanitize_output(cmd_stdout)
168+
self.error = self.sanitize_output(cmd_stderr)
175169
self.exit_code = proc.returncode
176170
except OSError:
177171
self.error = traceback.format_exc()
@@ -180,6 +174,30 @@ def run(self):
180174
finally:
181175
self.end_time = datetime.utcnow()
182176

177+
def sanitize_output(self, output):
178+
r"""
179+
Sanitize ``output`` to be saved into the DB.
180+
181+
1. Decodes to UTF-8
182+
183+
2. Replaces NULL (\x00) characters with ``''`` (empty string) to
184+
avoid PostgreSQL db to fail:
185+
https://code.djangoproject.com/ticket/28201
186+
187+
:param output: stdout/stderr to be sanitized
188+
:type output: bytes
189+
190+
:returns: sanitized output as string or ``None`` if it fails
191+
"""
192+
try:
193+
sanitized = output.decode('utf-8', 'replace')
194+
# Replace NULL (\x00) character to avoid PostgreSQL db to fail
195+
# https://code.djangoproject.com/ticket/28201
196+
sanitized = sanitized.replace('\x00', '')
197+
except (TypeError, AttributeError):
198+
sanitized = None
199+
return sanitized
200+
183201
def get_command(self):
184202
"""Flatten command."""
185203
if hasattr(self.command, '__iter__') and not isinstance(self.command, str):

readthedocs/rtd_tests/tests/test_doc_building.py

+21-2
Original file line numberDiff line numberDiff line change
@@ -1009,8 +1009,17 @@ def test_input(self):
10091009
def test_output(self):
10101010
"""Test output command."""
10111011
cmd = BuildCommand(['/bin/bash', '-c', 'echo -n FOOBAR'])
1012-
cmd.run()
1013-
self.assertEqual(cmd.output, 'FOOBAR')
1012+
1013+
# Mock BuildCommand.sanitized_output just to count the amount of calls,
1014+
# but use the original method to behaves as real
1015+
original_sanitized_output = cmd.sanitize_output
1016+
with patch('readthedocs.doc_builder.environments.BuildCommand.sanitize_output') as sanitize_output: # noqa
1017+
sanitize_output.side_effect = original_sanitized_output
1018+
cmd.run()
1019+
self.assertEqual(cmd.output, 'FOOBAR')
1020+
1021+
# Check that we sanitize the output
1022+
self.assertEqual(sanitize_output.call_count, 2)
10141023

10151024
def test_error_output(self):
10161025
"""Test error output from command."""
@@ -1026,6 +1035,16 @@ def test_error_output(self):
10261035
self.assertEqual(cmd.output, '')
10271036
self.assertEqual(cmd.error, 'FOOBAR')
10281037

1038+
def test_sanitize_output(self):
1039+
cmd = BuildCommand(['/bin/bash', '-c', 'echo'])
1040+
checks = (
1041+
(b'Hola', 'Hola'),
1042+
(b'H\x00i', 'Hi'),
1043+
(b'H\x00i \x00\x00\x00You!\x00', 'Hi You!'),
1044+
)
1045+
for output, sanitized in checks:
1046+
self.assertEqual(cmd.sanitize_output(output), sanitized)
1047+
10291048
@patch('subprocess.Popen')
10301049
def test_unicode_output(self, mock_subprocess):
10311050
"""Unicode output from command."""

0 commit comments

Comments
 (0)