Skip to content

Sanitize BuildCommand.output by removing NULL characters #4552

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 24, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 26 additions & 8 deletions readthedocs/doc_builder/environments.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,14 +159,8 @@ def run(self):
cmd_input_bytes = cmd_input
cmd_output = proc.communicate(input=cmd_input_bytes)
(cmd_stdout, cmd_stderr) = cmd_output
try:
self.output = cmd_stdout.decode('utf-8', 'replace')
except (TypeError, AttributeError):
self.output = None
try:
self.error = cmd_stderr.decode('utf-8', 'replace')
except (TypeError, AttributeError):
self.error = None
self.output = self.sanitize_output(cmd_stdout)
self.error = self.sanitize_output(cmd_stderr)
self.exit_code = proc.returncode
except OSError:
self.error = traceback.format_exc()
Expand All @@ -175,6 +169,30 @@ def run(self):
finally:
self.end_time = datetime.utcnow()

def sanitize_output(self, output):
    r"""
    Sanitize ``output`` to be saved into the DB.

    1. Decodes the bytes as UTF-8, substituting the Unicode replacement
       character for any invalid byte sequence.

    2. Removes NULL (\x00) characters to avoid PostgreSQL db to fail when
       the text is saved:
       https://code.djangoproject.com/ticket/28201

    :param output: stdout/stderr to be sanitized
    :type output: bytes

    :returns: sanitized output as string, or ``None`` if ``output`` cannot
        be decoded (for example when it is ``None``)
    """
    # Only the decode call can fail for non-bytes input, so keep the
    # ``try`` limited to it.
    try:
        decoded = output.decode('utf-8', 'replace')
    except (TypeError, AttributeError):
        return None
    return decoded.replace('\x00', '')

def get_command(self):
"""Flatten command."""
if hasattr(self.command, '__iter__') and not isinstance(self.command, str):
Expand Down
23 changes: 21 additions & 2 deletions readthedocs/rtd_tests/tests/test_doc_building.py
Original file line number Diff line number Diff line change
Expand Up @@ -1009,8 +1009,17 @@ def test_input(self):
def test_output(self):
"""Test output command."""
cmd = BuildCommand(['/bin/bash', '-c', 'echo -n FOOBAR'])
cmd.run()
self.assertEqual(cmd.output, 'FOOBAR')

# Mock BuildCommand.sanitize_output just to count the number of calls,
# but delegate to the original method so it behaves like the real one
original_sanitized_output = cmd.sanitize_output
with patch('readthedocs.doc_builder.environments.BuildCommand.sanitize_output') as sanitize_output: # noqa
Copy link
Member

@stsewd stsewd Aug 23, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't run the linter on test files, so the `# noqa` comment isn't needed here :p

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🙈

sanitize_output.side_effect = original_sanitized_output
cmd.run()
self.assertEqual(cmd.output, 'FOOBAR')

# Check that we sanitize the output
self.assertEqual(sanitize_output.call_count, 2)

def test_error_output(self):
"""Test error output from command."""
Expand All @@ -1026,6 +1035,16 @@ def test_error_output(self):
self.assertEqual(cmd.output, '')
self.assertEqual(cmd.error, 'FOOBAR')

def test_sanitize_output(self):
    """Check that NULL characters are stripped from decoded output."""
    command = BuildCommand(['/bin/bash', '-c', 'echo'])
    # Pairs of (raw bytes given to the method, expected sanitized string).
    cases = [
        (b'Hola', 'Hola'),
        (b'H\x00i', 'Hi'),
        (b'H\x00i \x00\x00\x00You!\x00', 'Hi You!'),
    ]
    for raw, expected in cases:
        result = command.sanitize_output(raw)
        self.assertEqual(result, expected)

@patch('subprocess.Popen')
def test_unicode_output(self, mock_subprocess):
"""Unicode output from command."""
Expand Down