def sanitize_output(self, output):
    r"""
    Turn raw command output into text that is safe to store in the DB.

    The bytes are decoded as UTF-8 (undecodable sequences become the
    Unicode replacement character) and every NULL (\x00) character is
    then dropped, because PostgreSQL refuses to store it:
    https://code.djangoproject.com/ticket/28201

    :param output: stdout/stderr captured from the command
    :type output: bytes

    :returns: the cleaned text, or ``None`` when ``output`` cannot be
        decoded (for example when it is ``None`` or already a ``str``)
    """
    try:
        # Decode and strip in one expression; both steps stay inside the
        # ``try`` so a failure in either one yields ``None``.
        return output.decode('utf-8', 'replace').replace('\x00', '')
    except (TypeError, AttributeError):
        return None
def test_output(self):
    """Test that command output is captured and run through the sanitizer."""
    cmd = BuildCommand(['/bin/bash', '-c', 'echo -n FOOBAR'])

    # Patch ``BuildCommand.sanitize_output`` only to count its calls; the
    # real bound method is installed as the side effect, so the command
    # still behaves as it would unpatched.
    target = 'readthedocs.doc_builder.environments.BuildCommand.sanitize_output'  # noqa
    with patch(target, side_effect=cmd.sanitize_output) as sanitizer:
        cmd.run()
        self.assertEqual(cmd.output, 'FOOBAR')

        # Check that the output went through the sanitizer
        self.assertEqual(sanitizer.call_count, 2)


def test_sanitize_output(self):
    """Test that NULL characters are stripped from decoded output."""
    cmd = BuildCommand(['/bin/bash', '-c', 'echo'])
    cases = (
        (b'Hola', 'Hola'),
        (b'H\x00i', 'Hi'),
        (b'H\x00i \x00\x00\x00You!\x00', 'Hi You!'),
    )
    for raw, expected in cases:
        self.assertEqual(cmd.sanitize_output(raw), expected)