readthedocs · humitos · Aug 9, 2023 · Aug 2, 2023 · Aug 2, 2023 · Aug 2, 2023
@@ -166,43 +166,52 @@ def run(self):
                 env=environment,
             )
             cmd_stdout, cmd_stderr = proc.communicate()
-            self.output = self.sanitize_output(cmd_stdout)
-            self.error = self.sanitize_output(cmd_stderr)
+            self.output = self.decode_output(cmd_stdout)
+            self.error = self.decode_output(cmd_stderr)
             self.exit_code = proc.returncode
         except OSError:
             log.exception("Operating system error.")
             self.exit_code = -1
         finally:
             self.end_time = datetime.utcnow()
 
-    def sanitize_output(self, output):
+    def decode_output(self, output: bytes) -> str:
+        """Decode bytes output to a UTF-8 string."""
+        decoded = ""
+        try:
+            decoded = output.decode("utf-8", "replace")
+        except (TypeError, AttributeError):
+            pass
+        return decoded
+
+    def sanitize_output(self, output: str) -> str:
         r"""
         Sanitize ``output`` to be saved into the DB.
 
-            1. Decodes to UTF-8
-
-            2. Replaces NULL (\x00) characters with ``''`` (empty string) to
+            1. Replaces NULL (\x00) characters with ``''`` (empty string) to
                avoid PostgreSQL db to fail:
                https://code.djangoproject.com/ticket/28201
 
-            3. Chunk at around ``DATA_UPLOAD_MAX_MEMORY_SIZE`` bytes to be sent
+            2. Chunk at around ``DATA_UPLOAD_MAX_MEMORY_SIZE`` bytes to be sent
                over the API call request
 
         :param output: stdout/stderr to be sanitized
-        :type output: bytes
 
-        :returns: sanitized output as string or ``None`` if it fails
+        :returns: sanitized output as string
         """
+        sanitized = ""
         try:
-            sanitized = output.decode('utf-8', 'replace')
             # Replace NULL (\x00) character to avoid PostgreSQL db to fail
             # https://code.djangoproject.com/ticket/28201
-            sanitized = sanitized.replace('\x00', '')
+            sanitized = output.replace("\x00", "")
         except (TypeError, AttributeError):
-            sanitized = ""
+            pass
 
         # Chunk the output data to be less than ``DATA_UPLOAD_MAX_MEMORY_SIZE``
-        output_length = len(output) if output else 0
+        # The length is calculated in bytes, so we need to encode the string first.
+        # TODO: we are calculating the length in bytes, but truncating the string
+        # in characters. We should use bytes or characters, but not both.
+        output_length = len(sanitized.encode("utf-8"))
         # Left some extra space for the rest of the request data
         threshold = 512 * 1024  # 512Kb
         allowed_length = settings.DATA_UPLOAD_MAX_MEMORY_SIZE - threshold
@@ -236,12 +245,12 @@ def save(self, api_client):
             self.exit_code = 0
 
         data = {
-            'build': self.build_env.build.get('id'),
-            'command': self.get_command(),
-            'output': self.output,
-            'exit_code': self.exit_code,
-            'start_time': self.start_time,
-            'end_time': self.end_time,
+            "build": self.build_env.build.get("id"),
+            "command": self.get_command(),
+            "output": self.sanitize_output(self.output),
+            "exit_code": self.exit_code,
+            "start_time": self.start_time,
+            "end_time": self.end_time,
         }
 
         if self.build_env.project.has_feature(Feature.API_LARGE_DATA):
@@ -320,8 +329,8 @@ def run(self):
                 cmd_stdout, cmd_stderr = out
             else:
                 cmd_stdout = out
-            self.output = self.sanitize_output(cmd_stdout)
-            self.error = self.sanitize_output(cmd_stderr)
+            self.output = self.decode_output(cmd_stdout)
+            self.error = self.decode_output(cmd_stderr)
             cmd_ret = client.exec_inspect(exec_id=exec_cmd['Id'])
             self.exit_code = cmd_ret['ExitCode']
 

@@ -713,7 +713,16 @@ def test_build_commands_executed(
                 mock.call(
                     "git", "clone", "--no-single-branch", "--depth", "50", mock.ANY, "."
                 ),
-                mock.call("git", "checkout", "--force", "a1b2c3"),
+                mock.call(
+                    "git",
+                    "show-ref",
+                    "--verify",
+                    "--quiet",
+                    "--",
+                    "refs/remotes/origin/a1b2c3",
+                    record=False,
+                ),
+                mock.call("git", "checkout", "--force", "origin/a1b2c3"),
                 mock.call("git", "clean", "-d", "-f", "-f"),
             ]
         )
@@ -1662,7 +1671,9 @@ def test_submodules_include(self, load_yaml_config, value, expected):
         self.mocker.mocks["git.Backend.run"].assert_has_calls(
             [
                 mock.call("git", "submodule", "sync"),
-                mock.call("git", "submodule", "update", "--init", "--force", *expected),
+                mock.call(
+                    "git", "submodule", "update", "--init", "--force", "--", *expected
+                ),
             ]
         )
 
@@ -1687,6 +1698,7 @@ def test_submodules_exclude(self, load_yaml_config):
                     "--init",
                     "--force",
                     "--recursive",
+                    "--",
                     "two",
                     "three",
                 ),

@@ -328,12 +328,14 @@ def test_invalid_submodule_is_ignored(self):
         repo.checkout('submodule')
         gitmodules_path = os.path.join(repo.working_dir, '.gitmodules')
 
-        with open(gitmodules_path, 'a') as f:
-            content = textwrap.dedent("""
+        with open(gitmodules_path, "a") as f:
+            content = textwrap.dedent(
+                """
                 [submodule "not-valid-path"]
                     path = not-valid-path
-                    url = https://github.com/readthedocs/readthedocs.org
-            """)
+                    url =
+                """
+            )
             f.write(content)
 
         valid, submodules = repo.validate_submodules(self.dummy_conf)
@@ -492,15 +494,61 @@ def test_invalid_submodule_is_ignored(self):
                 """
                 [submodule "not-valid-path"]
                     path = not-valid-path
-                    url = https://github.com/readthedocs/readthedocs.org
-            """
+                    url =
+                """
             )
             f.write(content)
 
         valid, submodules = repo.validate_submodules(self.dummy_conf)
         self.assertTrue(valid)
         self.assertEqual(list(submodules), ["foobar"])
 
+    def test_parse_submodules(self):
+        repo = self.project.vcs_repo(
+            environment=self.build_environment,
+            version_type=BRANCH,
+            version_identifier="submodule",
+        )
+        repo.update()
+        repo.checkout("submodule")
+        gitmodules_path = os.path.join(repo.working_dir, ".gitmodules")
+
+        with open(gitmodules_path, "a") as f:
+            content = textwrap.dedent(
+                """
+                [submodule "not-valid-path"]
+                    path = not-valid-path
+                    url =
+
+                [submodule "path with spaces"]
+                    path = path with spaces
+                    url = https://github.com
+
+                [submodule "another-submodule"]
+                    url = https://github.com
+                    path = another-submodule
+
+                [ssubmodule "invalid-submodule-key"]
+                    url = https://github.com
+                    path = invalid-submodule-key
+
+                [submodule "invalid-path-key"]
+                    url = https://github.com
+                    paths = invalid-submodule-key
+
+                [submodule "invalid-url-key"]
+                    uurl = https://github.com
+                    path = invalid-submodule-key
+                """
+            )
+            f.write(content)
+
+        valid, submodules = repo.validate_submodules(self.dummy_conf)
+        self.assertTrue(valid)
+        self.assertEqual(
+            list(submodules), ["foobar", "path with spaces", "another-submodule"]
+        )
+
     def test_skip_submodule_checkout(self):
         """Test that a submodule is listed as available."""
         repo = self.project.vcs_repo(

@@ -56,7 +56,12 @@ def test_record_command_as_success(self):
         )
 
         with build_env:
-            build_env.run('false', record_as_success=True)
+            build_env.run(
+                "false",
+                record_as_success=True,
+                # Use a directory that exists so the command doesn't fail.
+                cwd="/tmp",
+            )
         self.assertEqual(len(build_env.commands), 1)
 
         command = build_env.commands[0]
@@ -239,18 +244,38 @@ def test_missing_command(self):
 
     def test_output(self):
         """Test output command."""
-        cmd = BuildCommand(['/bin/bash', '-c', 'echo -n FOOBAR'])
+        project = get(Project)
+        api_client = mock.MagicMock()
+        build_env = LocalBuildEnvironment(
+            project=project,
+            build={
+                "id": 1,
+            },
+            api_client=api_client,
+        )
+        cmd = BuildCommand(["/bin/bash", "-c", "echo -n FOOBAR"], build_env=build_env)
 
         # Mock BuildCommand.sanitized_output just to count the amount of calls,
         # but use the original method to behaves as real
         original_sanitized_output = cmd.sanitize_output
         with patch('readthedocs.doc_builder.environments.BuildCommand.sanitize_output') as sanitize_output:  # noqa
             sanitize_output.side_effect = original_sanitized_output
             cmd.run()
-            self.assertEqual(cmd.output, 'FOOBAR')
+            cmd.save(api_client=api_client)
+            self.assertEqual(cmd.output, "FOOBAR")
+            api_client.command.post.assert_called_once_with(
+                {
+                    "build": mock.ANY,
+                    "command": "/bin/bash -c echo -n FOOBAR",
+                    "output": "FOOBAR",
+                    "exit_code": 0,
+                    "start_time": mock.ANY,
+                    "end_time": mock.ANY,
+                }
+            )
 
             # Check that we sanitize the output
-            self.assertEqual(sanitize_output.call_count, 2)
+            self.assertEqual(sanitize_output.call_count, 1)
 
     def test_error_output(self):
         """Test error output from command."""
@@ -262,9 +287,9 @@ def test_error_output(self):
     def test_sanitize_output(self):
         cmd = BuildCommand(['/bin/bash', '-c', 'echo'])
         checks = (
-            (b'Hola', 'Hola'),
-            (b'H\x00i', 'Hi'),
-            (b'H\x00i \x00\x00\x00You!\x00', 'Hi You!'),
+            ("Hola", "Hola"),
+            ("H\x00i", "Hi"),
+            ("H\x00i \x00\x00\x00You!\x00", "Hi You!"),
         )
         for output, sanitized in checks:
             self.assertEqual(cmd.sanitize_output(output), sanitized)