readthedocs · humitos · Jan 31, 2023 · Jan 25, 2023 · Jan 25, 2023 · Jan 25, 2023
@@ -40,6 +40,14 @@ class BuildUserError(BuildBaseException):
     BUILD_COMMANDS_WITHOUT_OUTPUT = gettext_noop(
         f'No "{BUILD_COMMANDS_OUTPUT_PATH_HTML}" folder was created during this build.'
     )
+    BUILD_OUTPUT_IS_NOT_A_DIRECTORY = gettext_noop(
+        'Build output directory for format "{artifact_type}" is not a directory.'
+    )
+    BUILD_OUTPUT_HAS_MULTIPLE_FILES = gettext_noop(
+        'Build output directory for format "{artifact_type}" contains multiple files '
+        "and it is not currently supported. "
+        'Please, remove all the files but the "{artifact_type}" your want to upload.'
+    )
 
 
 class BuildUserSkip(BuildUserError):

@@ -494,7 +494,6 @@ def on_failure(self, exc, task_id, args, kwargs, einfo):
             if self.data.version:
                 version_type = self.data.version.type
 
-            # NOTE: autoflake gets confused here. We need the NOQA for now.
             status = BUILD_STATUS_FAILURE
             if isinstance(exc, BuildUserSkip):
                 # The build was skipped by returning the magic exit code,
@@ -513,55 +512,84 @@ def on_failure(self, exc, task_id, args, kwargs, einfo):
         # Update build object
         self.data.build['success'] = False
 
-    def on_success(self, retval, task_id, args, kwargs):
-        time_before_store_build_artifacts = timezone.now()
-        # Store build artifacts to storage (local or cloud storage)
-        #
-        # TODO: move ``store_build_artifacts`` inside ``execute`` so we can
-        # handle exceptions properly on ``on_failure``
-        self.store_build_artifacts()
-        log.info(
-            "Store build artifacts finished.",
-            time=(timezone.now() - time_before_store_build_artifacts).seconds,
-        )
+    def get_valid_artifact_types(self):
+        """
+        Return a list of all the valid artifact types for this build.
 
-        # HTML are built successfully.
-        html = os.path.exists(
-            self.data.project.artifact_path(
-                version=self.data.version.slug, type_="html"
-            )
-        )
-        localmedia = os.path.exists(
-            self.data.project.artifact_path(
-                version=self.data.version.slug, type_="htmlzip"
-            )
-        )
-        pdf = os.path.exists(
-            self.data.project.artifact_path(version=self.data.version.slug, type_="pdf")
-        )
-        epub = os.path.exists(
-            self.data.project.artifact_path(
-                version=self.data.version.slug, type_="epub"
+        It performs the following checks on each output format type path:
+         - it exists
+         - it is a directory
+         - does not contains more than 1 files (only PDF, HTMLZip, ePUB)
+
+        TODO: remove the limitation of only 1 file.
+        Add support for multiple PDF files in the output directory and
+        grab them by using glob syntaxt between other files that could be garbage.
+        """
+        valid_artifacts = []
+        for artifact_type in ARTIFACT_TYPES:
+            artifact_directory = self.data.project.artifact_path(
+                version=self.data.version.slug,
+                type_=artifact_type,
             )
-        )
+            if not os.path.exists(artifact_directory):
+                # There is no output directory.
+                # Skip this format.
+                continue
+
+            if not os.path.isdir(artifact_directory):
+                log.error(
+                    "The output path is not a directory.",
+                    output_format=artifact_type,
+                )
+                raise BuildUserError(
+                    BuildUserError.BUILD_OUTPUT_IS_NOT_A_DIRECTORY.format(
+                        artifact_type=artifact_type
+                    )
+                )
+
+            # Check if there are multiple files on artifact directories.
+            # These output format does not support multiple files yet.
+            # In case multiple files are found, the upload for this format is not performed.
+            if artifact_type in ("htmlzip", "epub", "pdf"):
+                if len(os.listdir(artifact_directory)) > 1:
+                    log.error(
+                        "Multiple files are not supported for this format. "
+                        "Skipping this output format.",
+                        output_format=artifact_type,
+                    )
+                    raise BuildUserError(
+                        BuildUserError.BUILD_OUTPUT_HAS_MULTIPLE_FILES.format(
+                            artifact_type=artifact_type
+                        )
+                    )
+
+            # If all the conditions were met, the artifact is valid
+            valid_artifacts.append(artifact_type)
+
+        return valid_artifacts
+
+    def on_success(self, retval, task_id, args, kwargs):
+        valid_artifacts = self.get_valid_artifact_types()
 
         # NOTE: we are updating the db version instance *only* when
-        if html:
+        # TODO: remove this condition and *always* update the DB Version instance
+        if "html" in valid_artifacts:
             try:
                 api_v2.version(self.data.version.pk).patch(
                     {
                         "built": True,
                         "documentation_type": self.data.version.documentation_type,
-                        "has_pdf": pdf,
-                        "has_epub": epub,
-                        "has_htmlzip": localmedia,
+                        "has_pdf": "pdf" in valid_artifacts,
+                        "has_epub": "epub" in valid_artifacts,
+                        "has_htmlzip": "htmlzip" in valid_artifacts,
                     }
                 )
             except HttpClientError:
                 # NOTE: I think we should fail the build if we cannot update
                 # the version at this point. Otherwise, we will have inconsistent data
                 log.exception(
-                    'Updating version failed, skipping file sync.',
+                    "Updating version db object failed. "
+                    'Files are synced in the storage, but "Version" object is not updated',
                 )
 
         # Index search data
@@ -658,10 +686,13 @@ def update_build(self, state=None):
         try:
             api_v2.build(self.data.build['id']).patch(self.data.build)
         except Exception:
-            # NOTE: I think we should fail the build if we cannot update it
-            # at this point otherwise, the data will be inconsistent and we
-            # may be serving "new docs" but saying the "build have failed"
-            log.exception('Unable to update build')
+            # NOTE: we are updating the "Build" object on each `state`.
+            # Only if the last update fails, there may be some inconsistency
+            # between the "Build" object in our db and the reality.
+            #
+            # The `state` argument will help us to track this more and understand
+            # at what state our updates are failing and decide what to do.
+            log.exception("Error while updating the build object.", state=state)
 
     def execute(self):
         self.data.build_director = BuildDirector(
@@ -709,6 +740,12 @@ def execute(self):
             finally:
                 self.data.build_data = self.collect_build_data()
 
+        # At this point, the user's build already succeeded.
+        # However, we cannot use `.on_success()` because we still have to upload the artifacts;
+        # which could fail, and we want to detect that and handle it properly at `.on_failure()`
+        # Store build artifacts to storage (local or cloud storage)
+        self.store_build_artifacts()
+
     def collect_build_data(self):
         """
         Collect data from the current build.
@@ -786,53 +823,28 @@ def store_build_artifacts(self):
 
         Remove build artifacts of types not included in this build (PDF, ePub, zip only).
         """
+        time_before_store_build_artifacts = timezone.now()
         log.info('Writing build artifacts to media storage')
-        # NOTE: I don't remember why we removed this state from the Build
-        # object. I'm re-adding it because I think it's useful, but we can
-        # remove it if we want
         self.update_build(state=BUILD_STATE_UPLOADING)
 
+        valid_artifacts = self.get_valid_artifact_types()
         types_to_copy = []
         types_to_delete = []
 
         for artifact_type in ARTIFACT_TYPES:
-            if os.path.exists(
-                self.data.project.artifact_path(
-                    version=self.data.version.slug,
-                    type_=artifact_type,
-                )
-            ):
+            if artifact_type in valid_artifacts:
                 types_to_copy.append(artifact_type)
             # Never delete HTML nor JSON (search index)
             elif artifact_type not in UNDELETABLE_ARTIFACT_TYPES:
                 types_to_delete.append(artifact_type)
 
+        # Upload formats
         for media_type in types_to_copy:
-            # NOTE: here is where we get the correct FROM path to upload
-            from_path = self.data.version.project.artifact_path(
+            from_path = self.data.project.artifact_path(
                 version=self.data.version.slug,
-                type_=media_type,
+                type_=artifact_type,
             )
-
-            # Check if there are multiple files on source directories.
-            # These output format does not support multiple files yet.
-            # In case multiple files are found, the upload for this format is not performed.
-            #
-            # TODO: we should fail the build for these cases and clearly communicate this.
-            # to the user. To do this, we need to call this method (``store_build_artifacts``)
-            # since the ``execute`` method.
-            # It will allow us to just `raise BuildUserError` and handle it at
-            # Celery `on_failure` handler.
-            if media_type in ("htmlzip", "epub", "pdf"):
-                if len(os.listdir(from_path)) > 1:
-                    log.exception(
-                        "Multiple files are not supported for this format. "
-                        "Skipping this output format.",
-                        output_format=media_type,
-                    )
-                    continue
-
-            to_path = self.data.version.project.get_storage_path(
+            to_path = self.data.project.get_storage_path(
                 type_=media_type,
                 version_slug=self.data.version.slug,
                 include_file=False,
@@ -841,15 +853,22 @@ def store_build_artifacts(self):
             try:
                 build_media_storage.sync_directory(from_path, to_path)
             except Exception:
-                # Ideally this should just be an IOError
-                # but some storage backends unfortunately throw other errors
+                # NOTE: the exceptions reported so far are:
+                #  - botocore.exceptions:HTTPClientError
+                #  - botocore.exceptions:ClientError
+                #  - readthedocs.doc_builder.exceptions:BuildCancelled
                 log.exception(
-                    'Error copying to storage (not failing build)',
+                    "Error copying to storage",
                     media_type=media_type,
                     from_path=from_path,
                     to_path=to_path,
                 )
+                # Re-raise the exception to fail the build and handle it
+                # automatically at `on_failure`.
+                # It will clearly communicate the error to the user.
+                raise BuildAppError("Error uploading files to the storage.")
 
+        # Delete formats
         for media_type in types_to_delete:
             media_path = self.data.version.project.get_storage_path(
                 type_=media_type,
@@ -860,13 +879,21 @@ def store_build_artifacts(self):
             try:
                 build_media_storage.delete_directory(media_path)
             except Exception:
-                # Ideally this should just be an IOError
-                # but some storage backends unfortunately throw other errors
+                # NOTE: I didn't find any log line for this case yet
                 log.exception(
-                    'Error deleting from storage (not failing build)',
+                    "Error deleting files from storage",
                     media_type=media_type,
                     media_path=media_path,
                 )
+                # Re-raise the exception to fail the build and handle it
+                # automatically at `on_failure`.
+                # It will clearly communicate the error to the user.
+                raise BuildAppError("Error deleting files from storage.")
+
+        log.info(
+            "Store build artifacts finished.",
+            time=(timezone.now() - time_before_store_build_artifacts).seconds,
+        )
 
     def send_notifications(self, version_pk, build_pk, event):
         """Send notifications to all subscribers of `event`."""