Skip to content

Commit 0c0d2c6

Browse files
authored
Build: communicate uploading errors to users (#9941)
* Build: check if the output directory is a directory It checks for `_readthedocs/<format>` output directory to be a directory. If it's not a directory, it silently skip that format for now. This behavior will change once we move out `store_build_artifacts` from `on_success` and we can communicate errors easily to the user. Related #9931 * Build: refactor logic to validate artifacts output paths Wrap all the logic into a class method so it's easier to follow and read. It also remove some duplicated checks of `os.path.exists`, making sure we are always using _validated_ paths already. * Build: communicate uploading errors to users Running `store_build_artifacts` from inside `.execute()` method instead of from `.on_sucess()` allow us to raise any exception and handle it properly. We can raise a `BuildUserError` to clearly communicate the specific error to the user, or any other exception to show the generic message: "Try again or contact us". Besides, this allows us to *fail the build* instead of continuing silently without the user noticing this: - there more than 1 file of formats that don't support multiple files - the output path is not a directory - there were errors uploading the artifacts to the storage * Build: validate the output directory has at least 1 file In case the `_readthedocs/<format>` directory is created by it contains 0 files, we fail the build and communicate this to the user. * Build: constant for artifact types that don't support multiple files
1 parent 5ab0da0 commit 0c0d2c6

File tree

4 files changed

+150
-83
lines changed

4 files changed

+150
-83
lines changed

readthedocs/builds/constants.py

+8
Original file line numberDiff line numberDiff line change
@@ -164,3 +164,11 @@
164164
"html",
165165
"json",
166166
)
167+
# Artifacts that expect one and only one file in the output directory.
168+
# NOTE: currently, this is a limitation that we are consider to remove
169+
# https://github.com/readthedocs/readthedocs.org/issues/9931#issuecomment-1403415757
170+
ARTIFACT_TYPES_WITHOUT_MULTIPLE_FILES_SUPPORT = (
171+
"htmlzip",
172+
"epub",
173+
"pdf",
174+
)

readthedocs/doc_builder/exceptions.py

+12
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,18 @@ class BuildUserError(BuildBaseException):
4040
BUILD_COMMANDS_WITHOUT_OUTPUT = gettext_noop(
4141
f'No "{BUILD_COMMANDS_OUTPUT_PATH_HTML}" folder was created during this build.'
4242
)
43+
BUILD_OUTPUT_IS_NOT_A_DIRECTORY = gettext_noop(
44+
'Build output directory for format "{artifact_type}" is not a directory.'
45+
)
46+
BUILD_OUTPUT_HAS_0_FILES = gettext_noop(
47+
'Build output directory for format "{artifact_type}" does not contain any files. '
48+
"It seems the build process created the directory but did not save any file to it."
49+
)
50+
BUILD_OUTPUT_HAS_MULTIPLE_FILES = gettext_noop(
51+
'Build output directory for format "{artifact_type}" contains multiple files '
52+
"and it is not currently supported. "
53+
'Please, remove all the files but the "{artifact_type}" your want to upload.'
54+
)
4355

4456

4557
class BuildUserSkip(BuildUserError):

readthedocs/projects/tasks/builds.py

+113-76
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from readthedocs.builds import tasks as build_tasks
2020
from readthedocs.builds.constants import (
2121
ARTIFACT_TYPES,
22+
ARTIFACT_TYPES_WITHOUT_MULTIPLE_FILES_SUPPORT,
2223
BUILD_FINAL_STATES,
2324
BUILD_STATE_BUILDING,
2425
BUILD_STATE_CLONING,
@@ -494,7 +495,6 @@ def on_failure(self, exc, task_id, args, kwargs, einfo):
494495
if self.data.version:
495496
version_type = self.data.version.type
496497

497-
# NOTE: autoflake gets confused here. We need the NOQA for now.
498498
status = BUILD_STATUS_FAILURE
499499
if isinstance(exc, BuildUserSkip):
500500
# The build was skipped by returning the magic exit code,
@@ -513,55 +513,91 @@ def on_failure(self, exc, task_id, args, kwargs, einfo):
513513
# Update build object
514514
self.data.build['success'] = False
515515

516-
def on_success(self, retval, task_id, args, kwargs):
517-
time_before_store_build_artifacts = timezone.now()
518-
# Store build artifacts to storage (local or cloud storage)
519-
#
520-
# TODO: move ``store_build_artifacts`` inside ``execute`` so we can
521-
# handle exceptions properly on ``on_failure``
522-
self.store_build_artifacts()
523-
log.info(
524-
"Store build artifacts finished.",
525-
time=(timezone.now() - time_before_store_build_artifacts).seconds,
526-
)
516+
def get_valid_artifact_types(self):
517+
"""
518+
Return a list of all the valid artifact types for this build.
527519
528-
# HTML are built successfully.
529-
html = os.path.exists(
530-
self.data.project.artifact_path(
531-
version=self.data.version.slug, type_="html"
532-
)
533-
)
534-
localmedia = os.path.exists(
535-
self.data.project.artifact_path(
536-
version=self.data.version.slug, type_="htmlzip"
537-
)
538-
)
539-
pdf = os.path.exists(
540-
self.data.project.artifact_path(version=self.data.version.slug, type_="pdf")
541-
)
542-
epub = os.path.exists(
543-
self.data.project.artifact_path(
544-
version=self.data.version.slug, type_="epub"
520+
It performs the following checks on each output format type path:
521+
- it exists
522+
- it is a directory
523+
- does not contains more than 1 files (only PDF, HTMLZip, ePUB)
524+
525+
TODO: remove the limitation of only 1 file.
526+
Add support for multiple PDF files in the output directory and
527+
grab them by using glob syntaxt between other files that could be garbage.
528+
"""
529+
valid_artifacts = []
530+
for artifact_type in ARTIFACT_TYPES:
531+
artifact_directory = self.data.project.artifact_path(
532+
version=self.data.version.slug,
533+
type_=artifact_type,
545534
)
546-
)
535+
if not os.path.exists(artifact_directory):
536+
# There is no output directory.
537+
# Skip this format.
538+
continue
539+
540+
if not os.path.isdir(artifact_directory):
541+
log.error(
542+
"The output path is not a directory.",
543+
output_format=artifact_type,
544+
)
545+
raise BuildUserError(
546+
BuildUserError.BUILD_OUTPUT_IS_NOT_A_DIRECTORY.format(
547+
artifact_type=artifact_type
548+
)
549+
)
550+
551+
# Check if there are multiple files on artifact directories.
552+
# These output format does not support multiple files yet.
553+
# In case multiple files are found, the upload for this format is not performed.
554+
if artifact_type in ARTIFACT_TYPES_WITHOUT_MULTIPLE_FILES_SUPPORT:
555+
artifact_format_files = len(os.listdir(artifact_directory))
556+
if artifact_format_files > 1:
557+
log.error(
558+
"Multiple files are not supported for this format. "
559+
"Skipping this output format.",
560+
output_format=artifact_type,
561+
)
562+
raise BuildUserError(
563+
BuildUserError.BUILD_OUTPUT_HAS_MULTIPLE_FILES.format(
564+
artifact_type=artifact_type
565+
)
566+
)
567+
elif artifact_format_files == 0:
568+
raise BuildUserError(
569+
BuildUserError.BUILD_OUTPUT_HAS_0_FILES.format(
570+
artifact_type=artifact_type
571+
)
572+
)
573+
574+
# If all the conditions were met, the artifact is valid
575+
valid_artifacts.append(artifact_type)
576+
577+
return valid_artifacts
578+
579+
def on_success(self, retval, task_id, args, kwargs):
580+
valid_artifacts = self.get_valid_artifact_types()
547581

548582
# NOTE: we are updating the db version instance *only* when
549-
if html:
583+
# TODO: remove this condition and *always* update the DB Version instance
584+
if "html" in valid_artifacts:
550585
try:
551586
api_v2.version(self.data.version.pk).patch(
552587
{
553588
"built": True,
554589
"documentation_type": self.data.version.documentation_type,
555-
"has_pdf": pdf,
556-
"has_epub": epub,
557-
"has_htmlzip": localmedia,
590+
"has_pdf": "pdf" in valid_artifacts,
591+
"has_epub": "epub" in valid_artifacts,
592+
"has_htmlzip": "htmlzip" in valid_artifacts,
558593
}
559594
)
560595
except HttpClientError:
561596
# NOTE: I think we should fail the build if we cannot update
562597
# the version at this point. Otherwise, we will have inconsistent data
563598
log.exception(
564-
'Updating version failed, skipping file sync.',
599+
"Updating version db object failed. "
600+
'Files are synced in the storage, but "Version" object is not updated',
565601
)
566602

567603
# Index search data
@@ -658,10 +694,13 @@ def update_build(self, state=None):
658694
try:
659695
api_v2.build(self.data.build['id']).patch(self.data.build)
660696
except Exception:
661-
# NOTE: I think we should fail the build if we cannot update it
662-
# at this point otherwise, the data will be inconsistent and we
663-
# may be serving "new docs" but saying the "build have failed"
664-
log.exception('Unable to update build')
697+
# NOTE: we are updating the "Build" object on each `state`.
698+
# Only if the last update fails, there may be some inconsistency
699+
# between the "Build" object in our db and the reality.
700+
#
701+
# The `state` argument will help us to track this more and understand
702+
# at what state our updates are failing and decide what to do.
703+
log.exception("Error while updating the build object.", state=state)
665704

666705
def execute(self):
667706
self.data.build_director = BuildDirector(
@@ -709,6 +748,12 @@ def execute(self):
709748
finally:
710749
self.data.build_data = self.collect_build_data()
711750

751+
# At this point, the user's build already succeeded.
752+
# However, we cannot use `.on_success()` because we still have to upload the artifacts;
753+
# which could fail, and we want to detect that and handle it properly at `.on_failure()`
754+
# Store build artifacts to storage (local or cloud storage)
755+
self.store_build_artifacts()
756+
712757
def collect_build_data(self):
713758
"""
714759
Collect data from the current build.
@@ -786,53 +831,30 @@ def store_build_artifacts(self):
786831
787832
Remove build artifacts of types not included in this build (PDF, ePub, zip only).
788833
"""
834+
time_before_store_build_artifacts = timezone.now()
789835
log.info('Writing build artifacts to media storage')
790-
# NOTE: I don't remember why we removed this state from the Build
791-
# object. I'm re-adding it because I think it's useful, but we can
792-
# remove it if we want
793836
self.update_build(state=BUILD_STATE_UPLOADING)
794837

838+
valid_artifacts = self.get_valid_artifact_types()
839+
log.bind(artifacts=valid_artifacts)
840+
795841
types_to_copy = []
796842
types_to_delete = []
797843

798844
for artifact_type in ARTIFACT_TYPES:
799-
if os.path.exists(
800-
self.data.project.artifact_path(
801-
version=self.data.version.slug,
802-
type_=artifact_type,
803-
)
804-
):
845+
if artifact_type in valid_artifacts:
805846
types_to_copy.append(artifact_type)
806847
# Never delete HTML nor JSON (search index)
807848
elif artifact_type not in UNDELETABLE_ARTIFACT_TYPES:
808849
types_to_delete.append(artifact_type)
809850

851+
# Upload formats
810852
for media_type in types_to_copy:
811-
# NOTE: here is where we get the correct FROM path to upload
812-
from_path = self.data.version.project.artifact_path(
853+
from_path = self.data.project.artifact_path(
813854
version=self.data.version.slug,
814855
type_=media_type,
815856
)
816-
817-
# Check if there are multiple files on source directories.
818-
# These output format does not support multiple files yet.
819-
# In case multiple files are found, the upload for this format is not performed.
820-
#
821-
# TODO: we should fail the build for these cases and clearly communicate this.
822-
# to the user. To do this, we need to call this method (``store_build_artifacts``)
823-
# since the ``execute`` method.
824-
# It will allow us to just `raise BuildUserError` and handle it at
825-
# Celery `on_failure` handler.
826-
if media_type in ("htmlzip", "epub", "pdf"):
827-
if len(os.listdir(from_path)) > 1:
828-
log.exception(
829-
"Multiple files are not supported for this format. "
830-
"Skipping this output format.",
831-
output_format=media_type,
832-
)
833-
continue
834-
835-
to_path = self.data.version.project.get_storage_path(
857+
to_path = self.data.project.get_storage_path(
836858
type_=media_type,
837859
version_slug=self.data.version.slug,
838860
include_file=False,
@@ -844,15 +866,22 @@ def store_build_artifacts(self):
844866
else:
845867
build_media_storage.sync_directory(from_path, to_path)
846868
except Exception:
847-
# Ideally this should just be an IOError
848-
# but some storage backends unfortunately throw other errors
869+
# NOTE: the exceptions reported so far are:
870+
# - botocore.exceptions:HTTPClientError
871+
# - botocore.exceptions:ClientError
872+
# - readthedocs.doc_builder.exceptions:BuildCancelled
849873
log.exception(
850-
'Error copying to storage (not failing build)',
874+
"Error copying to storage",
851875
media_type=media_type,
852876
from_path=from_path,
853877
to_path=to_path,
854878
)
879+
# Re-raise the exception to fail the build and handle it
880+
# automatically at `on_failure`.
881+
# It will clearly communicate the error to the user.
882+
raise BuildAppError("Error uploading files to the storage.")
855883

884+
# Delete formats
856885
for media_type in types_to_delete:
857886
media_path = self.data.version.project.get_storage_path(
858887
type_=media_type,
@@ -863,13 +892,21 @@ def store_build_artifacts(self):
863892
try:
864893
build_media_storage.delete_directory(media_path)
865894
except Exception:
866-
# Ideally this should just be an IOError
867-
# but some storage backends unfortunately throw other errors
895+
# NOTE: I didn't find any log line for this case yet
868896
log.exception(
869-
'Error deleting from storage (not failing build)',
897+
"Error deleting files from storage",
870898
media_type=media_type,
871899
media_path=media_path,
872900
)
901+
# Re-raise the exception to fail the build and handle it
902+
# automatically at `on_failure`.
903+
# It will clearly communicate the error to the user.
904+
raise BuildAppError("Error deleting files from storage.")
905+
906+
log.info(
907+
"Store build artifacts finished.",
908+
time=(timezone.now() - time_before_store_build_artifacts).seconds,
909+
)
873910

874911
def send_notifications(self, version_pk, build_pk, event):
875912
"""Send notifications to all subscribers of `event`."""

readthedocs/projects/tests/test_build_tasks.py

+17-7
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import pathlib
23
from unittest import mock
34

45
import django_dynamic_fixture as fixture
@@ -200,8 +201,14 @@ def test_build_updates_documentation_type(self, load_yaml_config):
200201
# Create the artifact paths, so that `store_build_artifacts`
201202
# properly runs: https://github.com/readthedocs/readthedocs.org/blob/faa611fad689675f81101ea643770a6b669bf529/readthedocs/projects/tasks/builds.py#L798-L804
202203
os.makedirs(self.project.artifact_path(version=self.version.slug, type_="html"))
203-
os.makedirs(self.project.artifact_path(version=self.version.slug, type_="epub"))
204-
os.makedirs(self.project.artifact_path(version=self.version.slug, type_="pdf"))
204+
for f in ("epub", "pdf"):
205+
os.makedirs(self.project.artifact_path(version=self.version.slug, type_=f))
206+
pathlib.Path(
207+
os.path.join(
208+
self.project.artifact_path(version=self.version.slug, type_=f),
209+
f"{self.project.slug}.{f}",
210+
)
211+
).touch()
205212

206213
self._trigger_update_docs_task()
207214

@@ -322,11 +329,14 @@ def test_successful_build(
322329
# Create the artifact paths, so it's detected by the builder
323330
os.makedirs(self.project.artifact_path(version=self.version.slug, type_="html"))
324331
os.makedirs(self.project.artifact_path(version=self.version.slug, type_="json"))
325-
os.makedirs(
326-
self.project.artifact_path(version=self.version.slug, type_="htmlzip")
327-
)
328-
os.makedirs(self.project.artifact_path(version=self.version.slug, type_="epub"))
329-
os.makedirs(self.project.artifact_path(version=self.version.slug, type_="pdf"))
332+
for f in ("htmlzip", "epub", "pdf"):
333+
os.makedirs(self.project.artifact_path(version=self.version.slug, type_=f))
334+
pathlib.Path(
335+
os.path.join(
336+
self.project.artifact_path(version=self.version.slug, type_=f),
337+
f"{self.project.slug}.{f}",
338+
)
339+
).touch()
330340

331341
self._trigger_update_docs_task()
332342

0 commit comments

Comments
 (0)