Skip to content

Build: communicate uploading errors to users #9941

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jan 31, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions readthedocs/doc_builder/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ class BuildUserError(BuildBaseException):
BUILD_COMMANDS_WITHOUT_OUTPUT = gettext_noop(
f'No "{BUILD_COMMANDS_OUTPUT_PATH_HTML}" folder was created during this build.'
)
BUILD_OUTPUT_IS_NOT_A_DIRECTORY = gettext_noop(
'Build output directory for format "{artifact_type}" is not a directory.'
)
BUILD_OUTPUT_HAS_MULTIPLE_FILES = gettext_noop(
'Build output directory for format "{artifact_type}" contains multiple files '
"and it is not currently supported. "
'Please, remove all the files but the "{artifact_type}" your want to upload.'
)


class BuildUserSkip(BuildUserError):
Expand Down
181 changes: 104 additions & 77 deletions readthedocs/projects/tasks/builds.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,6 @@ def on_failure(self, exc, task_id, args, kwargs, einfo):
if self.data.version:
version_type = self.data.version.type

# NOTE: autoflake gets confused here. We need the NOQA for now.
status = BUILD_STATUS_FAILURE
if isinstance(exc, BuildUserSkip):
# The build was skipped by returning the magic exit code,
Expand All @@ -513,55 +512,84 @@ def on_failure(self, exc, task_id, args, kwargs, einfo):
# Update build object
self.data.build['success'] = False

def on_success(self, retval, task_id, args, kwargs):
time_before_store_build_artifacts = timezone.now()
# Store build artifacts to storage (local or cloud storage)
#
# TODO: move ``store_build_artifacts`` inside ``execute`` so we can
# handle exceptions properly on ``on_failure``
self.store_build_artifacts()
log.info(
"Store build artifacts finished.",
time=(timezone.now() - time_before_store_build_artifacts).seconds,
)
def get_valid_artifact_types(self):
"""
Return a list of all the valid artifact types for this build.

# HTML are built successfully.
html = os.path.exists(
self.data.project.artifact_path(
version=self.data.version.slug, type_="html"
)
)
localmedia = os.path.exists(
self.data.project.artifact_path(
version=self.data.version.slug, type_="htmlzip"
)
)
pdf = os.path.exists(
self.data.project.artifact_path(version=self.data.version.slug, type_="pdf")
)
epub = os.path.exists(
self.data.project.artifact_path(
version=self.data.version.slug, type_="epub"
It performs the following checks on each output format type path:
- it exists
- it is a directory
- does not contains more than 1 files (only PDF, HTMLZip, ePUB)

TODO: remove the limitation of only 1 file.
Add support for multiple PDF files in the output directory and
grab them by using glob syntaxt between other files that could be garbage.
"""
valid_artifacts = []
for artifact_type in ARTIFACT_TYPES:
artifact_directory = self.data.project.artifact_path(
version=self.data.version.slug,
type_=artifact_type,
)
)
if not os.path.exists(artifact_directory):
# There is no output directory.
# Skip this format.
continue

if not os.path.isdir(artifact_directory):
log.error(
"The output path is not a directory.",
output_format=artifact_type,
)
raise BuildUserError(
BuildUserError.BUILD_OUTPUT_IS_NOT_A_DIRECTORY.format(
artifact_type=artifact_type
)
)

# Check if there are multiple files on artifact directories.
# These output format does not support multiple files yet.
# In case multiple files are found, the upload for this format is not performed.
if artifact_type in ("htmlzip", "epub", "pdf"):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Address Eric's feedback for this #9940 (comment)

if len(os.listdir(artifact_directory)) > 1:
log.error(
"Multiple files are not supported for this format. "
"Skipping this output format.",
output_format=artifact_type,
)
raise BuildUserError(
BuildUserError.BUILD_OUTPUT_HAS_MULTIPLE_FILES.format(
artifact_type=artifact_type
)
)

# If all the conditions were met, the artifact is valid
valid_artifacts.append(artifact_type)

return valid_artifacts

def on_success(self, retval, task_id, args, kwargs):
valid_artifacts = self.get_valid_artifact_types()

# NOTE: we are updating the db version instance *only* when
if html:
# TODO: remove this condition and *always* update the DB Version instance
if "html" in valid_artifacts:
try:
api_v2.version(self.data.version.pk).patch(
{
"built": True,
"documentation_type": self.data.version.documentation_type,
"has_pdf": pdf,
"has_epub": epub,
"has_htmlzip": localmedia,
"has_pdf": "pdf" in valid_artifacts,
"has_epub": "epub" in valid_artifacts,
"has_htmlzip": "htmlzip" in valid_artifacts,
}
)
except HttpClientError:
# NOTE: I think we should fail the build if we cannot update
# the version at this point. Otherwise, we will have inconsistent data
log.exception(
'Updating version failed, skipping file sync.',
"Updating version db object failed. "
'Files are synced in the storage, but "Version" object is not updated',
)

# Index search data
Expand Down Expand Up @@ -658,10 +686,13 @@ def update_build(self, state=None):
try:
api_v2.build(self.data.build['id']).patch(self.data.build)
except Exception:
# NOTE: I think we should fail the build if we cannot update it
# at this point otherwise, the data will be inconsistent and we
# may be serving "new docs" but saying the "build have failed"
log.exception('Unable to update build')
# NOTE: we are updating the "Build" object on each `state`.
# Only if the last update fails, there may be some inconsistency
# between the "Build" object in our db and the reality.
#
# The `state` argument will help us to track this more and understand
# at what state our updates are failing and decide what to do.
log.exception("Error while updating the build object.", state=state)

def execute(self):
self.data.build_director = BuildDirector(
Expand Down Expand Up @@ -709,6 +740,12 @@ def execute(self):
finally:
self.data.build_data = self.collect_build_data()

# At this point, the user's build already succeeded.
# However, we cannot use `.on_success()` because we still have to upload the artifacts;
# which could fail, and we want to detect that and handle it properly at `.on_failure()`
# Store build artifacts to storage (local or cloud storage)
self.store_build_artifacts()

def collect_build_data(self):
"""
Collect data from the current build.
Expand Down Expand Up @@ -786,53 +823,28 @@ def store_build_artifacts(self):

Remove build artifacts of types not included in this build (PDF, ePub, zip only).
"""
time_before_store_build_artifacts = timezone.now()
log.info('Writing build artifacts to media storage')
# NOTE: I don't remember why we removed this state from the Build
# object. I'm re-adding it because I think it's useful, but we can
# remove it if we want
self.update_build(state=BUILD_STATE_UPLOADING)

valid_artifacts = self.get_valid_artifact_types()
types_to_copy = []
types_to_delete = []

for artifact_type in ARTIFACT_TYPES:
if os.path.exists(
self.data.project.artifact_path(
version=self.data.version.slug,
type_=artifact_type,
)
):
if artifact_type in valid_artifacts:
types_to_copy.append(artifact_type)
# Never delete HTML nor JSON (search index)
elif artifact_type not in UNDELETABLE_ARTIFACT_TYPES:
types_to_delete.append(artifact_type)

# Upload formats
for media_type in types_to_copy:
# NOTE: here is where we get the correct FROM path to upload
from_path = self.data.version.project.artifact_path(
from_path = self.data.project.artifact_path(
version=self.data.version.slug,
type_=media_type,
type_=artifact_type,
)

# Check if there are multiple files on source directories.
# These output format does not support multiple files yet.
# In case multiple files are found, the upload for this format is not performed.
#
# TODO: we should fail the build for these cases and clearly communicate this.
# to the user. To do this, we need to call this method (``store_build_artifacts``)
# since the ``execute`` method.
# It will allow us to just `raise BuildUserError` and handle it at
# Celery `on_failure` handler.
if media_type in ("htmlzip", "epub", "pdf"):
if len(os.listdir(from_path)) > 1:
log.exception(
"Multiple files are not supported for this format. "
"Skipping this output format.",
output_format=media_type,
)
continue

to_path = self.data.version.project.get_storage_path(
to_path = self.data.project.get_storage_path(
type_=media_type,
version_slug=self.data.version.slug,
include_file=False,
Expand All @@ -841,15 +853,22 @@ def store_build_artifacts(self):
try:
build_media_storage.sync_directory(from_path, to_path)
except Exception:
# Ideally this should just be an IOError
# but some storage backends unfortunately throw other errors
# NOTE: the exceptions reported so far are:
# - botocore.exceptions:HTTPClientError
# - botocore.exceptions:ClientError
# - readthedocs.doc_builder.exceptions:BuildCancelled
log.exception(
'Error copying to storage (not failing build)',
"Error copying to storage",
media_type=media_type,
from_path=from_path,
to_path=to_path,
)
# Re-raise the exception to fail the build and handle it
# automatically at `on_failure`.
# It will clearly communicate the error to the user.
raise BuildAppError("Error uploading files to the storage.")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like we need more information here. Won't part of the users files already be uploaded, but not all of them? We likely need to make this atomic somehow...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. I think atomic uploads is the correct way. However, I'm not sure how to do that in a simple way. I suppose we could upload the content to one folder and them move it to the final one, in case the "move to the final one" action is atomic. Otherwise, we are in the same situation.


# Delete formats
for media_type in types_to_delete:
media_path = self.data.version.project.get_storage_path(
type_=media_type,
Expand All @@ -860,13 +879,21 @@ def store_build_artifacts(self):
try:
build_media_storage.delete_directory(media_path)
except Exception:
# Ideally this should just be an IOError
# but some storage backends unfortunately throw other errors
# NOTE: I didn't find any log line for this case yet
log.exception(
'Error deleting from storage (not failing build)',
"Error deleting files from storage",
media_type=media_type,
media_path=media_path,
)
# Re-raise the exception to fail the build and handle it
# automatically at `on_failure`.
# It will clearly communicate the error to the user.
raise BuildAppError("Error deleting files from storage.")

log.info(
"Store build artifacts finished.",
time=(timezone.now() - time_before_store_build_artifacts).seconds,
)

def send_notifications(self, version_pk, build_pk, event):
"""Send notifications to all subscribers of `event`."""
Expand Down