From 46f4b4169b534e4c6eee7bc19f2a411ce5ca6c64 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 14 Feb 2022 17:48:29 -0300 Subject: [PATCH 1/3] Celery: max concurrency settings Define Django settings for max concurrency retry and seconds delay to re-use them acrross the whole codebase. --- readthedocs/core/utils/__init__.py | 4 ++-- readthedocs/projects/tasks/builds.py | 5 +++-- readthedocs/settings/base.py | 2 ++ 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/readthedocs/core/utils/__init__.py b/readthedocs/core/utils/__init__.py index 68d4aee700b..352038e438a 100644 --- a/readthedocs/core/utils/__init__.py +++ b/readthedocs/core/utils/__init__.py @@ -186,8 +186,8 @@ def prepare_build( project_slug=project.slug, version_slug=version.slug, ) - options['countdown'] = 5 * 60 - options['max_retries'] = 25 + options['countdown'] = settings.RTD_BUILDS_RETRY_DELAY + options['max_retries'] = settings.RTD_BUILDS_MAX_RETRIES build.error = BuildMaxConcurrencyError.message.format( limit=max_concurrent_builds, ) diff --git a/readthedocs/projects/tasks/builds.py b/readthedocs/projects/tasks/builds.py index 4acf845887e..7038be01718 100644 --- a/readthedocs/projects/tasks/builds.py +++ b/readthedocs/projects/tasks/builds.py @@ -217,8 +217,9 @@ class UpdateDocsTask(SyncRepositoryMixin, Task): autoretry_for = ( BuildMaxConcurrencyError, ) - max_retries = 5 # 5 per normal builds, 25 per concurrency limited - default_retry_delay = 7 * 60 + max_retries = settings.RTD_BUILDS_MAX_RETRIES + default_retry_delay = settings.RTD_BUILDS_RETRY_DELAY + retry_backoff = False # Expected exceptions that will be logged as info only and not retried throws = ( diff --git a/readthedocs/settings/base.py b/readthedocs/settings/base.py index 5771e1dfb19..9eacf3cab61 100644 --- a/readthedocs/settings/base.py +++ b/readthedocs/settings/base.py @@ -125,6 +125,8 @@ def SESSION_COOKIE_SAMESITE(self): RTD_STABLE_VERBOSE_NAME = 'stable' RTD_CLEAN_AFTER_BUILD = False RTD_MAX_CONCURRENT_BUILDS = 4 + RTD_BUILDS_MAX_RETRIES = 25 + RTD_BUILDS_RETRY_DELAY = 5 * 60 # seconds RTD_BUILD_STATUS_API_NAME = 'docs/readthedocs' RTD_ANALYTICS_DEFAULT_RETENTION_DAYS = 30 * 3 RTD_AUDITLOGS_DEFAULT_RETENTION_DAYS = 30 * 3 From 9b5cfdf1dc03341bc9ec3dee77a0a423355b2644 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 14 Feb 2022 18:16:44 -0300 Subject: [PATCH 2/3] Celery: use `on_retry` to handle `BuildMaxConcurrencyError` Instead of handling the retry of a task manually when the project hits max concurrency error, we rely on Celey handlers to do it for us. By raising the any of the exceptions defined in `autoretry_for`, Celery will call `on_retry` automatically. There, inside `on_retry` we handle the particular case for `BuildMaxConcurrencyError` by setting the error into the build object and skipping the test to continue running. See https://docs.celeryproject.org/en/master/userguide/tasks.html#automatic-retry-for-known-exceptions and https://docs.celeryproject.org/en/master/userguide/tasks.html#Task.autoretry_for --- readthedocs/projects/tasks/builds.py | 35 +++++++++++++--------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/readthedocs/projects/tasks/builds.py b/readthedocs/projects/tasks/builds.py index 7038be01718..0f556a66dbd 100644 --- a/readthedocs/projects/tasks/builds.py +++ b/readthedocs/projects/tasks/builds.py @@ -31,6 +31,7 @@ BUILD_STATE_CLONING, BUILD_STATE_FINISHED, BUILD_STATE_INSTALLING, + BUILD_STATE_TRIGGERED, BUILD_STATE_UPLOADING, BUILD_STATUS_FAILURE, BUILD_STATUS_SUCCESS, @@ -264,25 +265,12 @@ def _check_concurrency_limit(self): max_concurrent_builds = settings.RTD_MAX_CONCURRENT_BUILDS if concurrency_limit_reached: - # TODO: this could be handled in `on_retry` probably - log.warning( - 'Delaying tasks due to concurrency limit.', - project_slug=self.data.project.slug, - version_slug=self.data.version.slug, - ) - - # This is done automatically on the environment context, but - # we are executing this code before creating one - api_v2.build(self.data.build['id']).patch({ - 'error': BuildMaxConcurrencyError.message.format( + # By raising this exception and using ``autoretry_for``, Celery + # will handle this automatically calling ``on_retry`` + raise BuildMaxConcurrencyError( + BuildMaxConcurrencyError.message.format( limit=max_concurrent_builds, - ), - 'builder': socket.gethostname(), - }) - self.retry( - exc=BuildMaxConcurrencyError, - # We want to retry this build more times - max_retries=25, + ) ) def _check_duplicated_build(self): @@ -483,7 +471,16 @@ def on_success(self, retval, task_id, args, kwargs): self.data.build['success'] = True def on_retry(self, exc, task_id, args, kwargs, einfo): - log.warning('Retrying this task.') + log.info('Retrying this task.') + + if isinstance(exc, BuildMaxConcurrencyError): + log.warning( + 'Delaying tasks due to concurrency limit.', + project_slug=self.data.project.slug, + version_slug=self.data.version.slug, + ) + self.data.build['error'] = exc.message + self.update_build(state=BUILD_STATE_TRIGGERED) def after_return(self, status, retval, task_id, args, kwargs, einfo): # Update build object From 1bf397efe62aee6851ba9233b0f9e4c3378eb94b Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 14 Feb 2022 18:55:27 -0300 Subject: [PATCH 3/3] Docstring for on_retry --- readthedocs/projects/tasks/builds.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/readthedocs/projects/tasks/builds.py b/readthedocs/projects/tasks/builds.py index 0f556a66dbd..06c38d98561 100644 --- a/readthedocs/projects/tasks/builds.py +++ b/readthedocs/projects/tasks/builds.py @@ -471,6 +471,15 @@ def on_success(self, retval, task_id, args, kwargs): self.data.build['success'] = True def on_retry(self, exc, task_id, args, kwargs, einfo): + """ + Celery helper called when the task is retried. + + This happens when any of the exceptions defined in ``autoretry_for`` + argument is raised or when ``self.retry`` is called from inside the + task. + + See https://docs.celeryproject.org/en/master/userguide/tasks.html#retrying + """ log.info('Retrying this task.') if isinstance(exc, BuildMaxConcurrencyError):