Skip to content

Commit 3a9befe

Browse files
humitos and agjohnson authored
Builds: set scale-in protection before/after each build (#10507)
* Builds: set scale-in protection before/after each build

  We use AWS scale-in protection to prevent our instances from being scaled in automatically while they are running a build. This avoids killing a build in the middle of its run. It could also let us scale in more aggressively, if we decide to, without degrading the user experience.

  Related readthedocs/readthedocs-ops#1324

* Apply suggestions from code review

  Co-authored-by: Anthony <[email protected]>

* Build: scale-in under a feature flag

  This way we have more control and can enable/disable this without requiring a deploy in case something goes wrong.

* Typo

* Test: mock `update_docs_task` on a test that doesn't use it

---------

Co-authored-by: Anthony <[email protected]>
1 parent 6ccdcb0 commit 3a9befe

File tree

4 files changed

+77
-2
lines changed

4 files changed

+77
-2
lines changed

readthedocs/projects/models.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1937,6 +1937,7 @@ def add_features(sender, **kwargs):
19371937
# Build related features
19381938
HOSTING_INTEGRATIONS = "hosting_integrations"
19391939
NO_CONFIG_FILE_DEPRECATED = "no_config_file"
1940+
SCALE_IN_PROTECTION = "scale_in_prtection"
19401941

19411942
FEATURES = (
19421943
(
@@ -2067,6 +2068,10 @@ def add_features(sender, **kwargs):
20672068
NO_CONFIG_FILE_DEPRECATED,
20682069
_("Build: Building without a configuration file is deprecated."),
20692070
),
2071+
(
2072+
SCALE_IN_PROTECTION,
2073+
_("Build: Set scale-in protection before/after building."),
2074+
),
20702075
)
20712076

20722077
FEATURES = sorted(FEATURES, key=lambda l: l[1])

readthedocs/projects/tasks/builds.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
ProjectBuildsSkippedError,
5454
YAMLParseError,
5555
)
56+
from readthedocs.projects.models import Feature
5657
from readthedocs.storage import build_media_storage
5758
from readthedocs.telemetry.collectors import BuildDataCollector
5859
from readthedocs.telemetry.tasks import save_build_data
@@ -67,7 +68,12 @@
6768
from ..signals import before_vcs
6869
from .mixins import SyncRepositoryMixin
6970
from .search import fileify
70-
from .utils import BuildRequest, clean_build, send_external_build_status
71+
from .utils import (
72+
BuildRequest,
73+
clean_build,
74+
send_external_build_status,
75+
set_builder_scale_in_protection,
76+
)
7177

7278
log = structlog.get_logger(__name__)
7379

@@ -418,6 +424,16 @@ def before_start(self, task_id, args, kwargs):
418424
version_slug=self.data.version.slug,
419425
)
420426

427+
# Enable scale-in protection on this instance
428+
#
429+
# TODO: move this to the beginning of this method
430+
# once we don't need to rely on `self.data.project`.
431+
if self.data.project.has_feature(Feature.SCALE_IN_PROTECTION):
432+
set_builder_scale_in_protection.delay(
433+
instance=socket.gethostname(),
434+
protected_from_scale_in=True,
435+
)
436+
421437
# Clean the build paths completely to avoid conflicts with previous run
422438
# (e.g. cleanup task failed for some reason)
423439
clean_build(self.data.version)
@@ -728,6 +744,13 @@ def after_return(self, status, retval, task_id, args, kwargs, einfo):
728744
except Exception:
729745
log.exception("Failed to revoke build api key.", exc_info=True)
730746

747+
# Disable scale-in protection on this instance
748+
if self.data.project.has_feature(Feature.SCALE_IN_PROTECTION):
749+
set_builder_scale_in_protection.delay(
750+
instance=socket.gethostname(),
751+
protected_from_scale_in=False,
752+
)
753+
731754
log.info(
732755
'Build finished.',
733756
length=self.data.build['length'],

readthedocs/projects/tasks/utils.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import datetime
22
import os
3+
import re
34

5+
import boto3
46
import structlog
57
from celery.worker.request import Request
68
from django.conf import settings
@@ -321,6 +323,49 @@ def deprecated_config_file_used_notification():
321323
)
322324

323325

326+
@app.task(queue="web")
def set_builder_scale_in_protection(instance, protected_from_scale_in):
    """
    Set or clear scale-in protection on the builder ``instance``.

    This way, AWS will not scale-in this instance while it's building the
    documentation. This is pretty useful for long running tasks.

    The call is best-effort: failures reported by the AWS API are logged and
    swallowed, and nothing is done when running a development environment
    or when the hostname cannot be parsed.

    :param instance: hostname of the builder, expected to have the form
        ``<scaling-group>-<instance-id>`` (e.g. ``web-extra-i-0c3e866c4e323928f``).
    :param protected_from_scale_in: ``True`` to enable the protection,
        ``False`` to remove it.
    """
    # ``bind()`` returns a *new* logger instead of mutating ``log``, so the
    # result has to be kept and used for the context to show up in the logs.
    task_log = log.bind(
        instance=instance,
        protected_from_scale_in=protected_from_scale_in,
    )

    if settings.DEBUG or settings.RTD_DOCKER_COMPOSE:
        task_log.info(
            "Running development environment. Skipping scale-in protection.",
        )
        return

    # Split the hostname into scaling group name and EC2 instance id,
    # e.g. ``web-extra-i-0c3e866c4e323928f`` -> ``web-extra`` / ``i-0c3e866c4e323928f``
    hostname_match = re.match(r"([a-z\-]+)-(i-[a-f0-9]+)", instance)
    if not hostname_match:
        task_log.warning(
            "Unable to set scale-in protection. Hostname name matching not found.",
        )
        return
    scaling_group, instance_id = hostname_match.groups()

    # Create the client only once we know there is something to send.
    asg = boto3.client(
        "autoscaling",
        aws_access_key_id=settings.RTD_AWS_SCALE_IN_ACCESS_KEY,
        aws_secret_access_key=settings.RTD_AWS_SCALE_IN_SECRET_ACCESS_KEY,
        region_name=settings.RTD_AWS_SCALE_IN_REGION_NAME,
    )

    # Set protection on instance
    try:
        asg.set_instance_protection(
            InstanceIds=[instance_id],
            AutoScalingGroupName=scaling_group,
            ProtectedFromScaleIn=protected_from_scale_in,
        )
    except Exception:
        # Deliberately broad: a failure here must not break the build flow.
        task_log.exception("Failed when trying to set instance protection.")
367+
368+
324369
class BuildRequest(Request):
325370

326371
def on_timeout(self, soft, timeout):

readthedocs/rtd_tests/tests/test_project_forms.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from unittest import mock
12

23
from django.contrib.auth.models import User
34
from django.test import TestCase
@@ -258,8 +259,9 @@ def test_can_update_privacy_level(self):
258259
self.assertTrue(form.is_valid())
259260
self.assertEqual(self.project.privacy_level, PRIVATE)
260261

262+
@mock.patch("readthedocs.projects.tasks.builds.update_docs_task")
261263
@override_settings(ALLOW_PRIVATE_REPOS=False)
262-
def test_custom_readthedocs_yaml(self):
264+
def test_custom_readthedocs_yaml(self, update_docs_task):
263265
custom_readthedocs_yaml_path = "folder/.readthedocs.yaml"
264266
form = ProjectAdvancedForm(
265267
{

0 commit comments

Comments
 (0)