Build: use scoped credentials for interacting with S3 #12078

Open · wants to merge 20 commits into main
Changes from 2 commits
33 changes: 33 additions & 0 deletions readthedocs/api/v2/views/model_views.py
@@ -1,6 +1,7 @@
"""Endpoints for listing Projects, Versions, Builds, etc."""

import json
from dataclasses import asdict

import structlog
from allauth.socialaccount.models import SocialAccount
@@ -38,6 +39,8 @@
from readthedocs.projects.models import Domain
from readthedocs.projects.models import Project
from readthedocs.storage import build_commands_storage
from readthedocs.storage.security_token_service import AWSTemporaryCredentialsError
from readthedocs.storage.security_token_service import get_s3_scoped_credentials

from ..serializers import BuildAdminReadOnlySerializer
from ..serializers import BuildAdminSerializer
@@ -345,6 +348,36 @@ def reset(self, request, **kwargs):
def get_queryset_for_api_key(self, api_key):
return self.model.objects.filter(project=api_key.project)

@decorators.action(
detail=True,
permission_classes=[HasBuildAPIKey],
methods=["post"],
url_path="temporary-credentials",
)
def temporary_credentials(self, request, **kwargs):
"""
Generate temporary credentials for the build.

For now, this only generates temporary credentials for interacting with S3.
"""
build = self.get_object()
project = build.project
version = build.version
try:
credentials = get_s3_scoped_credentials(
project=project,
version=version,
session_id=build.pk,
# 30 minutes should be enough to upload all build artifacts.
duration=30 * 60,
)
except AWSTemporaryCredentialsError:
return Response(
{"error": "Failed to generate temporary credentials"},
status=status.HTTP_500_INTERNAL_SERVER_ERROR,
)
return Response({"s3": asdict(credentials)})


class BuildCommandViewSet(DisableListEndpoint, CreateModelMixin, UserSelectViewSet):
parser_classes = [JSONParser, MultiPartParser]
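A rough sketch of how a client could consume this endpoint, assuming the build API key is sent as a Token authorization header (the host, key, and build ID are placeholders; the production builders go through self.data.api_client, shown in readthedocs/projects/tasks/builds.py below):

import requests

API_HOST = "https://readthedocs.org"  # placeholder
BUILD_API_KEY = "<build-api-key>"  # placeholder
BUILD_ID = 12345  # placeholder

response = requests.post(
    f"{API_HOST}/api/v2/build/{BUILD_ID}/temporary-credentials/",
    headers={"Authorization": f"Token {BUILD_API_KEY}"},
    timeout=30,
)
response.raise_for_status()
s3_credentials = response.json()["s3"]
# Keys mirror the AWSTemporaryCredentials dataclass serialized with asdict():
# "access_key_id", "secret_access_key", "session_token".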
4 changes: 4 additions & 0 deletions readthedocs/builds/storage.py
@@ -32,6 +32,10 @@ class BuildMediaStorageMixin:
# that will serve files from this storage.
internal_redirect_root_path = "proxito"

# Whether the storage backend supports passing credentials to its __init__ method.
# Mainly used for S3.
supports_credentials = False

@staticmethod
def _dirpath(path):
"""
5 changes: 5 additions & 0 deletions readthedocs/projects/models.py
@@ -1932,6 +1932,7 @@ def add_features(sender, **kwargs):

# Build related features
SCALE_IN_PROTECTION = "scale_in_prtection"
USE_S3_SCOPED_CREDENTIALS_ON_BUILDERS = "use_s3_scoped_credentials_on_builders"

FEATURES = (
(
@@ -2010,6 +2011,10 @@
SCALE_IN_PROTECTION,
_("Build: Set scale-in protection before/after building."),
),
(
USE_S3_SCOPED_CREDENTIALS_ON_BUILDERS,
_("Build: Use S3 scoped credentials for uploading build artifacts."),
),
)

FEATURES = sorted(FEATURES, key=lambda x: x[1])
44 changes: 43 additions & 1 deletion readthedocs/projects/tasks/builds.py
@@ -18,6 +18,7 @@
from celery import Task
from django.conf import settings
from django.utils import timezone
from django.utils.module_loading import import_string
from slumber import API
from slumber.exceptions import HttpClientError

@@ -53,7 +54,6 @@
from readthedocs.doc_builder.exceptions import BuildUserError
from readthedocs.doc_builder.exceptions import MkDocsYAMLParseError
from readthedocs.projects.models import Feature
from readthedocs.storage import build_media_storage
from readthedocs.telemetry.collectors import BuildDataCollector
from readthedocs.telemetry.tasks import save_build_data
from readthedocs.worker import app
@@ -885,6 +885,46 @@ def set_valid_clone(self):
self.data.project.has_valid_clone = True
self.data.version.project.has_valid_clone = True

def _get_sync_media_storage(self):
"""
Get a storage class instance to use for syncing build artifacts.

.. note::

We no longer use readthedocs.storage.build_media_storage directly,
as we now use per-build credentials for S3 storage,
so we need to create the storage class instance dynamically.
"""
storage_class = import_string(settings.RTD_BUILD_MEDIA_STORAGE)
extra_kwargs = {}
if storage_class.supports_credentials:
extra_kwargs = self._get_s3_scoped_credentials()
return storage_class(**extra_kwargs)

def _get_s3_scoped_credentials(self):
if not self.data.project.has_feature(Feature.USE_S3_SCOPED_CREDENTIALS_ON_BUILDERS):
return {}

build_id = self.data.build["id"]
try:
credentials = self.data.api_client.build(f"{build_id}/temporary-credentials").post()
except Exception:
log.exception(
"Error getting scoped credentials.",
build_id=build_id,
)
raise BuildAppError(
BuildAppError.GENERIC_WITH_BUILD_ID,
exception_message="Error getting scoped credentials.",
)

s3_credentials = credentials["s3"]
return {
"access_key": s3_credentials["access_key_id"],
"secret_key": s3_credentials["secret_access_key"],
"security_token": s3_credentials["session_token"],
}

def store_build_artifacts(self):
"""
Save build artifacts to "storage" using Django's storage API.
@@ -904,6 +944,8 @@ def store_build_artifacts(self):
types_to_copy = []
types_to_delete = []

build_media_storage = self._get_sync_media_storage()

for artifact_type in ARTIFACT_TYPES:
if artifact_type in valid_artifacts:
types_to_copy.append(artifact_type)
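Putting the two helpers together, the flow at artifact-sync time looks roughly like this (a condensed sketch with placeholder credential values; django-storages' S3 backend accepts access_key, secret_key, and security_token as init kwargs):

from django.conf import settings
from django.utils.module_loading import import_string

storage_class = import_string(settings.RTD_BUILD_MEDIA_STORAGE)
if storage_class.supports_credentials:
    # Scoped credentials fetched from the temporary-credentials endpoint.
    storage = storage_class(
        access_key="ASIA...",  # placeholder
        secret_key="...",  # placeholder
        security_token="...",  # placeholder
    )
else:
    # Filesystem-backed storage needs no credentials.
    storage = storage_class()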
2 changes: 2 additions & 0 deletions readthedocs/settings/base.py
@@ -419,6 +419,8 @@ def MIDDLEWARE(self):
]
PYTHON_MEDIA = False

RTD_USE_SCOPED_CREDENTIALS_ON_BUILDS = False

# Django Storage subclass used to write build artifacts to cloud or local storage
# https://docs.readthedocs.io/page/development/settings.html#rtd-build-media-storage
RTD_BUILD_MEDIA_STORAGE = "readthedocs.builds.storage.BuildMediaFileSystemStorage"
3 changes: 3 additions & 0 deletions readthedocs/storage/rclone.py
@@ -173,6 +173,7 @@ def __init__(
secret_acces_key,
region,
provider="AWS",
session_token=None,
acl=None,
endpoint=None,
):
@@ -185,6 +186,8 @@
"RCLONE_S3_REGION": region,
"RCLONE_S3_LOCATION_CONSTRAINT": region,
}
if session_token:
self.env_vars["RCLONE_S3_SESSION_TOKEN"] = session_token
if acl:
self.env_vars["RCLONE_S3_ACL"] = acl
if endpoint:
2 changes: 2 additions & 0 deletions readthedocs/storage/s3_storage.py
@@ -32,6 +32,7 @@ def _rclone(self):
bucket_name=self.bucket_name,
access_key_id=self.access_key,
secret_acces_key=self.secret_key,
session_token=self.security_token,
region=self.region_name or "",
acl=self.default_acl,
endpoint=self.endpoint_url,
@@ -44,6 +45,7 @@ class S3BuildMediaStorage(OverrideHostnameMixin, S3BuildMediaStorageMixin):

bucket_name = getattr(settings, "S3_MEDIA_STORAGE_BUCKET", None)
override_hostname = getattr(settings, "S3_MEDIA_STORAGE_OVERRIDE_HOSTNAME", None)
supports_credentials = True

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
157 changes: 157 additions & 0 deletions readthedocs/storage/security_token_service.py
@@ -0,0 +1,157 @@
"""
Module to interact with AWS STS (Security Token Service) to assume a role and get temporary scoped credentials.

This is mainly used to generate temporary credentials to interact with S3 buckets from the builders.

In order to make use of STS, we need:

- A role in IAM with its trusted entity type set to the AWS account that is going to be used to generate the temporary credentials.
- A policy attached to that role that allows access to all S3 buckets and paths that are going to be used.

The permissions of the temporary credentials are the result of the intersection of the role's policy and the inline policy that is passed to the AssumeRole API.
This means the inline policy can be used to limit the permissions of the temporary credentials, but not to expand them.

See:

- https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html
- https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_sts-comparison.html
- https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp_control-access_assumerole.html
"""

import json
from dataclasses import dataclass

import boto3
import structlog
from django.conf import settings

from readthedocs.storage import build_media_storage


log = structlog.get_logger(__name__)


class AWSTemporaryCredentialsError(Exception):
"""Exception raised when there is an error getting AWS S3 credentials."""


@dataclass
class AWSTemporaryCredentials:
"""Dataclass to hold AWS temporary credentials."""

access_key_id: str
secret_access_key: str
session_token: str | None


def get_sts_client():
return boto3.client(
"sts",
aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
# TODO: should this be its own setting?
region_name=settings.AWS_S3_REGION_NAME,
)


def get_s3_scoped_credentials(
*, project, version, session_id=None, duration=60 * 15
) -> AWSTemporaryCredentials:
"""
:param project: The project to get the credentials for.
:param version: The version to get the credentials for.
:param session_id: A unique identifier to add to the name of the role session.
The name of the session always includes the project and version slugs ("rtd-{project}-{version}");
if session_id is given, the name of the session will be "rtd-{session_id}-{project}-{version}".
AWS limits the session name to 64 characters, so if it is too long, it will be truncated.
For example, for a token used in a build, a good session_id is the ID of the build.
:param duration: The duration of the credentials in seconds. Default is 15 minutes.
Note that the minimum duration is 15 minutes and the maximum is given by the role (defaults to 1 hour).

.. note::

If RTD_USE_SCOPED_CREDENTIALS_ON_BUILDS is set to False, this function will return
the values of the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY settings.
"""
if not settings.RTD_USE_SCOPED_CREDENTIALS_ON_BUILDS:
return AWSTemporaryCredentials(
access_key_id=settings.AWS_ACCESS_KEY_ID,
secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
# A session token is not needed for the default credentials.
session_token=None,
)

bucket_name = build_media_storage.bucket_name
bucket_arn = f"arn:aws:s3:::{bucket_name}"

storage_paths = version.get_storage_paths()
# Generate the list of allowed prefix resources
# The resulting prefix looks like:
# - html/project/latest/*
# - pdf/project/latest/*
allowed_prefixes = [f"{storage_path}/*" for storage_path in storage_paths]

# Generate the list of allowed object resources in ARN format.
# The resulting ARN looks like:
# arn:aws:s3:::readthedocs-media/html/project/latest/*
# arn:aws:s3:::readthedocs-media/pdf/project/latest/*
allowed_objects_arn = [f"{bucket_arn}/{prefix}" for prefix in allowed_prefixes]

# Define an inline policy document to limit the permissions of the temporary credentials.
policy_document = {
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"s3:GetObject",
"s3:PutObject",
"s3:DeleteObject",
],
"Resource": allowed_objects_arn,
},
# In order to list the objects in a path, we need to allow the ListBucket action.
# But since that action is not scoped to a path, we need to limit it using a condition.
{
"Effect": "Allow",
"Action": ["s3:ListBucket"],
"Resource": [
bucket_arn,
],
"Condition": {
"StringLike": {
"s3:prefix": allowed_prefixes,
}
},
},
],
}

session_prefix = f"rtd-{session_id}" if session_id else "rtd"
role_session_name = f"{session_prefix}-{project.slug}-{version.slug}"
# Limit to 64 characters, as per AWS limitations.
role_session_name = role_session_name[:64]

try:
sts_client = get_sts_client()
response = sts_client.assume_role(
RoleArn=settings.AWS_STS_ASSUME_ROLE_ARN,
RoleSessionName=role_session_name,
Policy=json.dumps(policy_document),
DurationSeconds=duration,
)
except Exception:
log.exception(
"Error while assuming role to generate temporary credentials",
role_session_name=role_session_name,
policy_document=policy_document,
duration=duration,
)
raise AWSTemporaryCredentialsError

credentials = response["Credentials"]
return AWSTemporaryCredentials(
access_key_id=credentials["AccessKeyId"],
secret_access_key=credentials["SecretAccessKey"],
session_token=credentials["SessionToken"],
)