Skip to content

Commit edc9797

Browse files
committed
Build: use rclone for sync
1 parent 8048602 commit edc9797

File tree

6 files changed

+206
-5
lines changed

6 files changed

+206
-5
lines changed

dockerfiles/Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ RUN apt-get -y install \
3030
netcat \
3131
telnet \
3232
lsb-release \
33-
npm
33+
npm \
34+
rclone
3435

3536
# Gets the MinIO mc client used to add buckets upon initialization
3637
# If this client should have issues running inside this image, it is also

readthedocs/builds/storage.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from pathlib import Path
2+
from functools import cached_property
23

34
import structlog
45
from django.conf import settings
@@ -7,6 +8,7 @@
78
from storages.utils import get_available_overwrite_name, safe_join
89

910
from readthedocs.core.utils.filesystem import safe_open
11+
from readthedocs.storage.rclone import RClone
1012

1113
log = structlog.get_logger(__name__)
1214

@@ -153,6 +155,14 @@ def sync_directory(self, source, destination):
153155
log.debug('Deleting file from media storage.', filepath=filepath)
154156
self.delete(filepath)
155157

158+
@cached_property
def _rclone(self):
    """
    Return the rclone wrapper used to sync files for this storage.

    The wrapper is created on first access and cached on the instance.
    The base ``RClone`` class targets the local filesystem.
    """
    remote = RClone()
    return remote
161+
162+
def rclone_sync(self, source, destination):
    """
    Sync a directory recursively to storage using rclone sync.

    :param source: Path of the directory to sync from.
    :param destination: Path in the storage to sync to.
    :returns: Whatever the ``sync`` method of the underlying rclone wrapper returns.
    """
    remote = self._rclone
    outcome = remote.sync(source, destination)
    return outcome
165+
156166
def join(self, directory, filepath):
157167
return safe_join(directory, filepath)
158168

readthedocs/projects/models.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1845,6 +1845,7 @@ def add_features(sender, **kwargs):
18451845
USE_SPHINX_BUILDERS = "use_sphinx_builders"
18461846
CANCEL_OLD_BUILDS = "cancel_old_builds"
18471847
DONT_CREATE_INDEX = "dont_create_index"
1848+
USE_RCLONE = "use_rclone"
18481849

18491850
FEATURES = (
18501851
(ALLOW_DEPRECATED_WEBHOOKS, _('Allow deprecated webhook views')),
@@ -2001,6 +2002,10 @@ def add_features(sender, **kwargs):
20012002
DONT_CREATE_INDEX,
20022003
_('Do not create index.md or README.rst if the project does not have one.'),
20032004
),
2005+
(
2006+
USE_RCLONE,
2007+
_("Use rclone for syncing files to the media storage."),
2008+
),
20042009
)
20052010

20062011
projects = models.ManyToManyField(

readthedocs/projects/tasks/builds.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -833,7 +833,10 @@ def store_build_artifacts(
833833
version_type=self.data.version.type,
834834
)
835835
try:
836-
build_media_storage.sync_directory(from_path, to_path)
836+
if self.data.project.has_feature(Feature.USE_RCLONE):
837+
build_media_storage.rclone_sync(from_path, to_path)
838+
else:
839+
build_media_storage.sync_directory(from_path, to_path)
837840
except Exception:
838841
# Ideally this should just be an IOError
839842
# but some storage backends unfortunately throw other errors

readthedocs/storage/rclone.py

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
"""
2+
Wrapper around the rclone command.
3+
4+
See https://rclone.org/docs.
5+
"""
6+
7+
import os
8+
import subprocess
9+
10+
import structlog
11+
12+
log = structlog.get_logger(__name__)
13+
14+
15+
class RClone:
    """
    RClone base class.

    This class allows you to interact with an rclone remote without
    a configuration file, the remote declaration and its options
    are passed in the command itself.

    This base class allows you to use the local file system as remote.

    :param remote_type: You can see the full list of supported providers at https://rclone.org/#providers.
     Defaults to use the local filesystem (https://rclone.org/local/).
    :param rclone_bin: Binary name or path to the rclone binary.
     Defaults to ``rclone``.
    :param default_options: Options passed to the rclone command.
    :param env_vars: Environment variables used when executing the rclone command.
     Useful to pass secrets to the command, since all arguments and options will be logged.
    """

    remote_type = "local"
    rclone_bin = "rclone"
    default_options = [
        # Number of file transfers to run in parallel.
        "--transfers=8",
        "--verbose",
    ]
    # NOTE: class-level dict shared by all instances; subclasses should
    # assign a new dict instead of mutating this one in place.
    env_vars = {}

    def build_target(self, path):
        """
        Build the proper target using the current remote type.

        We start the remote with `:` to create it on the fly,
        instead of having to create a configuration file.
        See https://rclone.org/docs/#backend-path-to-dir.

        :param path: Path to the remote target.
        :returns: Target string in the form ``:<remote_type>:<path>``.
        """
        return f":{self.remote_type}:{path}"

    def _build_env(self):
        """Return a copy of the process environment overlaid with ``env_vars``."""
        env = os.environ.copy()
        env.update(self.env_vars)
        return env

    def execute(self, action, args, options=None):
        """
        Execute an rclone subcommand.

        The command runs with the current process environment plus ``env_vars``.
        The exit code is not turned into an exception; callers can inspect
        ``returncode`` on the returned object.

        :param action: Name of the subcommand.
        :param list args: List of positional arguments passed to the command.
        :param list options: List of options passed to the command.
        :returns: The ``subprocess.CompletedProcess`` of the finished command.
        """
        options = options or []
        command = [
            self.rclone_bin,
            action,
            *self.default_options,
            *options,
            # Everything after ``--`` is treated as a positional argument.
            "--",
            *args,
        ]
        env = self._build_env()
        log.info("Executing rclone command.", command=command)
        # Log only the names of the variables we set: their values
        # (and the rest of the process environment) may contain secrets.
        log.debug("env", env_vars=sorted(self.env_vars))
        result = subprocess.run(
            command,
            capture_output=True,
            env=env,
        )
        # A failed command must be visible without enabling debug logging.
        log_result = log.debug if result.returncode == 0 else log.error
        log_result(
            "Result.",
            # The output is only used for logging,
            # undecodable bytes must not make the call fail.
            stdout=result.stdout.decode(errors="replace"),
            stderr=result.stderr.decode(errors="replace"),
            exit_code=result.returncode,
        )
        return result

    def sync(self, source, destination):
        """
        Run the `rclone sync` command.

        See https://rclone.org/commands/rclone_sync/.

        :param source: Local path to the source directory.
        :param destination: Remote path to the destination directory.
        :returns: The result of :meth:`execute`.
        """
        # TODO: check if source can be a symlink.
        return self.execute("sync", args=[source, self.build_target(destination)])
101+
102+
103+
class RCloneS3Remote(RClone):

    """
    RClone remote implementation for S3.

    All secrets will be passed as environ variables.

    See https://rclone.org/s3/.

    :param bucket_name: Name of the S3 bucket.
    :param access_key_id: AWS access key id.
    :param secret_acces_key: AWS secret access key.
    :param region: AWS region.
    :param provider: S3 provider, defaults to ``AWS``.
     Useful to use Minio during development.
     See https://rclone.org/s3/#s3-provider.
    :param acl: Canned ACL used when creating buckets and storing or copying objects.
     See https://rclone.org/s3/#s3-acl.
    :param endpoint: Custom S3 endpoint, useful for development.
    """

    remote_type = "s3"

    def __init__(
        self,
        bucket_name,
        access_key_id,
        secret_acces_key,
        region,
        provider="AWS",
        acl=None,
        endpoint=None,
    ):
        super().__init__()
        self.bucket_name = bucket_name

        # When using minio, the region is set to None,
        # an empty string is used in that case.
        region_name = region if region else ""

        # rclone S3 options passed as env vars.
        # https://rclone.org/s3/#standard-options.
        s3_settings = {
            "RCLONE_S3_PROVIDER": provider,
            "RCLONE_S3_ACCESS_KEY_ID": access_key_id,
            "RCLONE_S3_SECRET_ACCESS_KEY": secret_acces_key,
            "RCLONE_S3_REGION": region_name,
            "RCLONE_S3_LOCATION_CONSTRAINT": region_name,
        }
        # Optional settings are only included when a value was given.
        optional_settings = (
            ("RCLONE_S3_ACL", acl),
            ("RCLONE_S3_ENDPOINT", endpoint),
        )
        for name, value in optional_settings:
            if value:
                s3_settings[name] = value
        self.env_vars = s3_settings

    def build_target(self, path):
        """Overridden to prepend the bucket name to the path."""
        return super().build_target(f"{self.bucket_name}/{path}")

readthedocs/storage/s3_storage.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,39 @@
99

1010
# Disable abstract method because we are not overriding all the methods
1111
# pylint: disable=abstract-method
12+
from functools import cached_property
1213
from django.conf import settings
1314
from django.core.exceptions import ImproperlyConfigured
1415
from storages.backends.s3boto3 import S3Boto3Storage, S3ManifestStaticStorage
1516

1617
from readthedocs.builds.storage import BuildMediaStorageMixin
18+
from readthedocs.storage.rclone import RCloneS3Remote
1719

1820
from .mixins import OverrideHostnameMixin, S3PrivateBucketMixin
1921

2022

21-
class S3BuildMediaStorage(BuildMediaStorageMixin, OverrideHostnameMixin, S3Boto3Storage):
23+
class S3BuildMediaStorageMixin(BuildMediaStorageMixin, S3Boto3Storage):

    """Build media storage on S3 whose rclone remote reuses the storage's own S3 settings."""

    @cached_property
    def _rclone(self):
        """
        Return the rclone S3 remote for this storage.

        The remote is created on first access and cached on the instance.
        Bucket, credentials, region, ACL and endpoint are taken from the storage.
        """
        # A custom endpoint URL while running in DEBUG mode
        # means minio is used as provider instead of AWS.
        use_minio = bool(self.endpoint_url and settings.DEBUG)
        provider = "minio" if use_minio else "AWS"

        remote = RCloneS3Remote(
            bucket_name=self.bucket_name,
            access_key_id=self.access_key,
            secret_acces_key=self.secret_key,
            region=self.region_name,
            acl=self.default_acl,
            endpoint=self.endpoint_url,
            provider=provider,
        )
        return remote
42+
43+
44+
class S3BuildMediaStorage(OverrideHostnameMixin, S3BuildMediaStorageMixin):
2245

2346
"""An AWS S3 Storage backend for build artifacts."""
2447

@@ -94,7 +117,7 @@ class NoManifestS3StaticStorage(
94117
"""
95118

96119

97-
class S3BuildEnvironmentStorage(S3PrivateBucketMixin, BuildMediaStorageMixin, S3Boto3Storage):
120+
class S3BuildEnvironmentStorage(S3PrivateBucketMixin, S3BuildMediaStorageMixin):
98121

99122
bucket_name = getattr(settings, 'S3_BUILD_ENVIRONMENT_STORAGE_BUCKET', None)
100123

@@ -108,7 +131,7 @@ def __init__(self, *args, **kwargs):
108131
)
109132

110133

111-
class S3BuildToolsStorage(S3PrivateBucketMixin, BuildMediaStorageMixin, S3Boto3Storage):
134+
class S3BuildToolsStorage(S3PrivateBucketMixin, S3BuildMediaStorageMixin):
112135

113136
bucket_name = getattr(settings, 'S3_BUILD_TOOLS_STORAGE_BUCKET', None)
114137

0 commit comments

Comments
 (0)