
Commit 05d44e3

Build: use rclone for sync

1 parent 8048602 commit 05d44e3

6 files changed: +212, -5 lines

dockerfiles/Dockerfile
Lines changed: 2 additions & 1 deletion

@@ -30,7 +30,8 @@ RUN apt-get -y install \
     netcat \
     telnet \
     lsb-release \
-    npm
+    npm \
+    rclone

 # Gets the MinIO mc client used to add buckets upon initialization
 # If this client should have issues running inside this image, it is also

readthedocs/builds/storage.py
Lines changed: 10 additions & 0 deletions

@@ -1,3 +1,4 @@
+from functools import cached_property
 from pathlib import Path

 import structlog
@@ -7,6 +8,7 @@
 from storages.utils import get_available_overwrite_name, safe_join

 from readthedocs.core.utils.filesystem import safe_open
+from readthedocs.storage.rclone import RClone

 log = structlog.get_logger(__name__)

@@ -153,6 +155,14 @@ def sync_directory(self, source, destination):
             log.debug('Deleting file from media storage.', filepath=filepath)
             self.delete(filepath)

+    @cached_property
+    def _rclone(self):
+        return RClone()
+
+    def rclone_sync(self, source, destination):
+        """Sync a directory recursively to storage using rclone sync."""
+        return self._rclone.sync(source, destination)
+
     def join(self, directory, filepath):
         return safe_join(directory, filepath)
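For context, `rclone_sync` mirrors the existing `sync_directory` API, so callers only swap the method name. A minimal sketch of the difference (the storage class and paths here are illustrative, not part of this diff):

    # Illustrative only: any storage class using BuildMediaStorageMixin works;
    # the class name and paths are placeholders for this sketch.
    storage = BuildMediaFileSystemStorage()

    # Existing path: walk the tree, uploading and deleting files one by one.
    storage.sync_directory("artifacts/html", "html/myproject/latest")

    # New path: hand the whole directory to a single `rclone sync` subprocess.
    storage.rclone_sync("artifacts/html", "html/myproject/latest")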

readthedocs/projects/models.py
Lines changed: 5 additions & 0 deletions

@@ -1845,6 +1845,7 @@ def add_features(sender, **kwargs):
     USE_SPHINX_BUILDERS = "use_sphinx_builders"
     CANCEL_OLD_BUILDS = "cancel_old_builds"
     DONT_CREATE_INDEX = "dont_create_index"
+    USE_RCLONE = "use_rclone"

     FEATURES = (
         (ALLOW_DEPRECATED_WEBHOOKS, _('Allow deprecated webhook views')),
@@ -2001,6 +2002,10 @@ def add_features(sender, **kwargs):
             DONT_CREATE_INDEX,
             _('Do not create index.md or README.rst if the project does not have one.'),
         ),
+        (
+            USE_RCLONE,
+            _("Use rclone for syncing files to the media storage."),
+        ),
     )

     projects = models.ManyToManyField(
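The new entry is a regular per-project feature flag, so once it is enabled it is checked like any other feature. A minimal sketch, `project` being any `Project` instance (the surrounding code is illustrative):

    from readthedocs.projects.models import Feature

    # The build task below uses exactly this check to pick the rclone path.
    if project.has_feature(Feature.USE_RCLONE):
        ...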

readthedocs/projects/tasks/builds.py
Lines changed: 4 additions & 1 deletion

@@ -833,7 +833,10 @@ def store_build_artifacts(
                 version_type=self.data.version.type,
             )
             try:
-                build_media_storage.sync_directory(from_path, to_path)
+                if self.data.project.has_feature(Feature.USE_RCLONE):
+                    build_media_storage.rclone_sync(from_path, to_path)
+                else:
+                    build_media_storage.sync_directory(from_path, to_path)
             except Exception:
                 # Ideally this should just be an IOError
                 # but some storage backends unfortunately throw other errors

readthedocs/storage/rclone.py
Lines changed: 163 additions & 0 deletions

@@ -0,0 +1,163 @@
+"""
+Wrapper around the rclone command.
+
+See https://rclone.org/docs.
+"""
+
+import os
+import subprocess
+
+import structlog
+
+log = structlog.get_logger(__name__)
+
+
+class RClone:
+
+    """
+    RClone base class.
+
+    This class allows you to interact with an rclone remote without
+    a configuration file, the remote declaration and its options
+    are passed in the command itself.
+
+    This base class allows you to use the local file system as remote.
+
+    :param remote_type: You can see the full list of supported providers at
+     https://rclone.org/#providers. Defaults to use the local filesystem
+     (https://rclone.org/local/).
+    :param rclone_bin: Binary name or path to the rclone binary.
+     Defaults to ``rclone``.
+    :param default_options: Options passed to the rclone command.
+    :param env_vars: Environment variables used when executing the rclone command.
+     Useful to pass secrets to the command, since all arguments and options will be logged.
+    """
+
+    remote_type = "local"
+    rclone_bin = "rclone"
+    default_options = [
+        # Number of file transfers to run in parallel.
+        "--transfers=8",
+        "--verbose",
+    ]
+    env_vars = {}
+
+    def build_target(self, path):
+        """
+        Build the proper target using the current remote type.
+
+        We start the remote with `:` to create it on the fly,
+        instead of having to create a configuration file.
+        See https://rclone.org/docs/#backend-path-to-dir.
+
+        :param path: Path to the remote target.
+        """
+        return f":{self.remote_type}:{path}"
+
+    def execute(self, action, args, options=None):
+        """
+        Execute an rclone subcommand.
+
+        :param action: Name of the subcommand.
+        :param list args: List of positional arguments passed to the command.
+        :param list options: List of options passed to the command.
+        """
+        options = options or []
+        command = [
+            self.rclone_bin,
+            action,
+            *self.default_options,
+            *options,
+            "--",
+            *args,
+        ]
+        env = os.environ.copy()
+        # env = {}
+        env.update(self.env_vars)
+        log.info("Executing rclone command.", command=command)
+        log.debug("env", env=env)
+        result = subprocess.run(
+            command,
+            capture_output=True,
+            env=env,
+            # TODO: Fail or let the caller decide what to do?
+            check=True,
+        )
+        log.debug(
+            "Result.",
+            stdout=result.stdout.decode(),
+            stderr=result.stderr.decode(),
+            exit_code=result.returncode,
+        )
+        return result
+
+    def sync(self, source, destination):
+        """
+        Run the `rclone sync` command.
+
+        See https://rclone.org/commands/rclone_sync/.
+
+        :params source: Local path to the source directory.
+        :params destination: Remote path to the destination directory.
+        """
+        # TODO: check if source can be a symlink.
+        return self.execute("sync", args=[source, self.build_target(destination)])
+
+
+class RCloneS3Remote(RClone):
+
+    """
+    RClone remote implementation for S3.
+
+    All secrets will be passed as environ variables.
+
+    See https://rclone.org/s3/.
+
+    :params bucket_name: Name of the S3 bucket.
+    :params access_key_id: AWS access key id.
+    :params secret_acces_key: AWS secret access key.
+    :params region: AWS region.
+    :params provider: S3 provider, defaults to ``AWS``.
+     Useful to use Minio during development.
+     See https://rclone.org/s3/#s3-provider.
+    :param acl: Canned ACL used when creating buckets and storing or copying objects.
+     See https://rclone.org/s3/#s3-acl.
+    :param endpoint: Custom S3 endpoint, useful for development.
+    """
+
+    remote_type = "s3"
+
+    def __init__(
+        self,
+        bucket_name,
+        access_key_id,
+        secret_acces_key,
+        region,
+        provider="AWS",
+        acl=None,
+        endpoint=None,
+    ):
+        super().__init__()
+
+        # When using Minio, the region is set to None.
+        region = region or ""
+
+        # rclone S3 options passed as env vars.
+        # https://rclone.org/s3/#standard-options.
+        self.env_vars = {
+            "RCLONE_S3_PROVIDER": provider,
+            "RCLONE_S3_ACCESS_KEY_ID": access_key_id,
+            "RCLONE_S3_SECRET_ACCESS_KEY": secret_acces_key,
+            "RCLONE_S3_REGION": region,
+            "RCLONE_S3_LOCATION_CONSTRAINT": region,
+        }
+        if acl:
+            self.env_vars["RCLONE_S3_ACL"] = acl
+        if endpoint:
+            self.env_vars["RCLONE_S3_ENDPOINT"] = endpoint
+        self.bucket_name = bucket_name
+
+    def build_target(self, path):
+        """Overridden to prepend the bucket name to the path."""
+        path = f"{self.bucket_name}/{path}"
+        return super().build_target(path)
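To make the wrapper concrete, here is a rough usage sketch and the command it ends up running (the bucket name, credentials, and paths are placeholders, not values from this commit):

    # Placeholders only; real values come from the storage backend (see s3_storage.py below).
    remote = RCloneS3Remote(
        bucket_name="readthedocs-media",
        access_key_id="AKIA...",
        secret_acces_key="...",
        region="us-east-1",
    )

    # Executes roughly:
    #   rclone sync --transfers=8 --verbose -- artifacts/html :s3:readthedocs-media/html/myproject/latest
    # with the RCLONE_S3_* credentials passed via environment variables rather than arguments.
    remote.sync("artifacts/html", "html/myproject/latest")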

readthedocs/storage/s3_storage.py
Lines changed: 28 additions & 3 deletions

@@ -9,16 +9,41 @@

 # Disable abstract method because we are not overriding all the methods
 # pylint: disable=abstract-method
+from functools import cached_property
+
 from django.conf import settings
 from django.core.exceptions import ImproperlyConfigured
 from storages.backends.s3boto3 import S3Boto3Storage, S3ManifestStaticStorage

 from readthedocs.builds.storage import BuildMediaStorageMixin
+from readthedocs.storage.rclone import RCloneS3Remote

 from .mixins import OverrideHostnameMixin, S3PrivateBucketMixin


-class S3BuildMediaStorage(BuildMediaStorageMixin, OverrideHostnameMixin, S3Boto3Storage):
+class S3BuildMediaStorageMixin(BuildMediaStorageMixin, S3Boto3Storage):
+
+    @cached_property
+    def _rclone(self):
+        provider = "AWS"
+        # If a custom endpoint URL is given and
+        # we are running in DEBUG mode, use minio as provider.
+        if self.endpoint_url and settings.DEBUG:
+            provider = "minio"
+
+        return RCloneS3Remote(
+            bucket_name=self.bucket_name,
+            access_key_id=self.access_key,
+            secret_acces_key=self.secret_key,
+            region=self.region_name,
+            acl=self.default_acl,
+            endpoint=self.endpoint_url,
+            provider=provider,
+        )
+
+
+# pylint: disable=too-many-ancestors
+class S3BuildMediaStorage(OverrideHostnameMixin, S3BuildMediaStorageMixin):

     """An AWS S3 Storage backend for build artifacts."""

@@ -94,7 +119,7 @@ class NoManifestS3StaticStorage(
     """


-class S3BuildEnvironmentStorage(S3PrivateBucketMixin, BuildMediaStorageMixin, S3Boto3Storage):
+class S3BuildEnvironmentStorage(S3PrivateBucketMixin, S3BuildMediaStorageMixin):

     bucket_name = getattr(settings, 'S3_BUILD_ENVIRONMENT_STORAGE_BUCKET', None)

@@ -108,7 +133,7 @@ def __init__(self, *args, **kwargs):
             )


-class S3BuildToolsStorage(S3PrivateBucketMixin, BuildMediaStorageMixin, S3Boto3Storage):
+class S3BuildToolsStorage(S3PrivateBucketMixin, S3BuildMediaStorageMixin):

     bucket_name = getattr(settings, 'S3_BUILD_TOOLS_STORAGE_BUCKET', None)
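Because `S3BuildMediaStorageMixin` keeps `rclone_sync()` from `BuildMediaStorageMixin` and only overrides `_rclone`, the same call on the S3 backend now shells out to rclone against the configured bucket. A rough end-to-end sketch, assuming the storage class is resolved the usual Django way (the helper call and setting name below are assumptions, not part of this diff):

    from django.conf import settings
    from django.core.files.storage import get_storage_class

    # Assumption: RTD_BUILD_MEDIA_STORAGE points at S3BuildMediaStorage in production.
    build_media_storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)()

    # With the USE_RCLONE feature flag enabled, store_build_artifacts() takes this path:
    # _rclone is an RCloneS3Remote built from the storage's own bucket and credentials.
    build_media_storage.rclone_sync("artifacts/html", "html/myproject/latest")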
