Commit 1ccc43b

Build: use rclone for sync

1 parent c4a15c8 commit 1ccc43b

File tree: 6 files changed, +138 -5 lines changed

dockerfiles/Dockerfile
Lines changed: 2 additions & 1 deletion

@@ -30,7 +30,8 @@ RUN apt-get -y install \
     netcat \
     telnet \
     lsb-release \
-    npm
+    npm \
+    rclone
 
 # Gets the MinIO mc client used to add buckets upon initialization
 # If this client should have issues running inside this image, it is also
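With rclone added to the build image, the wrapper introduced in readthedocs/storage/rclone.py can shell out to it. A minimal sketch (illustrative only, not part of this commit) of confirming inside the container that the binary is actually on the PATH:

# Illustrative check only: confirm the "rclone" binary installed in the
# Docker image is reachable before the RClone wrapper shells out to it.
import shutil
import subprocess

rclone_path = shutil.which("rclone")
assert rclone_path is not None, "rclone is not installed in this image"

# Print the installed version string, e.g. "rclone v1.x.y".
print(subprocess.run(["rclone", "version"], capture_output=True, text=True).stdout)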

readthedocs/builds/storage.py
Lines changed: 10 additions & 0 deletions

@@ -1,4 +1,5 @@
 from pathlib import Path
+from functools import cached_property
 
 import structlog
 from django.conf import settings
@@ -7,6 +8,7 @@
 from storages.utils import get_available_overwrite_name, safe_join
 
 from readthedocs.core.utils.filesystem import safe_open
+from readthedocs.storage.rclone import RClone
 
 log = structlog.get_logger(__name__)
 
@@ -153,6 +155,14 @@ def sync_directory(self, source, destination):
             log.debug('Deleting file from media storage.', filepath=filepath)
             self.delete(filepath)
 
+    @cached_property
+    def _rclone(self):
+        return RClone()
+
+    def rclone_sync(self, source, destination):
+        """Sync a directory recursively to storage using rclone sync."""
+        return self._rclone.sync(source, destination)
+
     def join(self, directory, filepath):
         return safe_join(directory, filepath)
 
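A minimal usage sketch of the new helper (the import of the configured build_media_storage object is assumed from the rest of the Read the Docs codebase, and the paths are made up): rclone_sync() hands the whole directory to a lazily created RClone instance in one call, instead of uploading and deleting files one by one as sync_directory() does.

# Assumed import: the configured build media storage object used elsewhere
# in the codebase; the source/destination paths below are hypothetical.
from readthedocs.storage import build_media_storage

# Sync a local build output directory into media storage via rclone.
build_media_storage.rclone_sync("/tmp/artifacts/html/", "html/my-project/latest/")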

readthedocs/projects/models.py
Lines changed: 5 additions & 0 deletions

@@ -1842,6 +1842,7 @@ def add_features(sender, **kwargs):
     USE_SPHINX_BUILDERS = "use_sphinx_builders"
     CANCEL_OLD_BUILDS = "cancel_old_builds"
     DONT_CREATE_INDEX = "dont_create_index"
+    USE_RCLONE = "use_rclone"
 
     FEATURES = (
         (ALLOW_DEPRECATED_WEBHOOKS, _('Allow deprecated webhook views')),
@@ -1998,6 +1999,10 @@ def add_features(sender, **kwargs):
             DONT_CREATE_INDEX,
             _('Do not create index.md or README.rst if the project does not have one.'),
         ),
+        (
+            USE_RCLONE,
+            _("Use rclone for syncing files to the media storage."),
+        ),
     )
 
     projects = models.ManyToManyField(
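A minimal sketch of opting a single project into the new flag from a Django shell (the project slug is hypothetical, and the feature_id field name is assumed from the existing Feature model):

# Hypothetical example of enabling the rollout flag for one project.
from readthedocs.projects.models import Feature, Project

project = Project.objects.get(slug="my-project")  # hypothetical slug
feature, _ = Feature.objects.get_or_create(feature_id=Feature.USE_RCLONE)
feature.projects.add(project)

# The build task checks this flag before choosing rclone_sync().
assert project.has_feature(Feature.USE_RCLONE)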

readthedocs/projects/tasks/builds.py
Lines changed: 4 additions & 1 deletion

@@ -833,7 +833,10 @@ def store_build_artifacts(
                 version_type=self.data.version.type,
             )
             try:
-                build_media_storage.sync_directory(from_path, to_path)
+                if self.data.project.has_feature(Feature.USE_RCLONE):
+                    build_media_storage.rclone_sync(from_path, to_path)
+                else:
+                    build_media_storage.sync_directory(from_path, to_path)
             except Exception:
                 # Ideally this should just be an IOError
                 # but some storage backends unfortunately throw other errors

readthedocs/storage/rclone.py
Lines changed: 91 additions & 0 deletions

@@ -0,0 +1,91 @@
+"""Wrapper around the rclone command."""
+
+import os
+import subprocess
+
+import structlog
+
+log = structlog.get_logger(__name__)
+
+
+class RClone:
+
+    remote_type = "local"
+    rclone_bin = "rclone"
+    default_options = [
+        # Number of file transfers to run in parallel.
+        "--transfers=8",
+        "--verbose",
+    ]
+    env_vars = {}
+
+    def build_target(self, path):
+        return f":{self.remote_type}:{path}"
+
+    def execute(self, action, args, options=None):
+        options = options or []
+        command = [
+            self.rclone_bin,
+            action,
+            *self.default_options,
+            *options,
+            "--",
+            *args,
+        ]
+        env = os.environ.copy()
+        # env = {}
+        env.update(self.env_vars)
+        log.info("Executing rclone command.", command=command)
+        log.debug("env", env=env)
+        result = subprocess.run(
+            command,
+            capture_output=True,
+            env=env,
+        )
+        log.debug(
+            "Result.",
+            stdout=result.stdout.decode(),
+            stderr=result.stderr.decode(),
+            exit_code=result.returncode,
+        )
+        return result
+
+    def sync(self, source, destination):
+        # TODO: check if source can be a symlink.
+        return self.execute("sync", args=[source, self.build_target(destination)])
+
+
+class RCloneS3Remote(RClone):
+
+    remote_type = "s3"
+
+    def __init__(
+        self,
+        bucket_name,
+        access_key_id,
+        secret_acces_key,
+        region,
+        provider="AWS",
+        acl=None,
+        endpoint=None,
+    ):
+        super().__init__()
+        # rclone S3 options passed as env vars.
+        # https://rclone.org/s3/#standard-options.
+        region = region or ""
+        self.env_vars = {
+            "RCLONE_S3_PROVIDER": provider,
+            "RCLONE_S3_ACCESS_KEY_ID": access_key_id,
+            "RCLONE_S3_SECRET_ACCESS_KEY": secret_acces_key,
+            "RCLONE_S3_REGION": region,
+            "RCLONE_S3_LOCATION_CONSTRAINT": region,
+        }
+        if acl:
+            self.env_vars["RCLONE_S3_ACL"] = acl
+        if endpoint:
+            self.env_vars["RCLONE_S3_ENDPOINT"] = endpoint
+        self.bucket_name = bucket_name
+
+    def build_target(self, path):
+        path = f"{self.bucket_name}/{path}"
+        return super().build_target(path)
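A minimal usage sketch of the new module (bucket name, credentials, and paths are made up): RCloneS3Remote builds a ":s3:<bucket>/<path>" target and passes the S3 configuration to the subprocess through RCLONE_S3_* environment variables.

# Hypothetical values throughout; only the command shape is the point.
from readthedocs.storage.rclone import RCloneS3Remote

remote = RCloneS3Remote(
    bucket_name="media-bucket",
    access_key_id="AKIAEXAMPLE",
    secret_acces_key="example-secret",
    region="us-east-1",
)
result = remote.sync("/tmp/html/", "html/my-project/latest/")
# Roughly equivalent to running:
#   rclone sync --transfers=8 --verbose -- /tmp/html/ :s3:media-bucket/html/my-project/latest/
print(result.returncode)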

readthedocs/storage/s3_storage.py
Lines changed: 26 additions & 3 deletions

@@ -9,16 +9,39 @@
 
 # Disable abstract method because we are not overriding all the methods
 # pylint: disable=abstract-method
+from functools import cached_property
 from django.conf import settings
 from django.core.exceptions import ImproperlyConfigured
 from storages.backends.s3boto3 import S3Boto3Storage, S3ManifestStaticStorage
 
 from readthedocs.builds.storage import BuildMediaStorageMixin
+from readthedocs.storage.rclone import RCloneS3Remote
 
 from .mixins import OverrideHostnameMixin, S3PrivateBucketMixin
 
 
-class S3BuildMediaStorage(BuildMediaStorageMixin, OverrideHostnameMixin, S3Boto3Storage):
+class S3BuildMediaStorageMixin(BuildMediaStorageMixin, S3Boto3Storage):
+
+    @cached_property
+    def _rclone(self):
+        provider = "AWS"
+        # If a custom endpoint URL is given and
+        # we are running in DEBUG mode, use minio as provider.
+        if self.endpoint_url and settings.DEBUG:
+            provider = "minio"
+
+        return RCloneS3Remote(
+            bucket_name=self.bucket_name,
+            access_key_id=self.access_key,
+            secret_acces_key=self.secret_key,
+            region=self.region_name,
+            acl=self.default_acl,
+            endpoint=self.endpoint_url,
+            provider=provider,
+        )
+
+
+class S3BuildMediaStorage(OverrideHostnameMixin, S3BuildMediaStorageMixin):
 
     """An AWS S3 Storage backend for build artifacts."""
 
@@ -94,7 +117,7 @@ class NoManifestS3StaticStorage(
     """
 
 
-class S3BuildEnvironmentStorage(S3PrivateBucketMixin, BuildMediaStorageMixin, S3Boto3Storage):
+class S3BuildEnvironmentStorage(S3PrivateBucketMixin, S3BuildMediaStorageMixin):
 
     bucket_name = getattr(settings, 'S3_BUILD_ENVIRONMENT_STORAGE_BUCKET', None)
 
@@ -108,7 +131,7 @@ def __init__(self, *args, **kwargs):
         )
 
 
-class S3BuildToolsStorage(S3PrivateBucketMixin, BuildMediaStorageMixin, S3Boto3Storage):
+class S3BuildToolsStorage(S3PrivateBucketMixin, S3BuildMediaStorageMixin):
 
     bucket_name = getattr(settings, 'S3_BUILD_TOOLS_STORAGE_BUCKET', None)
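Putting the pieces together, a minimal sketch (it assumes the usual S3 storage settings are configured; paths are made up): BuildMediaStorageMixin.rclone_sync() reads the _rclone property, which S3BuildMediaStorageMixin overrides here to return a fully configured RCloneS3Remote instead of the default local-filesystem RClone.

# Sketch only: requires the S3 storage settings to be configured.
from readthedocs.storage.s3_storage import S3BuildMediaStorage

storage = S3BuildMediaStorage()
# rclone_sync() comes from BuildMediaStorageMixin; _rclone is the
# RCloneS3Remote built by S3BuildMediaStorageMixin above.
storage.rclone_sync("/tmp/html/", "html/my-project/latest/")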
