From 05d44e3b79fdfae68414bf0279dfa9878b59a540 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Thu, 22 Dec 2022 19:00:37 -0500 Subject: [PATCH 1/6] Build: use rclone for sync --- dockerfiles/Dockerfile | 3 +- readthedocs/builds/storage.py | 10 ++ readthedocs/projects/models.py | 5 + readthedocs/projects/tasks/builds.py | 5 +- readthedocs/storage/rclone.py | 163 +++++++++++++++++++++++++++ readthedocs/storage/s3_storage.py | 31 ++++- 6 files changed, 212 insertions(+), 5 deletions(-) create mode 100644 readthedocs/storage/rclone.py diff --git a/dockerfiles/Dockerfile b/dockerfiles/Dockerfile index b26f322a48e..e6004b1976b 100644 --- a/dockerfiles/Dockerfile +++ b/dockerfiles/Dockerfile @@ -30,7 +30,8 @@ RUN apt-get -y install \ netcat \ telnet \ lsb-release \ - npm + npm \ + rclone # Gets the MinIO mc client used to add buckets upon initialization # If this client should have issues running inside this image, it is also diff --git a/readthedocs/builds/storage.py b/readthedocs/builds/storage.py index b4ddc5305c2..9d8c6780f0f 100644 --- a/readthedocs/builds/storage.py +++ b/readthedocs/builds/storage.py @@ -1,3 +1,4 @@ +from functools import cached_property from pathlib import Path import structlog @@ -7,6 +8,7 @@ from storages.utils import get_available_overwrite_name, safe_join from readthedocs.core.utils.filesystem import safe_open +from readthedocs.storage.rclone import RClone log = structlog.get_logger(__name__) @@ -153,6 +155,14 @@ def sync_directory(self, source, destination): log.debug('Deleting file from media storage.', filepath=filepath) self.delete(filepath) + @cached_property + def _rclone(self): + return RClone() + + def rclone_sync(self, source, destination): + """Sync a directory recursively to storage using rclone sync.""" + return self._rclone.sync(source, destination) + def join(self, directory, filepath): return safe_join(directory, filepath) diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py index c05d9eb4388..3107f16c446 100644 --- a/readthedocs/projects/models.py +++ b/readthedocs/projects/models.py @@ -1845,6 +1845,7 @@ def add_features(sender, **kwargs): USE_SPHINX_BUILDERS = "use_sphinx_builders" CANCEL_OLD_BUILDS = "cancel_old_builds" DONT_CREATE_INDEX = "dont_create_index" + USE_RCLONE = "use_rclone" FEATURES = ( (ALLOW_DEPRECATED_WEBHOOKS, _('Allow deprecated webhook views')), @@ -2001,6 +2002,10 @@ def add_features(sender, **kwargs): DONT_CREATE_INDEX, _('Do not create index.md or README.rst if the project does not have one.'), ), + ( + USE_RCLONE, + _("Use rclone for syncing files to the media storage."), + ), ) projects = models.ManyToManyField( diff --git a/readthedocs/projects/tasks/builds.py b/readthedocs/projects/tasks/builds.py index f5a72d2a0c3..af1a3977a48 100644 --- a/readthedocs/projects/tasks/builds.py +++ b/readthedocs/projects/tasks/builds.py @@ -833,7 +833,10 @@ def store_build_artifacts( version_type=self.data.version.type, ) try: - build_media_storage.sync_directory(from_path, to_path) + if self.data.project.has_feature(Feature.USE_RCLONE): + build_media_storage.rclone_sync(from_path, to_path) + else: + build_media_storage.sync_directory(from_path, to_path) except Exception: # Ideally this should just be an IOError # but some storage backends unfortunately throw other errors diff --git a/readthedocs/storage/rclone.py b/readthedocs/storage/rclone.py new file mode 100644 index 00000000000..4d8a156756e --- /dev/null +++ b/readthedocs/storage/rclone.py @@ -0,0 +1,163 @@ +""" +Wrapper around the rclone command. 
+ +See https://rclone.org/docs. +""" + +import os +import subprocess + +import structlog + +log = structlog.get_logger(__name__) + + +class RClone: + + """ + RClone base class. + + This class allows you to interact with an rclone remote without + a configuration file, the remote declaration and its options + are passed in the command itself. + + This base class allows you to use the local file system as remote. + + :param remote_type: You can see the full list of supported providers at + https://rclone.org/#providers. Defaults to use the local filesystem + (https://rclone.org/local/). + :param rclone_bin: Binary name or path to the rclone binary. + Defaults to ``rclone``. + :param default_options: Options passed to the rclone command. + :parm env_vars: Environment variables used when executing the rclone command. + Useful to pass secrets to the command, since all arguments and options will be logged. + """ + + remote_type = "local" + rclone_bin = "rclone" + default_options = [ + # Number of file transfers to run in parallel. + "--transfers=8", + "--verbose", + ] + env_vars = {} + + def build_target(self, path): + """ + Build the proper target using the current remote type. + + We start the remote with `:` to create it on the fly, + instead of having to create a configuration file. + See https://rclone.org/docs/#backend-path-to-dir. + + :param path: Path to the remote target. + """ + return f":{self.remote_type}:{path}" + + def execute(self, action, args, options=None): + """ + Execute an rclone subcommand. + + :param action: Name of the subcommand. + :param list args: List of positional arguments passed the to command. + :param list options: List of options passed to the command. + """ + options = options or [] + command = [ + self.rclone_bin, + action, + *self.default_options, + *options, + "--", + *args, + ] + env = os.environ.copy() + # env = {} + env.update(self.env_vars) + log.info("Executing rclone command.", command=command) + log.debug("env", env=env) + result = subprocess.run( + command, + capture_output=True, + env=env, + # TODO: Fail or let the called decide what to do? + check=True, + ) + log.debug( + "Result.", + stdout=result.stdout.decode(), + stderr=result.stderr.decode(), + exit_code=result.returncode, + ) + return result + + def sync(self, source, destination): + """ + Run the `rclone sync` command. + + See https://rclone.org/commands/rclone_sync/. + + :params source: Local path to the source directory. + :params destination: Remote path to the destination directory. + """ + # TODO: check if source can be a symlink. + return self.execute("sync", args=[source, self.build_target(destination)]) + + +class RCloneS3Remote(RClone): + + """ + RClone remote implementation for S3. + + All secrets will be passed as environ variables. + + See https://rclone.org/s3/. + + :params bucket_name: Name of the S3 bucket. + :params access_key_id: AWS access key id. + :params secret_acces_key: AWS secret access key. + :params region: AWS region. + :params provider: S3 provider, defaults to ``AWS``. + Useful to use Minio during development. + See https://rclone.org/s3/#s3-provider. + :param acl: Canned ACL used when creating buckets and storing or copying objects. + See https://rclone.org/s3/#s3-acl. + :param endpoint: Custom S3 endpoint, useful for development. 
+ """ + + remote_type = "s3" + + def __init__( + self, + bucket_name, + access_key_id, + secret_acces_key, + region, + provider="AWS", + acl=None, + endpoint=None, + ): + super().__init__() + + # When using minion, the region is set to None. + region = region or "" + + # rclone S3 options passed as env vars. + # https://rclone.org/s3/#standard-options. + self.env_vars = { + "RCLONE_S3_PROVIDER": provider, + "RCLONE_S3_ACCESS_KEY_ID": access_key_id, + "RCLONE_S3_SECRET_ACCESS_KEY": secret_acces_key, + "RCLONE_S3_REGION": region, + "RCLONE_S3_LOCATION_CONSTRAINT": region, + } + if acl: + self.env_vars["RCLONE_S3_ACL"] = acl + if endpoint: + self.env_vars["RCLONE_S3_ENDPOINT"] = endpoint + self.bucket_name = bucket_name + + def build_target(self, path): + """Overridden to prepend the bucket name to the path.""" + path = f"{self.bucket_name}/{path}" + return super().build_target(path) diff --git a/readthedocs/storage/s3_storage.py b/readthedocs/storage/s3_storage.py index 197602ea50c..f6b06b4a27c 100644 --- a/readthedocs/storage/s3_storage.py +++ b/readthedocs/storage/s3_storage.py @@ -9,16 +9,41 @@ # Disable abstract method because we are not overriding all the methods # pylint: disable=abstract-method +from functools import cached_property + from django.conf import settings from django.core.exceptions import ImproperlyConfigured from storages.backends.s3boto3 import S3Boto3Storage, S3ManifestStaticStorage from readthedocs.builds.storage import BuildMediaStorageMixin +from readthedocs.storage.rclone import RCloneS3Remote from .mixins import OverrideHostnameMixin, S3PrivateBucketMixin -class S3BuildMediaStorage(BuildMediaStorageMixin, OverrideHostnameMixin, S3Boto3Storage): +class S3BuildMediaStorageMixin(BuildMediaStorageMixin, S3Boto3Storage): + + @cached_property + def _rclone(self): + provider = "AWS" + # If a custom endpoint URL is given and + # we are running in DEBUG mode, use minio as provider. 
+ if self.endpoint_url and settings.DEBUG: + provider = "minio" + + return RCloneS3Remote( + bucket_name=self.bucket_name, + access_key_id=self.access_key, + secret_acces_key=self.secret_key, + region=self.region_name, + acl=self.default_acl, + endpoint=self.endpoint_url, + provider=provider, + ) + + +# pylint: disable=too-many-ancestors +class S3BuildMediaStorage(OverrideHostnameMixin, S3BuildMediaStorageMixin): """An AWS S3 Storage backend for build artifacts.""" @@ -94,7 +119,7 @@ class NoManifestS3StaticStorage( """ -class S3BuildEnvironmentStorage(S3PrivateBucketMixin, BuildMediaStorageMixin, S3Boto3Storage): +class S3BuildEnvironmentStorage(S3PrivateBucketMixin, S3BuildMediaStorageMixin): bucket_name = getattr(settings, 'S3_BUILD_ENVIRONMENT_STORAGE_BUCKET', None) @@ -108,7 +133,7 @@ def __init__(self, *args, **kwargs): ) -class S3BuildToolsStorage(S3PrivateBucketMixin, BuildMediaStorageMixin, S3Boto3Storage): +class S3BuildToolsStorage(S3PrivateBucketMixin, S3BuildMediaStorageMixin): bucket_name = getattr(settings, 'S3_BUILD_TOOLS_STORAGE_BUCKET', None) From 3427b61fec9f4bc6486dd8814129b846abfba0f1 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 11 Jan 2023 11:49:06 -0500 Subject: [PATCH 2/6] Force to always use checksum --- readthedocs/storage/rclone.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/readthedocs/storage/rclone.py b/readthedocs/storage/rclone.py index 4d8a156756e..db6ebe46ded 100644 --- a/readthedocs/storage/rclone.py +++ b/readthedocs/storage/rclone.py @@ -38,6 +38,8 @@ class RClone: default_options = [ # Number of file transfers to run in parallel. "--transfers=8", + # Skip based on checksum (if available) & size, not mod-time & size. + "--checksum", "--verbose", ] env_vars = {} @@ -80,7 +82,7 @@ def execute(self, action, args, options=None): command, capture_output=True, env=env, - # TODO: Fail or let the called decide what to do? + # TODO: Fail or let the caller decide what to do? 
check=True, ) log.debug( From 9d295505e87a20d4aa61396d716fe739194fc0b9 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 11 Jan 2023 17:37:39 -0500 Subject: [PATCH 3/6] Tests --- .circleci/config.yml | 2 + readthedocs/builds/storage.py | 13 +++- .../rtd_tests/tests/test_build_storage.py | 61 +++++++++++++++++++ readthedocs/storage/rclone.py | 52 +++++++++++----- readthedocs/storage/s3_storage.py | 3 +- 5 files changed, 111 insertions(+), 20 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 9facf43fdfb..921cdd080fe 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -18,6 +18,8 @@ jobs: - checkout - run: git submodule sync - run: git submodule update --init + - run: sudo apt update + - run: sudo apt install -y rclone - run: pip install --user 'tox<5' - run: tox -e py310 - codecov/upload diff --git a/readthedocs/builds/storage.py b/readthedocs/builds/storage.py index 9d8c6780f0f..4c057fcc02b 100644 --- a/readthedocs/builds/storage.py +++ b/readthedocs/builds/storage.py @@ -8,7 +8,7 @@ from storages.utils import get_available_overwrite_name, safe_join from readthedocs.core.utils.filesystem import safe_open -from readthedocs.storage.rclone import RClone +from readthedocs.storage.rclone import RCloneLocal log = structlog.get_logger(__name__) @@ -157,10 +157,15 @@ def sync_directory(self, source, destination): @cached_property def _rclone(self): - return RClone() + raise NotImplementedError def rclone_sync(self, source, destination): """Sync a directory recursively to storage using rclone sync.""" + if destination in ("", "/"): + raise SuspiciousFileOperation("Syncing all storage cannot be right") + + # TODO: borrow the symlink check from #9890 when merged. + # self._check_suspicious_path(source) return self._rclone.sync(source, destination) def join(self, directory, filepath): @@ -197,6 +202,10 @@ def __init__(self, **kwargs): super().__init__(location) + @cached_property + def _rclone(self): + return RCloneLocal(location=self.location) + def get_available_name(self, name, max_length=None): """ A hack to overwrite by default with the FileSystemStorage. 
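
The local storage backend above delegates to RCloneLocal, which is added to
readthedocs/storage/rclone.py further down in this patch. A minimal usage sketch
(not part of the diff; both paths are placeholders):

from readthedocs.storage.rclone import RCloneLocal

# The configured storage root becomes the rclone "remote"; the second argument
# to sync() is a path relative to that root.
rclone = RCloneLocal(location="/var/media")
rclone.sync("/tmp/build/html", "html/myproject/latest")
# Roughly the command that gets executed:
#   rclone sync --transfers=8 --checksum --verbose -- \
#       /tmp/build/html :local:/var/media/html/myproject/latest
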
diff --git a/readthedocs/rtd_tests/tests/test_build_storage.py b/readthedocs/rtd_tests/tests/test_build_storage.py index 8724dbe0669..1344cb1eae9 100644 --- a/readthedocs/rtd_tests/tests/test_build_storage.py +++ b/readthedocs/rtd_tests/tests/test_build_storage.py @@ -1,7 +1,10 @@ import os import shutil import tempfile +from pathlib import Path +import pytest +from django.core.exceptions import SuspiciousFileOperation from django.test import TestCase, override_settings from readthedocs.builds.storage import BuildMediaFileSystemStorage @@ -118,3 +121,61 @@ def test_walk(self): self.assertEqual(top, 'files/api') self.assertCountEqual(dirs, []) self.assertCountEqual(files, ['index.html']) + + def test_rclone_sync(self): + tmp_files_dir = Path(tempfile.mkdtemp()) / "files" + shutil.copytree(files_dir, tmp_files_dir, symlinks=True) + storage_dir = "files" + + tree = [ + ("api", ["index.html"]), + "api.fjson", + "conf.py", + "test.html", + ] + with override_settings(DOCROOT=tmp_files_dir): + self.storage.rclone_sync(tmp_files_dir, storage_dir) + self.assertFileTree(storage_dir, tree) + + tree = [ + ("api", ["index.html"]), + "conf.py", + "test.html", + ] + (tmp_files_dir / "api.fjson").unlink() + with override_settings(DOCROOT=tmp_files_dir): + self.storage.rclone_sync(tmp_files_dir, storage_dir) + self.assertFileTree(storage_dir, tree) + + tree = [ + "conf.py", + "test.html", + ] + shutil.rmtree(tmp_files_dir / "api") + with override_settings(DOCROOT=tmp_files_dir): + self.storage.rclone_sync(tmp_files_dir, storage_dir) + self.assertFileTree(storage_dir, tree) + + @pytest.mark.skip( + "Waiting for https://github.com/readthedocs/readthedocs.org/pull/9890" + ) + def test_rclone_sync_source_symlink(self): + tmp_dir = Path(tempfile.mkdtemp()) + tmp_symlink_dir = Path(tempfile.mkdtemp()) / "files" + tmp_symlink_dir.symlink_to(tmp_dir) + + with override_settings(DOCROOT=tmp_dir): + with pytest.raises(SuspiciousFileOperation, match="symbolic link"): + self.storage.rclone_sync(tmp_symlink_dir, "files") + + @pytest.mark.skip( + "Waiting for https://github.com/readthedocs/readthedocs.org/pull/9890" + ) + def test_rclone_sync_source_outside_docroot(self): + tmp_dir = Path(tempfile.mkdtemp()) + tmp_docroot = Path(tempfile.mkdtemp()) / "docroot" + tmp_docroot.mkdir() + + with override_settings(DOCROOT=tmp_docroot): + with pytest.raises(SuspiciousFileOperation, match="outside the docroot"): + self.storage.rclone_sync(tmp_dir, "files") diff --git a/readthedocs/storage/rclone.py b/readthedocs/storage/rclone.py index db6ebe46ded..c7b6d7ae915 100644 --- a/readthedocs/storage/rclone.py +++ b/readthedocs/storage/rclone.py @@ -8,11 +8,13 @@ import subprocess import structlog +from django.utils._os import safe_join as safe_join_fs +from storages.utils import safe_join log = structlog.get_logger(__name__) -class RClone: +class BaseRClone: """ RClone base class. @@ -24,8 +26,7 @@ class RClone: This base class allows you to use the local file system as remote. :param remote_type: You can see the full list of supported providers at - https://rclone.org/#providers. Defaults to use the local filesystem - (https://rclone.org/local/). + https://rclone.org/#providers. :param rclone_bin: Binary name or path to the rclone binary. Defaults to ``rclone``. :param default_options: Options passed to the rclone command. @@ -33,10 +34,10 @@ class RClone: Useful to pass secrets to the command, since all arguments and options will be logged. 
""" - remote_type = "local" + remote_type = None rclone_bin = "rclone" default_options = [ - # Number of file transfers to run in parallel. + # Number of file transfers to run in parallel. "--transfers=8", # Skip based on checksum (if available) & size, not mod-time & size. "--checksum", @@ -44,6 +45,11 @@ class RClone: ] env_vars = {} + # pylint: disable=no-self-use + def _build_target_path(self, path): + """Build the final target path for the remote.""" + return path + def build_target(self, path): """ Build the proper target using the current remote type. @@ -54,6 +60,7 @@ def build_target(self, path): :param path: Path to the remote target. """ + path = self._build_target_path(path) return f":{self.remote_type}:{path}" def execute(self, action, args, options=None): @@ -74,7 +81,6 @@ def execute(self, action, args, options=None): *args, ] env = os.environ.copy() - # env = {} env.update(self.env_vars) log.info("Executing rclone command.", command=command) log.debug("env", env=env) @@ -102,11 +108,31 @@ def sync(self, source, destination): :params source: Local path to the source directory. :params destination: Remote path to the destination directory. """ - # TODO: check if source can be a symlink. return self.execute("sync", args=[source, self.build_target(destination)]) -class RCloneS3Remote(RClone): +class RCloneLocal(BaseRClone): + + """ + RClone remote implementation for the local file system. + + Used for local testing only. + + See https://rclone.org/local/. + + :param location: Root directory where the files will be stored. + """ + + remote_type = "local" + + def __init__(self, location): + self.location = location + + def _build_target_path(self, path): + return safe_join_fs(self.location, path) + + +class RCloneS3Remote(BaseRClone): """ RClone remote implementation for S3. @@ -139,11 +165,6 @@ def __init__( acl=None, endpoint=None, ): - super().__init__() - - # When using minion, the region is set to None. - region = region or "" - # rclone S3 options passed as env vars. # https://rclone.org/s3/#standard-options. 
self.env_vars = { @@ -159,7 +180,6 @@ def __init__( self.env_vars["RCLONE_S3_ENDPOINT"] = endpoint self.bucket_name = bucket_name - def build_target(self, path): + def _build_target_path(self, path): """Overridden to prepend the bucket name to the path.""" - path = f"{self.bucket_name}/{path}" - return super().build_target(path) + return safe_join(self.bucket_name, path) diff --git a/readthedocs/storage/s3_storage.py b/readthedocs/storage/s3_storage.py index f6b06b4a27c..d0d8e42cccb 100644 --- a/readthedocs/storage/s3_storage.py +++ b/readthedocs/storage/s3_storage.py @@ -22,7 +22,6 @@ class S3BuildMediaStorageMixin(BuildMediaStorageMixin, S3Boto3Storage): - @cached_property def _rclone(self): provider = "AWS" @@ -35,7 +34,7 @@ def _rclone(self): bucket_name=self.bucket_name, access_key_id=self.access_key, secret_acces_key=self.secret_key, - region=self.region_name, + region=self.region_name or "", acl=self.default_acl, endpoint=self.endpoint_url, provider=provider, From 6102976b713a1e1cf923adbf5f2bdb4c25ffbee5 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Mon, 16 Jan 2023 12:17:48 -0500 Subject: [PATCH 4/6] Updates from review --- readthedocs/builds/storage.py | 2 +- readthedocs/projects/tasks/builds.py | 2 +- .../rtd_tests/tests/test_build_storage.py | 10 +++---- readthedocs/storage/rclone.py | 26 ++++++++++--------- 4 files changed, 21 insertions(+), 19 deletions(-) diff --git a/readthedocs/builds/storage.py b/readthedocs/builds/storage.py index 4c057fcc02b..b85bb6641dc 100644 --- a/readthedocs/builds/storage.py +++ b/readthedocs/builds/storage.py @@ -159,7 +159,7 @@ def sync_directory(self, source, destination): def _rclone(self): raise NotImplementedError - def rclone_sync(self, source, destination): + def rclone_sync_directory(self, source, destination): """Sync a directory recursively to storage using rclone sync.""" if destination in ("", "/"): raise SuspiciousFileOperation("Syncing all storage cannot be right") diff --git a/readthedocs/projects/tasks/builds.py b/readthedocs/projects/tasks/builds.py index af1a3977a48..b060ad7fab8 100644 --- a/readthedocs/projects/tasks/builds.py +++ b/readthedocs/projects/tasks/builds.py @@ -834,7 +834,7 @@ def store_build_artifacts( ) try: if self.data.project.has_feature(Feature.USE_RCLONE): - build_media_storage.rclone_sync(from_path, to_path) + build_media_storage.rclone_sync_directory(from_path, to_path) else: build_media_storage.sync_directory(from_path, to_path) except Exception: diff --git a/readthedocs/rtd_tests/tests/test_build_storage.py b/readthedocs/rtd_tests/tests/test_build_storage.py index 1344cb1eae9..d86450a9c2d 100644 --- a/readthedocs/rtd_tests/tests/test_build_storage.py +++ b/readthedocs/rtd_tests/tests/test_build_storage.py @@ -134,7 +134,7 @@ def test_rclone_sync(self): "test.html", ] with override_settings(DOCROOT=tmp_files_dir): - self.storage.rclone_sync(tmp_files_dir, storage_dir) + self.storage.rclone_sync_directory(tmp_files_dir, storage_dir) self.assertFileTree(storage_dir, tree) tree = [ @@ -144,7 +144,7 @@ def test_rclone_sync(self): ] (tmp_files_dir / "api.fjson").unlink() with override_settings(DOCROOT=tmp_files_dir): - self.storage.rclone_sync(tmp_files_dir, storage_dir) + self.storage.rclone_sync_directory(tmp_files_dir, storage_dir) self.assertFileTree(storage_dir, tree) tree = [ @@ -153,7 +153,7 @@ def test_rclone_sync(self): ] shutil.rmtree(tmp_files_dir / "api") with override_settings(DOCROOT=tmp_files_dir): - self.storage.rclone_sync(tmp_files_dir, storage_dir) + 
self.storage.rclone_sync_directory(tmp_files_dir, storage_dir) self.assertFileTree(storage_dir, tree) @pytest.mark.skip( @@ -166,7 +166,7 @@ def test_rclone_sync_source_symlink(self): with override_settings(DOCROOT=tmp_dir): with pytest.raises(SuspiciousFileOperation, match="symbolic link"): - self.storage.rclone_sync(tmp_symlink_dir, "files") + self.storage.rclone_sync_directory(tmp_symlink_dir, "files") @pytest.mark.skip( "Waiting for https://github.com/readthedocs/readthedocs.org/pull/9890" @@ -178,4 +178,4 @@ def test_rclone_sync_source_outside_docroot(self): with override_settings(DOCROOT=tmp_docroot): with pytest.raises(SuspiciousFileOperation, match="outside the docroot"): - self.storage.rclone_sync(tmp_dir, "files") + self.storage.rclone_sync_directory(tmp_dir, "files") diff --git a/readthedocs/storage/rclone.py b/readthedocs/storage/rclone.py index c7b6d7ae915..2ed5850668e 100644 --- a/readthedocs/storage/rclone.py +++ b/readthedocs/storage/rclone.py @@ -31,13 +31,15 @@ class BaseRClone: Defaults to ``rclone``. :param default_options: Options passed to the rclone command. :parm env_vars: Environment variables used when executing the rclone command. - Useful to pass secrets to the command, since all arguments and options will be logged. + Useful to pass secrets to the ``rclone` command, since all arguments and + options will be logged. """ remote_type = None rclone_bin = "rclone" default_options = [ # Number of file transfers to run in parallel. + # Default value is 4. "--transfers=8", # Skip based on checksum (if available) & size, not mod-time & size. "--checksum", @@ -46,13 +48,13 @@ class BaseRClone: env_vars = {} # pylint: disable=no-self-use - def _build_target_path(self, path): - """Build the final target path for the remote.""" + def _get_target_path(self, path): + """Get the final target path for the remote.""" return path - def build_target(self, path): + def get_target(self, path): """ - Build the proper target using the current remote type. + Get the proper target using the current remote type. We start the remote with `:` to create it on the fly, instead of having to create a configuration file. @@ -60,21 +62,21 @@ def build_target(self, path): :param path: Path to the remote target. """ - path = self._build_target_path(path) + path = self._get_target_path(path) return f":{self.remote_type}:{path}" - def execute(self, action, args, options=None): + def execute(self, subcommand, args, options=None): """ Execute an rclone subcommand. - :param action: Name of the subcommand. + :param subcommand: Name of the subcommand. :param list args: List of positional arguments passed the to command. :param list options: List of options passed to the command. """ options = options or [] command = [ self.rclone_bin, - action, + subcommand, *self.default_options, *options, "--", @@ -108,7 +110,7 @@ def sync(self, source, destination): :params source: Local path to the source directory. :params destination: Remote path to the destination directory. 
""" - return self.execute("sync", args=[source, self.build_target(destination)]) + return self.execute("sync", args=[source, self.get_target(destination)]) class RCloneLocal(BaseRClone): @@ -128,7 +130,7 @@ class RCloneLocal(BaseRClone): def __init__(self, location): self.location = location - def _build_target_path(self, path): + def _get_target_path(self, path): return safe_join_fs(self.location, path) @@ -180,6 +182,6 @@ def __init__( self.env_vars["RCLONE_S3_ENDPOINT"] = endpoint self.bucket_name = bucket_name - def _build_target_path(self, path): + def _get_target_path(self, path): """Overridden to prepend the bucket name to the path.""" return safe_join(self.bucket_name, path) From 3580d4d288dd69b38419235c0af25141d5a51726 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Tue, 24 Jan 2023 14:40:52 -0500 Subject: [PATCH 5/6] Updates from review --- readthedocs/storage/rclone.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/readthedocs/storage/rclone.py b/readthedocs/storage/rclone.py index 2ed5850668e..3552e2c3f86 100644 --- a/readthedocs/storage/rclone.py +++ b/readthedocs/storage/rclone.py @@ -47,10 +47,16 @@ class BaseRClone: ] env_vars = {} - # pylint: disable=no-self-use def _get_target_path(self, path): - """Get the final target path for the remote.""" - return path + """ + Get the final target path for the remote. + + .. note:: + + This doesn't include the remote type, + this is just the destination path. + """ + raise NotImplementedError def get_target(self, path): """ @@ -85,16 +91,15 @@ def execute(self, subcommand, args, options=None): env = os.environ.copy() env.update(self.env_vars) log.info("Executing rclone command.", command=command) - log.debug("env", env=env) + log.debug("Executing rclone commmad.", env=env) result = subprocess.run( command, capture_output=True, env=env, - # TODO: Fail or let the caller decide what to do? check=True, ) log.debug( - "Result.", + "rclone execution finished.", stdout=result.stdout.decode(), stderr=result.stderr.decode(), exit_code=result.returncode, @@ -139,7 +144,7 @@ class RCloneS3Remote(BaseRClone): """ RClone remote implementation for S3. - All secrets will be passed as environ variables. + All secrets will be passed as environ variables to the rclone command. See https://rclone.org/s3/. From 3c474228060b50a0cfa9026d293ad753ae1e0b09 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Tue, 24 Jan 2023 16:47:21 -0500 Subject: [PATCH 6/6] Make use of checks from #9890 --- readthedocs/builds/storage.py | 3 +-- readthedocs/rtd_tests/tests/test_build_storage.py | 6 ------ 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/readthedocs/builds/storage.py b/readthedocs/builds/storage.py index 6c6ca68c7a6..b58380e28be 100644 --- a/readthedocs/builds/storage.py +++ b/readthedocs/builds/storage.py @@ -183,8 +183,7 @@ def rclone_sync_directory(self, source, destination): if destination in ("", "/"): raise SuspiciousFileOperation("Syncing all storage cannot be right") - # TODO: borrow the symlink check from #9890 when merged. 
- # self._check_suspicious_path(source) + self._check_suspicious_path(source) return self._rclone.sync(source, destination) def join(self, directory, filepath): diff --git a/readthedocs/rtd_tests/tests/test_build_storage.py b/readthedocs/rtd_tests/tests/test_build_storage.py index 01a2a1e3fcb..95b4bed50ec 100644 --- a/readthedocs/rtd_tests/tests/test_build_storage.py +++ b/readthedocs/rtd_tests/tests/test_build_storage.py @@ -192,9 +192,6 @@ def test_rclone_sync(self): self.storage.rclone_sync_directory(tmp_files_dir, storage_dir) self.assertFileTree(storage_dir, tree) - @pytest.mark.skip( - "Waiting for https://github.com/readthedocs/readthedocs.org/pull/9890" - ) def test_rclone_sync_source_symlink(self): tmp_dir = Path(tempfile.mkdtemp()) tmp_symlink_dir = Path(tempfile.mkdtemp()) / "files" @@ -204,9 +201,6 @@ def test_rclone_sync_source_symlink(self): with pytest.raises(SuspiciousFileOperation, match="symbolic link"): self.storage.rclone_sync_directory(tmp_symlink_dir, "files") - @pytest.mark.skip( - "Waiting for https://github.com/readthedocs/readthedocs.org/pull/9890" - ) def test_rclone_sync_source_outside_docroot(self): tmp_dir = Path(tempfile.mkdtemp()) tmp_docroot = Path(tempfile.mkdtemp()) / "docroot"
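
A minimal usage sketch of the S3 remote as it ends up after this series. All
values below are placeholders; the wiring mirrors what
S3BuildMediaStorageMixin._rclone builds from the Django storage settings:

from readthedocs.storage.rclone import RCloneS3Remote

remote = RCloneS3Remote(
    bucket_name="readthedocs-media",   # placeholder bucket
    access_key_id="AKIA...",           # placeholder credentials
    secret_acces_key="...",            # parameter is spelled this way in rclone.py
    region="us-east-1",
    provider="minio",                  # "AWS" by default; the mixin switches to
                                       # "minio" when DEBUG and endpoint_url are set
    acl="private",
    endpoint="http://127.0.0.1:9000",  # placeholder development endpoint
)
# Credentials are handed to rclone through RCLONE_S3_* environment variables
# rather than command-line options, so they do not appear in the logged command:
#   rclone sync --transfers=8 --checksum --verbose -- \
#       /tmp/build/html :s3:readthedocs-media/html/myproject/latest
remote.sync("/tmp/build/html", "html/myproject/latest")
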