-
-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Build: use rclone for sync #9842
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
05d44e3
3427b61
9d29550
6102976
3580d4d
8d81c89
3c47422
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,192 @@ | ||
""" | ||
Wrapper around the rclone command. | ||
|
||
See https://rclone.org/docs. | ||
""" | ||
|
||
import os | ||
import subprocess | ||
|
||
import structlog | ||
from django.utils._os import safe_join as safe_join_fs | ||
from storages.utils import safe_join | ||
|
||
log = structlog.get_logger(__name__) | ||
|
||
|
||
class BaseRClone:
    """
    RClone base class.

    This class allows you to interact with an rclone remote without
    a configuration file, the remote declaration and its options
    are passed in the command itself.

    ``_get_target_path`` raises ``NotImplementedError`` here, so concrete
    remotes subclass this class, set ``remote_type`` and override it.

    :param remote_type: You can see the full list of supported providers at
     https://rclone.org/#providers.
    :param rclone_bin: Binary name or path to the rclone binary.
     Defaults to ``rclone``.
    :param default_options: Options passed to the rclone command.
    :param env_vars: Environment variables used when executing the rclone command.
     Useful to pass secrets to the ``rclone`` command, since all arguments and
     options will be logged.
    """

    remote_type = None
    rclone_bin = "rclone"
    default_options = [
        # Number of file transfers to run in parallel.
        # Default value is 4.
        "--transfers=8",
        # Skip based on checksum (if available) & size, not mod-time & size.
        "--checksum",
        "--verbose",
    ]
    # Shared class-level default; subclasses assign their own dict per instance
    # instead of mutating this one.
    env_vars = {}

    def _get_target_path(self, path):
        """
        Get the final target path for the remote.

        .. note::

           This doesn't include the remote type,
           this is just the destination path.

        :param path: Path to the remote target.
        :raises NotImplementedError: Always, subclasses must override this method.
        """
        raise NotImplementedError

    def get_target(self, path):
        """
        Get the proper target using the current remote type.

        We start the remote with `:` to create it on the fly,
        instead of having to create a configuration file.
        See https://rclone.org/docs/#backend-path-to-dir.

        :param path: Path to the remote target.
        :returns: A string with the form ``:<remote_type>:<target path>``.
        """
        path = self._get_target_path(path)
        return f":{self.remote_type}:{path}"

    def _build_env(self):
        """
        Build the environment used to run the rclone command.

        :returns: A copy of the current process environment updated with ``env_vars``.
        """
        # Passing ``env`` to subprocess replaces the whole environment,
        # so we start from a copy of the current one to keep variables like PATH defined.
        env = os.environ.copy()
        env.update(self.env_vars)
        return env

    def execute(self, subcommand, args, options=None):
        """
        Execute an rclone subcommand.

        :param subcommand: Name of the subcommand.
        :param list args: List of positional arguments passed to the command.
        :param list options: List of options passed to the command.
        :returns: The ``subprocess.CompletedProcess`` result of the command.
        :raises subprocess.CalledProcessError: If rclone exits with a non-zero code.
        """
        options = options or []
        command = [
            self.rclone_bin,
            subcommand,
            *self.default_options,
            *options,
            # Everything after ``--`` is treated as a positional argument.
            "--",
            *args,
        ]
        env = self._build_env()
        log.info("Executing rclone command.", command=command)
        # Only the names of the extra variables are logged:
        # their values are the secrets we are keeping out of the logs.
        log.debug("Executing rclone command.", env_vars=sorted(self.env_vars))
        # NOTE(review): it was suggested to run rclone from a separate container,
        # so the executable can't be manipulated to expose the secret env vars
        # passed to it. Left to be explored in a future iteration.
        result = subprocess.run(
            command,
            capture_output=True,
            env=env,
            check=True,
        )
        log.debug(
            "rclone execution finished.",
            # Don't fail a successful run just because the output isn't valid UTF-8.
            stdout=result.stdout.decode(errors="replace"),
            stderr=result.stderr.decode(errors="replace"),
            exit_code=result.returncode,
        )
        return result

    def sync(self, source, destination):
        """
        Run the `rclone sync` command.

        See https://rclone.org/commands/rclone_sync/.

        :param source: Local path to the source directory.
        :param destination: Remote path to the destination directory.
        :returns: The result of :meth:`execute`.
        """
        return self.execute("sync", args=[source, self.get_target(destination)])
|
||
|
||
class RCloneLocal(BaseRClone):
    """
    RClone remote backed by the local file system.

    Used for local testing only.

    See https://rclone.org/local/.

    :param location: Root directory where the files will be stored.
    """

    remote_type = "local"

    def __init__(self, location):
        # Root directory every target path is joined onto.
        self.location = location

    def _get_target_path(self, path):
        """Overridden to join the path onto the root location."""
        root = self.location
        return safe_join_fs(root, path)
|
||
|
||
class RCloneS3Remote(BaseRClone):
    """
    RClone remote implementation for S3.

    All secrets will be passed as environment variables to the rclone command.

    See https://rclone.org/s3/.

    :param bucket_name: Name of the S3 bucket.
    :param access_key_id: AWS access key id.
    :param secret_acces_key: AWS secret access key.
    :param region: AWS region.
    :param provider: S3 provider, defaults to ``AWS``.
     Useful to use Minio during development.
     See https://rclone.org/s3/#s3-provider.
    :param acl: Canned ACL used when creating buckets and storing or copying objects.
     See https://rclone.org/s3/#s3-acl.
    :param endpoint: Custom S3 endpoint, useful for development.
    """

    remote_type = "s3"

    def __init__(
        self,
        bucket_name,
        access_key_id,
        secret_acces_key,
        region,
        provider="AWS",
        acl=None,
        endpoint=None,
    ):
        self.bucket_name = bucket_name

        # rclone S3 options passed as env vars.
        # https://rclone.org/s3/#standard-options.
        required = {
            "RCLONE_S3_PROVIDER": provider,
            "RCLONE_S3_ACCESS_KEY_ID": access_key_id,
            "RCLONE_S3_SECRET_ACCESS_KEY": secret_acces_key,
            "RCLONE_S3_REGION": region,
            # The region is also used as the location constraint.
            "RCLONE_S3_LOCATION_CONSTRAINT": region,
        }
        # Optional settings are only exported when a truthy value was given.
        optional = {
            "RCLONE_S3_ACL": acl,
            "RCLONE_S3_ENDPOINT": endpoint,
        }
        provided = {name: value for name, value in optional.items() if value}
        self.env_vars = {**required, **provided}

    def _get_target_path(self, path):
        """Overridden to prepend the bucket name to the path."""
        bucket = self.bucket_name
        return safe_join(bucket, path)
Uh oh!
There was an error while loading. Please reload this page.