From f9da50a538d39531fab226caa87be66ddd26526b Mon Sep 17 00:00:00 2001 From: xieqihui Date: Thu, 17 Oct 2019 18:32:46 +0800 Subject: [PATCH 1/5] Support customised S3 servers endpoint URL --- pandas/io/s3.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pandas/io/s3.py b/pandas/io/s3.py index 7e0a37e8cba20..bc2a98288f532 100644 --- a/pandas/io/s3.py +++ b/pandas/io/s3.py @@ -1,4 +1,5 @@ """ s3 support for remote file interactivity """ +import os from typing import IO, Any, Optional, Tuple from urllib.parse import urlparse as parse_url @@ -25,7 +26,15 @@ def get_file_and_filesystem( if mode is None: mode = "rb" - fs = s3fs.S3FileSystem(anon=False) + # Support customised S3 servers endpoint URL via environment variable + # The S3_ENDPOINT should be the complete URL to S3 service following + # the format: http(s)://{host}:{port} + s3_endpoint = os.environ.get('S3_ENDPOINT') + if s3_endpoint: + client_kwargs = {'endpoint_url': s3_endpoint} + else: + client_kwargs = None + fs = s3fs.S3FileSystem(anon=False, client_kwargs=client_kwargs) try: file = fs.open(_strip_schema(filepath_or_buffer), mode) except (FileNotFoundError, NoCredentialsError): @@ -35,7 +44,7 @@ def get_file_and_filesystem( # aren't valid for that bucket. # A NoCredentialsError is raised if you don't have creds # for that bucket. 
- fs = s3fs.S3FileSystem(anon=True) + fs = s3fs.S3FileSystem(anon=True, client_kwargs=client_kwargs) file = fs.open(_strip_schema(filepath_or_buffer), mode) return file, fs From febbdfe655346dead0c6078195e647888b49894a Mon Sep 17 00:00:00 2001 From: xieqihui Date: Fri, 18 Oct 2019 10:08:40 +0800 Subject: [PATCH 2/5] add type annotation and format s3.py --- pandas/io/s3.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/io/s3.py b/pandas/io/s3.py index bc2a98288f532..b7698e9bbb68b 100644 --- a/pandas/io/s3.py +++ b/pandas/io/s3.py @@ -29,11 +29,10 @@ def get_file_and_filesystem( # Support customised S3 servers endpoint URL via environment variable # The S3_ENDPOINT should be the complete URL to S3 service following # the format: http(s)://{host}:{port} - s3_endpoint = os.environ.get('S3_ENDPOINT') - if s3_endpoint: - client_kwargs = {'endpoint_url': s3_endpoint} - else: - client_kwargs = None + s3_endpoint = os.environ.get("S3_ENDPOINT") + client_kwargs: Optional[Dict[str, str]] = { + "endpoint_url": s3_endpoint + } if s3_endpoint else None fs = s3fs.S3FileSystem(anon=False, client_kwargs=client_kwargs) try: file = fs.open(_strip_schema(filepath_or_buffer), mode) From 26885edc0d5d359588d8278046a66d0a54d1e683 Mon Sep 17 00:00:00 2001 From: xieqihui Date: Fri, 18 Oct 2019 10:19:38 +0800 Subject: [PATCH 3/5] Explain s3_endpoint --- pandas/io/s3.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/s3.py b/pandas/io/s3.py index b7698e9bbb68b..12d851c470001 100644 --- a/pandas/io/s3.py +++ b/pandas/io/s3.py @@ -28,7 +28,8 @@ def get_file_and_filesystem( # Support customised S3 servers endpoint URL via environment variable # The S3_ENDPOINT should be the complete URL to S3 service following - # the format: http(s)://{host}:{port} + # the format: http(s)://{host}:{port}. If S3_ENDPOINT is undefined, it will + # fall back to the default AWS S3 endpoint as determined by boto3. 
s3_endpoint = os.environ.get("S3_ENDPOINT") client_kwargs: Optional[Dict[str, str]] = { "endpoint_url": s3_endpoint From 690752b6479c83762cfb433ac024b1b5c4f5945a Mon Sep 17 00:00:00 2001 From: xieqihui Date: Fri, 18 Oct 2019 11:05:00 +0800 Subject: [PATCH 4/5] fix type annotation in s3.py --- pandas/io/s3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/s3.py b/pandas/io/s3.py index 12d851c470001..4845063829afa 100644 --- a/pandas/io/s3.py +++ b/pandas/io/s3.py @@ -1,6 +1,6 @@ """ s3 support for remote file interactivity """ import os -from typing import IO, Any, Optional, Tuple +from typing import IO, Any, Optional, Tuple, Dict from urllib.parse import urlparse as parse_url from pandas.compat._optional import import_optional_dependency From 1f24b9820b3f3d61f3753aa58ec87d7a19977080 Mon Sep 17 00:00:00 2001 From: xieqihui Date: Fri, 18 Oct 2019 14:08:20 +0800 Subject: [PATCH 5/5] isort s3.py --- pandas/io/s3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/s3.py b/pandas/io/s3.py index 4845063829afa..5c0efefb52b06 100644 --- a/pandas/io/s3.py +++ b/pandas/io/s3.py @@ -1,6 +1,6 @@ """ s3 support for remote file interactivity """ import os -from typing import IO, Any, Optional, Tuple, Dict +from typing import IO, Any, Dict, Optional, Tuple from urllib.parse import urlparse as parse_url from pandas.compat._optional import import_optional_dependency