diff --git a/pandas/io/s3.py b/pandas/io/s3.py index 7e0a37e8cba20..5c0efefb52b06 100644 --- a/pandas/io/s3.py +++ b/pandas/io/s3.py @@ -1,5 +1,6 @@ """ s3 support for remote file interactivity """ -from typing import IO, Any, Optional, Tuple +import os +from typing import IO, Any, Dict, Optional, Tuple from urllib.parse import urlparse as parse_url from pandas.compat._optional import import_optional_dependency @@ -25,7 +26,15 @@ def get_file_and_filesystem( if mode is None: mode = "rb" - fs = s3fs.S3FileSystem(anon=False) + # Support a custom S3 server endpoint URL via an environment variable. + # S3_ENDPOINT should be the complete URL of the S3 service, following + # the format: http(s)://{host}:{port}. If S3_ENDPOINT is unset or empty, + # the default AWS S3 endpoint as determined by boto3 is used. + s3_endpoint = os.environ.get("S3_ENDPOINT") + client_kwargs: Optional[Dict[str, str]] = { + "endpoint_url": s3_endpoint + } if s3_endpoint else None + fs = s3fs.S3FileSystem(anon=False, client_kwargs=client_kwargs) try: file = fs.open(_strip_schema(filepath_or_buffer), mode) except (FileNotFoundError, NoCredentialsError): @@ -35,7 +44,7 @@ def get_file_and_filesystem( # aren't valid for that bucket. # A NoCredentialsError is raised if you don't have creds # for that bucket. - fs = s3fs.S3FileSystem(anon=True) + fs = s3fs.S3FileSystem(anon=True, client_kwargs=client_kwargs) file = fs.open(_strip_schema(filepath_or_buffer), mode) return file, fs