From 9d38c579aa86a7542e23b98a151cb64069ed5936 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 Jun 2020 16:04:28 -0500 Subject: [PATCH] REGR: Fixed reading from public S3 buckets with credentials Closes https://github.com/pandas-dev/pandas/issues/34626 This works in 1.0.4 I think, so no whatsnew. --- pandas/io/s3.py | 8 ++++---- pandas/tests/io/test_s3.py | 22 ++++++++++++++++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/pandas/io/s3.py b/pandas/io/s3.py index 329c861d2386a..62dba9cffb32f 100644 --- a/pandas/io/s3.py +++ b/pandas/io/s3.py @@ -16,8 +16,8 @@ def _strip_schema(url): return result.netloc + result.path -def get_fs(): - return s3fs.S3FileSystem(anon=False) +def get_fs(anon=False): + return s3fs.S3FileSystem(anon=anon) def get_file_and_filesystem( @@ -31,14 +31,14 @@ def get_file_and_filesystem( fs = get_fs() try: file = fs.open(_strip_schema(filepath_or_buffer), mode) - except (FileNotFoundError, NoCredentialsError): + except (FileNotFoundError, NoCredentialsError, PermissionError): # boto3 has troubles when trying to access a public file # when credentialed... # An OSError is raised if you have credentials, but they # aren't valid for that bucket. # A NoCredentialsError is raised if you don't have creds # for that bucket. 
- fs = get_fs() + fs = get_fs(anon=True) file = fs.open(_strip_schema(filepath_or_buffer), mode) return file, fs diff --git a/pandas/tests/io/test_s3.py b/pandas/tests/io/test_s3.py index 04c6979596eca..e7635a8ba0e35 100644 --- a/pandas/tests/io/test_s3.py +++ b/pandas/tests/io/test_s3.py @@ -1,8 +1,10 @@ from io import BytesIO +import os import pytest from pandas import read_csv +import pandas._testing as tm from pandas.io.common import is_s3_url @@ -23,3 +25,23 @@ def test_streaming_s3_objects(): for el in data: body = StreamingBody(BytesIO(el), content_length=len(el)) read_csv(body) + + +@tm.network +@pytest.mark.slow +def test_read_s3_public(): + # ensure we can read from a public bucket with credentials + pytest.importorskip("s3fs") + + with tm.ensure_safe_environment_variables(): + # dummy credentials, so the public bucket is read while credentialed; + # moto workaround: see https://github.com/spulec/moto/issues/1924 & 1952 + os.environ.setdefault("AWS_ACCESS_KEY_ID", "foobar_key") + os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "foobar_secret") + df = read_csv( + "s3://gdelt-open-data/events/20130420.export.csv", + nrows=5, + sep="\t", + header=None, + ) + assert len(df) == 5