diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 6ab299eb70eb5..b6e166619c1e8 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -389,3 +389,5 @@ Bug Fixes - Reading "famafrench" data via ``DataReader`` results in HTTP 404 error because of the website url is changed (:issue:`10591`). - Bug in `read_msgpack` where DataFrame to decode has duplicate column names (:issue:`9618`) + +- Bug in ``io.common.get_filepath_or_buffer`` which caused reading of valid S3 files to fail if the bucket also contained keys for which the user did not have read permission (:issue:`10604`) diff --git a/pandas/io/common.py b/pandas/io/common.py index 65cfdff1df14b..b341679176256 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -151,7 +151,7 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None): except boto.exception.NoAuthHandlerFound: conn = boto.connect_s3(anon=True) - b = conn.get_bucket(parsed_url.netloc) + b = conn.get_bucket(parsed_url.netloc, validate=False) k = boto.s3.key.Key(b) k.key = parsed_url.path filepath_or_buffer = BytesIO(k.get_contents_as_string( diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index 0f0486e8ea596..a4940ebdd6079 100755 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -4075,6 +4075,12 @@ def test_parse_public_s3_bucket(self): nt.assert_false(df.empty) tm.assert_frame_equal(pd.read_csv(tm.get_data_path('tips.csv')), df) + # Read public file from bucket with non-public contents + df = pd.read_csv('s3://cant_get_it/tips.csv') + nt.assert_true(isinstance(df, pd.DataFrame)) + nt.assert_false(df.empty) + tm.assert_frame_equal(pd.read_csv(tm.get_data_path('tips.csv')), df) + @tm.network def test_s3_fails(self): import boto @@ -4082,9 +4088,11 @@ def test_s3_fails(self): 'S3ResponseError: 404 Not Found'): pd.read_csv('s3://nyqpug/asdf.csv') + # Receive a permission error when trying to read a private bucket. 
+ # It's irrelevant here that this isn't actually a table. with tm.assertRaisesRegexp(boto.exception.S3ResponseError, - 'S3ResponseError: 403 Forbidden'): - pd.read_csv('s3://cant_get_it/tips.csv') + 'S3ResponseError: 403 Forbidden'): + pd.read_csv('s3://cant_get_it/') def assert_same_values_and_dtype(res, exp):