Skip to content

Commit 6c48d12

Browse files
author
Tom Augspurger
committed
Merge pull request #10604 from stephen-hoover/more-permissive-s3-reads
ENH: More permissive S3 reading
2 parents 751164d + eefa29f commit 6c48d12

File tree

3 files changed

+13
-3
lines changed

3 files changed

+13
-3
lines changed

doc/source/whatsnew/v0.17.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -389,3 +389,5 @@ Bug Fixes
389389
- Bug in the equality operator on ``Index`` not being consistent with ``Series`` (:issue:`9947`)
390390
- Reading "famafrench" data via ``DataReader`` results in an HTTP 404 error because the website URL has changed (:issue:`10591`).
391391
- Bug in ``read_msgpack`` where the DataFrame to decode has duplicate column names (:issue:`9618`)
392+
393+
- Bug in ``io.common.get_filepath_or_buffer`` which caused reading of valid S3 files to fail if the bucket also contained keys for which the user does not have read permission (:issue:`10604`)

pandas/io/common.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None):
151151
except boto.exception.NoAuthHandlerFound:
152152
conn = boto.connect_s3(anon=True)
153153

154-
b = conn.get_bucket(parsed_url.netloc)
154+
b = conn.get_bucket(parsed_url.netloc, validate=False)
155155
k = boto.s3.key.Key(b)
156156
k.key = parsed_url.path
157157
filepath_or_buffer = BytesIO(k.get_contents_as_string(

pandas/io/tests/test_parsers.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -4132,16 +4132,24 @@ def test_parse_public_s3_bucket(self):
41324132
nt.assert_false(df.empty)
41334133
tm.assert_frame_equal(pd.read_csv(tm.get_data_path('tips.csv')), df)
41344134

4135+
# Read public file from bucket with not-public contents
4136+
df = pd.read_csv('s3://cant_get_it/tips.csv')
4137+
nt.assert_true(isinstance(df, pd.DataFrame))
4138+
nt.assert_false(df.empty)
4139+
tm.assert_frame_equal(pd.read_csv(tm.get_data_path('tips.csv')), df)
4140+
41354141
@tm.network
41364142
def test_s3_fails(self):
41374143
import boto
41384144
with tm.assertRaisesRegexp(boto.exception.S3ResponseError,
41394145
'S3ResponseError: 404 Not Found'):
41404146
pd.read_csv('s3://nyqpug/asdf.csv')
41414147

4148+
# Receive a permission error when trying to read a private bucket.
4149+
# It's irrelevant here that this isn't actually a table.
41424150
with tm.assertRaisesRegexp(boto.exception.S3ResponseError,
4143-
'S3ResponseError: 403 Forbidden'):
4144-
pd.read_csv('s3://cant_get_it/tips.csv')
4151+
'S3ResponseError: 403 Forbidden'):
4152+
pd.read_csv('s3://cant_get_it/')
41454153

41464154

41474155
def assert_same_values_and_dtype(res, exp):

0 commit comments

Comments
 (0)