Skip to content

Commit 9d38c57

Browse files
committed
REGR: Fixed reading from public S3 buckets with credentials
Closes pandas-dev#34626. This works in 1.0.4, I think, so no whatsnew entry is needed.
1 parent 3c959fc commit 9d38c57

File tree

2 files changed

+26
-4
lines changed

2 files changed

+26
-4
lines changed

pandas/io/s3.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ def _strip_schema(url):
1616
return result.netloc + result.path
1717

1818

19-
def get_fs():
20-
return s3fs.S3FileSystem(anon=False)
19+
def get_fs(anon=False):
20+
return s3fs.S3FileSystem(anon=anon)
2121

2222

2323
def get_file_and_filesystem(
@@ -31,14 +31,14 @@ def get_file_and_filesystem(
3131
fs = get_fs()
3232
try:
3333
file = fs.open(_strip_schema(filepath_or_buffer), mode)
34-
except (FileNotFoundError, NoCredentialsError):
34+
except (FileNotFoundError, NoCredentialsError, PermissionError):
3535
# boto3 has troubles when trying to access a public file
3636
# when credentialed...
3737
# An OSError (e.g. PermissionError) is raised if you have credentials, but they
3838
# aren't valid for that bucket.
3939
# A NoCredentialsError is raised if you don't have creds
4040
# for that bucket.
41-
fs = get_fs()
41+
fs = get_fs(anon=True)
4242
file = fs.open(_strip_schema(filepath_or_buffer), mode)
4343
return file, fs
4444

pandas/tests/io/test_s3.py

+22
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
from io import BytesIO
2+
import os
23

34
import pytest
45

56
from pandas import read_csv
7+
import pandas._testing as tm
68

79
from pandas.io.common import is_s3_url
810

@@ -23,3 +25,23 @@ def test_streaming_s3_objects():
2325
for el in data:
2426
body = StreamingBody(BytesIO(el), content_length=len(el))
2527
read_csv(body)
28+
29+
30+
@tm.network
31+
@pytest.mark.slow
32+
def test_read_s3_public():
33+
# ensure we can read from a public bucket with credentials
34+
pytest.importorskip("s3fs")
35+
36+
with tm.ensure_safe_environment_variables():
37+
# temporary workaround as moto fails for botocore >= 1.11 otherwise,
38+
# see https://github.com/spulec/moto/issues/1924 & 1952
39+
os.environ.setdefault("AWS_ACCESS_KEY_ID", "foobar_key")
40+
os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "foobar_secret")
41+
df = read_csv(
42+
"s3://gdelt-open-data/events/20130420.export.csv",
43+
nrows=5,
44+
sep="\t",
45+
header=None,
46+
)
47+
assert len(df) == 5

0 commit comments

Comments
 (0)