Commit 6b2dd37

IO: Fix S3 Error Handling (#33645)

1 parent e0d4622

File tree

5 files changed: +35 -27 lines
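Why the fix works: get_filepath_or_buffer returns a 4-tuple whose last element, should_close, tells the caller whether pandas opened the underlying handle and is therefore responsible for closing it. The CSV and parquet writers were discarding that flag, so s3fs file objects were never closed; s3fs buffers writes and only uploads on close(), which is also where a missing bucket or invalid credentials raise. The sketch below illustrates that contract with a simplified stand-in for the helper (not pandas' exact implementation; the s3:// branch assumes s3fs is installed):

from io import BytesIO

def get_filepath_or_buffer(path, mode="wb"):
    # Simplified stand-in for pandas.io.common.get_filepath_or_buffer.
    # Returns (filepath_or_buffer, encoding, compression, should_close).
    if isinstance(path, str) and path.startswith("s3://"):
        import s3fs  # only needed for s3:// paths

        fs = s3fs.S3FileSystem()
        # Closing the returned file object is what actually uploads the
        # bytes -- and where a missing bucket or bad credentials raise.
        return fs.open(path, mode), None, None, True
    return path, None, None, False  # caller-owned buffer: don't close it

buf = BytesIO()
f, _, _, should_close = get_filepath_or_buffer(buf)
f.write(b"payload")
if should_close:  # False here; True for the s3:// branch above
    f.close()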

doc/source/whatsnew/v1.1.0.rst (+2)

@@ -583,6 +583,8 @@ I/O
 - Bug in :func:`pandas.io.json.json_normalize` where location specified by `record_path` doesn't point to an array. (:issue:`26284`)
 - :func:`pandas.read_hdf` has a more explicit error message when loading an
   unsupported HDF file (:issue:`9539`)
+- Bug in :meth:`~DataFrame.to_parquet` was not raising ``PermissionError`` when writing to a private s3 bucket with invalid creds. (:issue:`27679`)
+- Bug in :meth:`~DataFrame.to_csv` was silently failing when writing to an invalid s3 bucket. (:issue:`32486`)
 
 Plotting
 ^^^^^^^^

pandas/io/formats/csvs.py (+3 -1)

@@ -62,7 +62,7 @@ def __init__(
         # Extract compression mode as given, if dict
         compression, self.compression_args = get_compression_method(compression)
 
-        self.path_or_buf, _, _, _ = get_filepath_or_buffer(
+        self.path_or_buf, _, _, self.should_close = get_filepath_or_buffer(
             path_or_buf, encoding=encoding, compression=compression, mode=mode
         )
         self.sep = sep
@@ -223,6 +223,8 @@ def save(self) -> None:
                 f.close()
                 for _fh in handles:
                     _fh.close()
+            elif self.should_close:
+                f.close()
 
     def _save_header(self):
         writer = self.writer
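With self.should_close captured, save() now closes a handle it received from get_filepath_or_buffer even when no local file handle was opened, so upload errors propagate to the caller. A usage sketch of the fixed behavior (bucket name taken from the new test below; assumes s3fs is installed and no such bucket exists):

import pandas as pd

df = pd.DataFrame({"a": [1, 2]})
try:
    # Before this commit, the s3fs handle was never closed, so nothing was
    # flushed and no error was raised (GH 32486); the close() added above
    # uploads the buffer and surfaces the failure.
    df.to_csv("s3://an_s3_bucket_data_doesnt_exit/not_real.csv")
except FileNotFoundError as err:
    print(err)  # "The specified bucket does not exist" (from s3fs/botocore)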

pandas/io/parquet.py (+3 -1)

@@ -92,7 +92,7 @@ def write(
         **kwargs,
     ):
         self.validate_dataframe(df)
-        path, _, _, _ = get_filepath_or_buffer(path, mode="wb")
+        path, _, _, should_close = get_filepath_or_buffer(path, mode="wb")
 
         from_pandas_kwargs: Dict[str, Any] = {"schema": kwargs.pop("schema", None)}
         if index is not None:
@@ -109,6 +109,8 @@ def write(
             )
         else:
             self.api.parquet.write_table(table, path, compression=compression, **kwargs)
+        if should_close:
+            path.close()
 
     def read(self, path, columns=None, **kwargs):
         path, _, _, should_close = get_filepath_or_buffer(path)
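The same pattern applies to the pyarrow engine: after write_table fills the s3fs handle, the new close() flushes it, so to_parquet raises on a private bucket with invalid credentials (GH 27679) instead of appearing to succeed. A sketch under the same assumptions (the bucket name here is a placeholder, not from the commit):

import pandas as pd

df = pd.DataFrame({"a": [1, 2]})
try:
    # "some-private-bucket" is hypothetical; with invalid credentials this
    # now raises rather than silently dropping the write.
    df.to_parquet("s3://some-private-bucket/not_real.parquet")
except (PermissionError, FileNotFoundError) as err:
    print(err)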

pandas/tests/io/parser/test_network.py (+18 -2)

@@ -54,8 +54,8 @@ def tips_df(datapath):
 @pytest.mark.usefixtures("s3_resource")
 @td.skip_if_not_us_locale()
 class TestS3:
+    @td.skip_if_no("s3fs")
     def test_parse_public_s3_bucket(self, tips_df):
-        pytest.importorskip("s3fs")
 
         # more of an integration test due to the not-public contents portion
         # can probably mock this though.
@@ -159,7 +159,7 @@ def test_parse_public_s3_bucket_nrows_python(self, tips_df):
         assert not df.empty
         tm.assert_frame_equal(tips_df.iloc[:10], df)
 
-    def test_s3_fails(self):
+    def test_read_s3_fails(self):
         with pytest.raises(IOError):
             read_csv("s3://nyqpug/asdf.csv")
 
@@ -168,6 +168,22 @@ def test_s3_fails(self):
         with pytest.raises(IOError):
             read_csv("s3://cant_get_it/file.csv")
 
+    def test_write_s3_csv_fails(self, tips_df):
+        # GH 32486
+        # Attempting to write to an invalid S3 path should raise
+        with pytest.raises(
+            FileNotFoundError, match="The specified bucket does not exist"
+        ):
+            tips_df.to_csv("s3://an_s3_bucket_data_doesnt_exit/not_real.csv")
+
+    @td.skip_if_no("pyarrow")
+    def test_write_s3_parquet_fails(self, tips_df):
+        # GH 27679
+        with pytest.raises(
+            FileNotFoundError, match="The specified bucket does not exist"
+        ):
+            tips_df.to_parquet("s3://an_s3_bucket_data_doesnt_exit/not_real.parquet")
+
     def test_read_csv_handles_boto_s3_object(self, s3_resource, tips_file):
         # see gh-16135
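Aside: the move from pytest.importorskip("s3fs") in the test body to the @td.skip_if_no("s3fs") decorator keeps the skip condition visible in the test signature. A minimal, self-contained sketch of an equivalent decorator (plain pytest, not pandas' actual _test_decorators module):

import importlib.util

import pytest

def skip_if_no(package: str):
    # Skip the decorated test when `package` cannot be imported.
    has_pkg = importlib.util.find_spec(package) is not None
    return pytest.mark.skipif(not has_pkg, reason=f"requires {package}")

@skip_if_no("s3fs")
def test_needs_s3fs():
    import s3fs  # safe: the test is skipped entirely when s3fs is absent

    assert s3fs is not None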

pandas/tests/io/test_gcs.py (+9 -23)

@@ -56,7 +56,15 @@ def open(*args):
 
     monkeypatch.setattr("gcsfs.GCSFileSystem", MockGCSFileSystem)
     df1.to_csv("gs://test/test.csv", index=True)
-    df2 = read_csv(StringIO(s.getvalue()), parse_dates=["dt"], index_col=0)
+
+    def mock_get_filepath_or_buffer(*args, **kwargs):
+        return StringIO(df1.to_csv()), None, None, False
+
+    monkeypatch.setattr(
+        "pandas.io.gcs.get_filepath_or_buffer", mock_get_filepath_or_buffer
+    )
+
+    df2 = read_csv("gs://test/test.csv", parse_dates=["dt"], index_col=0)
 
     tm.assert_frame_equal(df1, df2)
 
@@ -86,28 +94,6 @@ def open(self, path, mode="r", *args):
     )
 
 
-@td.skip_if_no("gcsfs")
-def test_gcs_get_filepath_or_buffer(monkeypatch):
-    df1 = DataFrame(
-        {
-            "int": [1, 3],
-            "float": [2.0, np.nan],
-            "str": ["t", "s"],
-            "dt": date_range("2018-06-18", periods=2),
-        }
-    )
-
-    def mock_get_filepath_or_buffer(*args, **kwargs):
-        return (StringIO(df1.to_csv(index=False)), None, None, False)
-
-    monkeypatch.setattr(
-        "pandas.io.gcs.get_filepath_or_buffer", mock_get_filepath_or_buffer
-    )
-    df2 = read_csv("gs://test/test.csv", parse_dates=["dt"])
-
-    tm.assert_frame_equal(df1, df2)
-
-
 @td.skip_if_installed("gcsfs")
 def test_gcs_not_present_exception():
     with pytest.raises(ImportError) as e:
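The standalone test_gcs_get_filepath_or_buffer test is folded into the round-trip test above: read_csv("gs://test/test.csv") is now served by monkeypatching pandas.io.gcs.get_filepath_or_buffer, with False in the should_close slot because the mock hands back a caller-owned StringIO. A self-contained sketch of that monkeypatch pattern, with hypothetical storage/read_remote names standing in for pandas internals (runnable under pytest):

from io import StringIO

class storage:
    # Stand-in for the module attribute being patched; the real target is
    # "pandas.io.gcs.get_filepath_or_buffer".
    @staticmethod
    def get_filepath_or_buffer(path, **kwargs):
        raise ConnectionError("would hit the network")

def read_remote(path):
    buf, _, _, should_close = storage.get_filepath_or_buffer(path)
    try:
        return buf.read()
    finally:
        if should_close:
            buf.close()

def test_read_remote_is_mocked(monkeypatch):
    # monkeypatch is pytest's built-in fixture; the lambda mirrors the
    # (buffer, encoding, compression, should_close) return shape.
    monkeypatch.setattr(
        storage,
        "get_filepath_or_buffer",
        lambda path, **kwargs: (StringIO("a,b\n1,2\n"), None, None, False),
    )
    assert read_remote("gs://test/test.csv") == "a,b\n1,2\n"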
