diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 03a547fadd7ca..77acf02bade21 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -582,6 +582,8 @@ I/O
 - Bug in :func:`pandas.io.json.json_normalize` where location specified by `record_path` doesn't point to an array. (:issue:`26284`)
 - :func:`pandas.read_hdf` has a more explicit error message when loading an unsupported HDF file (:issue:`9539`)
+- Bug in :meth:`~DataFrame.to_parquet` was not raising ``PermissionError`` when writing to a private s3 bucket with invalid credentials. (:issue:`27679`)
+- Bug in :meth:`~DataFrame.to_csv` was silently failing when writing to an invalid s3 bucket. (:issue:`32486`)
 
 Plotting
 ^^^^^^^^
diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py
index 091f7662630ff..dcd764bec7426 100644
--- a/pandas/io/formats/csvs.py
+++ b/pandas/io/formats/csvs.py
@@ -62,7 +62,7 @@ def __init__(
         # Extract compression mode as given, if dict
         compression, self.compression_args = get_compression_method(compression)
 
-        self.path_or_buf, _, _, _ = get_filepath_or_buffer(
+        self.path_or_buf, _, _, self.should_close = get_filepath_or_buffer(
             path_or_buf, encoding=encoding, compression=compression, mode=mode
         )
         self.sep = sep
@@ -223,6 +223,8 @@ def save(self) -> None:
                 f.close()
                 for _fh in handles:
                     _fh.close()
+            elif self.should_close:
+                f.close()
 
     def _save_header(self):
         writer = self.writer
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index 33747d2a6dd83..068210eddcc1b 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -92,7 +92,7 @@ def write(
         **kwargs,
     ):
         self.validate_dataframe(df)
-        path, _, _, _ = get_filepath_or_buffer(path, mode="wb")
+        path, _, _, should_close = get_filepath_or_buffer(path, mode="wb")
 
         from_pandas_kwargs: Dict[str, Any] = {"schema": kwargs.pop("schema", None)}
         if index is not None:
@@ -109,6 +109,8 @@ def write(
             )
         else:
             self.api.parquet.write_table(table, path, compression=compression, **kwargs)
+        if should_close:
+            path.close()
 
     def read(self, path, columns=None, **kwargs):
         path, _, _, should_close = get_filepath_or_buffer(path)
diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py
index b7164477c31f2..0f09659a24936 100644
--- a/pandas/tests/io/parser/test_network.py
+++ b/pandas/tests/io/parser/test_network.py
@@ -54,8 +54,8 @@ def tips_df(datapath):
 @pytest.mark.usefixtures("s3_resource")
 @td.skip_if_not_us_locale()
 class TestS3:
+    @td.skip_if_no("s3fs")
     def test_parse_public_s3_bucket(self, tips_df):
-        pytest.importorskip("s3fs")
         # more of an integration test due to the not-public contents portion
         # can probably mock this though.
@@ -159,7 +159,7 @@ def test_parse_public_s3_bucket_nrows_python(self, tips_df):
             assert not df.empty
             tm.assert_frame_equal(tips_df.iloc[:10], df)
 
-    def test_s3_fails(self):
+    def test_read_s3_fails(self):
         with pytest.raises(IOError):
             read_csv("s3://nyqpug/asdf.csv")
 
@@ -168,6 +168,22 @@ def test_s3_fails(self):
         with pytest.raises(IOError):
             read_csv("s3://cant_get_it/file.csv")
 
+    def test_write_s3_csv_fails(self, tips_df):
+        # GH 32486
+        # Attempting to write to an invalid S3 path should raise
+        with pytest.raises(
+            FileNotFoundError, match="The specified bucket does not exist"
+        ):
+            tips_df.to_csv("s3://an_s3_bucket_data_doesnt_exit/not_real.csv")
+
+    @td.skip_if_no("pyarrow")
+    def test_write_s3_parquet_fails(self, tips_df):
+        # GH 27679
+        with pytest.raises(
+            FileNotFoundError, match="The specified bucket does not exist"
+        ):
+            tips_df.to_parquet("s3://an_s3_bucket_data_doesnt_exit/not_real.parquet")
+
     def test_read_csv_handles_boto_s3_object(self, s3_resource, tips_file):
 
         # see gh-16135
diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py
index 557a9d5c13987..cf745fcc492a1 100644
--- a/pandas/tests/io/test_gcs.py
+++ b/pandas/tests/io/test_gcs.py
@@ -56,7 +56,15 @@ def open(*args):
     monkeypatch.setattr("gcsfs.GCSFileSystem", MockGCSFileSystem)
     df1.to_csv("gs://test/test.csv", index=True)
 
-    df2 = read_csv(StringIO(s.getvalue()), parse_dates=["dt"], index_col=0)
+
+    def mock_get_filepath_or_buffer(*args, **kwargs):
+        return StringIO(df1.to_csv()), None, None, False
+
+    monkeypatch.setattr(
+        "pandas.io.gcs.get_filepath_or_buffer", mock_get_filepath_or_buffer
+    )
+
+    df2 = read_csv("gs://test/test.csv", parse_dates=["dt"], index_col=0)
 
     tm.assert_frame_equal(df1, df2)
 
@@ -86,28 +94,6 @@ def open(self, path, mode="r", *args):
     )
 
 
-@td.skip_if_no("gcsfs")
-def test_gcs_get_filepath_or_buffer(monkeypatch):
-    df1 = DataFrame(
-        {
-            "int": [1, 3],
-            "float": [2.0, np.nan],
-            "str": ["t", "s"],
-            "dt": date_range("2018-06-18", periods=2),
-        }
-    )
-
-    def mock_get_filepath_or_buffer(*args, **kwargs):
-        return (StringIO(df1.to_csv(index=False)), None, None, False)
-
-    monkeypatch.setattr(
-        "pandas.io.gcs.get_filepath_or_buffer", mock_get_filepath_or_buffer
-    )
-    df2 = read_csv("gs://test/test.csv", parse_dates=["dt"])
-
-    tm.assert_frame_equal(df1, df2)
-
-
 @td.skip_if_installed("gcsfs")
 def test_gcs_not_present_exception():
     with pytest.raises(ImportError) as e:
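
Reviewer note: the common thread in both writers is that get_filepath_or_buffer may open a file object itself (for s3:// URLs it opens an s3fs handle) and reports that through its fourth return value, should_close. Both to_csv and to_parquet previously discarded that flag, so the s3fs handle was never closed, the buffered upload was never flushed, and errors such as a missing bucket or invalid credentials were swallowed. Below is a minimal sketch of the pattern this patch applies, using toy stand-ins rather than the actual pandas internals (the write() helper is hypothetical):

    import io


    def get_filepath_or_buffer(path):
        """Toy stand-in for pandas.io.common.get_filepath_or_buffer."""
        if path.startswith("s3://"):
            # Imagine an s3fs file object here: writes are buffered locally
            # and only uploaded when the handle is flushed/closed.
            remote = io.StringIO()
            return remote, None, None, True  # opened here => should_close=True
        return path, None, None, False  # caller-owned handle, leave it open


    def write(text, path):
        buf, _, _, should_close = get_filepath_or_buffer(path)
        try:
            buf.write(text)
        finally:
            if should_close:
                # Closing flushes the buffered remote write; for s3fs this
                # is where FileNotFoundError/PermissionError would surface.
                buf.close()

With the handle closed, writing to a nonexistent bucket raises instead of returning silently, which is what the new tests assert via pytest.raises(FileNotFoundError, match="The specified bucket does not exist").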