From b8e636b42e932f24a6be64c50dfd8eb3a5bd60f0 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Sun, 19 Apr 2020 01:36:30 +0100 Subject: [PATCH 1/8] Fix S3 Error Handling --- pandas/io/formats/csvs.py | 4 ++-- pandas/io/parquet.py | 4 +++- pandas/tests/io/parser/test_network.py | 16 +++++++++++++++- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 091f7662630ff..14ebfb8375cc6 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -62,7 +62,7 @@ def __init__( # Extract compression mode as given, if dict compression, self.compression_args = get_compression_method(compression) - self.path_or_buf, _, _, _ = get_filepath_or_buffer( + self.path_or_buf, _, _, self.should_close = get_filepath_or_buffer( path_or_buf, encoding=encoding, compression=compression, mode=mode ) self.sep = sep @@ -219,7 +219,7 @@ def save(self) -> None: ) f.write(buf) close = True - if close: + if close or self.should_close: f.close() for _fh in handles: _fh.close() diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 33747d2a6dd83..068210eddcc1b 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -92,7 +92,7 @@ def write( **kwargs, ): self.validate_dataframe(df) - path, _, _, _ = get_filepath_or_buffer(path, mode="wb") + path, _, _, should_close = get_filepath_or_buffer(path, mode="wb") from_pandas_kwargs: Dict[str, Any] = {"schema": kwargs.pop("schema", None)} if index is not None: @@ -109,6 +109,8 @@ def write( ) else: self.api.parquet.write_table(table, path, compression=compression, **kwargs) + if should_close: + path.close() def read(self, path, columns=None, **kwargs): path, _, _, should_close = get_filepath_or_buffer(path) diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index b7164477c31f2..cb958c1735be0 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -159,7 +159,7 @@ def test_parse_public_s3_bucket_nrows_python(self, tips_df): assert not df.empty tm.assert_frame_equal(tips_df.iloc[:10], df) - def test_s3_fails(self): + def test_read_s3_fails(self): with pytest.raises(IOError): read_csv("s3://nyqpug/asdf.csv") @@ -168,6 +168,20 @@ def test_s3_fails(self): with pytest.raises(IOError): read_csv("s3://cant_get_it/file.csv") + def test_write_s3_csv_fails(self, tips_df): + # GH 32486 + with pytest.raises( + FileNotFoundError, match="The specified bucket does not exist" + ): + tips_df.to_csv("s3://an_s3_bucket_data_doesnt_exit/not_real.csv") + + def test_write_s3_parquet_fails(self, tips_df): + # GH 27679 + with pytest.raises( + FileNotFoundError, match="The specified bucket does not exist" + ): + tips_df.to_parquet("s3://an_s3_bucket_data_doesnt_exit/not_real.parquet") + def test_read_csv_handles_boto_s3_object(self, s3_resource, tips_file): # see gh-16135 From c891f22d167760437e114184065ecfc1f600ca56 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Sun, 19 Apr 2020 02:23:31 +0100 Subject: [PATCH 2/8] elif close --- pandas/io/formats/csvs.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 14ebfb8375cc6..dcd764bec7426 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -219,10 +219,12 @@ def save(self) -> None: ) f.write(buf) close = True - if close or self.should_close: + if close: f.close() for _fh in handles: _fh.close() + elif self.should_close: + f.close() def _save_header(self): writer = self.writer From 6c60dc1a4090c3824fe2355f4873ad875d21cfc6 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Sun, 19 Apr 2020 03:36:16 +0100 Subject: [PATCH 3/8] Fix gcs test --- pandas/tests/io/test_gcs.py | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py index 557a9d5c13987..cf745fcc492a1 100644 --- a/pandas/tests/io/test_gcs.py +++ b/pandas/tests/io/test_gcs.py @@ -56,7 +56,15 @@ def open(*args): monkeypatch.setattr("gcsfs.GCSFileSystem", MockGCSFileSystem) df1.to_csv("gs://test/test.csv", index=True) - df2 = read_csv(StringIO(s.getvalue()), parse_dates=["dt"], index_col=0) + + def mock_get_filepath_or_buffer(*args, **kwargs): + return StringIO(df1.to_csv()), None, None, False + + monkeypatch.setattr( + "pandas.io.gcs.get_filepath_or_buffer", mock_get_filepath_or_buffer + ) + + df2 = read_csv("gs://test/test.csv", parse_dates=["dt"], index_col=0) tm.assert_frame_equal(df1, df2) @@ -86,28 +94,6 @@ def open(self, path, mode="r", *args): ) -@td.skip_if_no("gcsfs") -def test_gcs_get_filepath_or_buffer(monkeypatch): - df1 = DataFrame( - { - "int": [1, 3], - "float": [2.0, np.nan], - "str": ["t", "s"], - "dt": date_range("2018-06-18", periods=2), - } - ) - - def mock_get_filepath_or_buffer(*args, **kwargs): - return (StringIO(df1.to_csv(index=False)), None, None, False) - - monkeypatch.setattr( - "pandas.io.gcs.get_filepath_or_buffer", mock_get_filepath_or_buffer - ) - df2 = read_csv("gs://test/test.csv", parse_dates=["dt"]) - - tm.assert_frame_equal(df1, df2) - - @td.skip_if_installed("gcsfs") def test_gcs_not_present_exception(): with pytest.raises(ImportError) as e: From b745e1db45bdec46f8ba2073b795a7611acf57ac Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Sun, 19 Apr 2020 03:43:49 +0100 Subject: [PATCH 4/8] Add whatsnew --- doc/source/whatsnew/v1.1.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 03a547fadd7ca..b856fd5820613 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -582,6 +582,8 @@ I/O - Bug in :func:`pandas.io.json.json_normalize` where location specified by `record_path` doesn't point to an array. (:issue:`26284`) - :func:`pandas.read_hdf` has a more explicit error message when loading an unsupported HDF file (:issue:`9539`) +- Bug in :meth:`to_parquet` was not raising ``NoCredentialsError`` when writing to s3. (:issue:`27679`) +- Bug in :meth:`to_csv` was silently failing when writing to an invalid s3 bucket. (:issue:`32486`) Plotting ^^^^^^^^ From 31701e8e9111d5d6ffed5ff896278637fcd1efaf Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Sun, 19 Apr 2020 10:19:26 +0100 Subject: [PATCH 5/8] Add whatsnew --- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index b856fd5820613..9347bb6b8a22e 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -582,7 +582,7 @@ I/O - Bug in :func:`pandas.io.json.json_normalize` where location specified by `record_path` doesn't point to an array. (:issue:`26284`) - :func:`pandas.read_hdf` has a more explicit error message when loading an unsupported HDF file (:issue:`9539`) -- Bug in :meth:`to_parquet` was not raising ``NoCredentialsError`` when writing to s3. (:issue:`27679`) +- Bug in :meth:`to_parquet` was not raising ``PermissionError`` when writing to a private s3 bucket with invalid creds. (:issue:`27679`) - Bug in :meth:`to_csv` was silently failing when writing to an invalid s3 bucket. (:issue:`32486`) Plotting From 83ffcb637f44179cc2cf621003f2c6fd05b7829c Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Mon, 20 Apr 2020 16:45:36 +0100 Subject: [PATCH 6/8] Update doc/source/whatsnew/v1.1.0.rst Co-Authored-By: Tom Augspurger --- doc/source/whatsnew/v1.1.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 9347bb6b8a22e..77acf02bade21 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -582,8 +582,8 @@ I/O - Bug in :func:`pandas.io.json.json_normalize` where location specified by `record_path` doesn't point to an array. (:issue:`26284`) - :func:`pandas.read_hdf` has a more explicit error message when loading an unsupported HDF file (:issue:`9539`) -- Bug in :meth:`to_parquet` was not raising ``PermissionError`` when writing to a private s3 bucket with invalid creds. (:issue:`27679`) -- Bug in :meth:`to_csv` was silently failing when writing to an invalid s3 bucket. (:issue:`32486`) +- Bug in :meth:`~DataFrame.to_parquet` was not raising ``PermissionError`` when writing to a private s3 bucket with invalid creds. (:issue:`27679`) +- Bug in :meth:`~DataFrame.to_csv` was silently failing when writing to an invalid s3 bucket. (:issue:`32486`) Plotting ^^^^^^^^ From 25ca427f27e2b1523dfdd8bbeb8927d945e691fc Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Mon, 20 Apr 2020 17:09:30 +0100 Subject: [PATCH 7/8] importer skip --- pandas/tests/io/parser/test_network.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index cb958c1735be0..2b8cb0b354774 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -176,6 +176,7 @@ def test_write_s3_csv_fails(self, tips_df): tips_df.to_csv("s3://an_s3_bucket_data_doesnt_exit/not_real.csv") def test_write_s3_parquet_fails(self, tips_df): + pytest.importorskip("pyarrow") # GH 27679 with pytest.raises( FileNotFoundError, match="The specified bucket does not exist" From 7bd1c9fc96627a65dc70ed64f10ff2740a13c1df Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Tue, 21 Apr 2020 02:43:41 +0100 Subject: [PATCH 8/8] Update as per jreback comments --- pandas/tests/io/parser/test_network.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index 2b8cb0b354774..0f09659a24936 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -54,8 +54,8 @@ def tips_df(datapath): @pytest.mark.usefixtures("s3_resource") @td.skip_if_not_us_locale() class TestS3: + @td.skip_if_no("s3fs") def test_parse_public_s3_bucket(self, tips_df): - pytest.importorskip("s3fs") # more of an integration test due to the not-public contents portion # can probably mock this though. @@ -170,13 +170,14 @@ def test_read_s3_fails(self): def test_write_s3_csv_fails(self, tips_df): # GH 32486 + # Attempting to write to an invalid S3 path should raise with pytest.raises( FileNotFoundError, match="The specified bucket does not exist" ): tips_df.to_csv("s3://an_s3_bucket_data_doesnt_exit/not_real.csv") + @td.skip_if_no("pyarrow") def test_write_s3_parquet_fails(self, tips_df): - pytest.importorskip("pyarrow") # GH 27679 with pytest.raises( FileNotFoundError, match="The specified bucket does not exist"