Commit 6b2dd37

IO: Fix S3 Error Handling (#33645)

1 parent e0d4622

File tree

5 files changed: +35 -27 lines
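Why the fix works: get_filepath_or_buffer returns a 4-tuple whose last element, should_close, tells the caller whether pandas opened the underlying handle and is therefore responsible for closing it. The CSV and parquet writers were discarding that flag, so s3fs file objects were never closed; s3fs buffers writes and only uploads on close(), which is also where a missing bucket or invalid credentials raise. The sketch below illustrates that contract with a simplified stand-in for the helper (not pandas' exact implementation; the s3:// branch assumes s3fs is installed):

from io import BytesIO

def get_filepath_or_buffer(path, mode="wb"):
    # Simplified stand-in for pandas.io.common.get_filepath_or_buffer.
    # Returns (filepath_or_buffer, encoding, compression, should_close).
    if isinstance(path, str) and path.startswith("s3://"):
        import s3fs  # only needed for s3:// paths

        fs = s3fs.S3FileSystem()
        # Closing the returned file object is what actually uploads the
        # bytes -- and where a missing bucket or bad credentials raise.
        return fs.open(path, mode), None, None, True
    return path, None, None, False  # caller-owned buffer: don't close it

buf = BytesIO()
f, _, _, should_close = get_filepath_or_buffer(buf)
f.write(b"payload")
if should_close:  # False here; True for the s3:// branch above
    f.close()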

doc/source/whatsnew/v1.1.0.rst (+2)

@@ -583,6 +583,8 @@ I/O
 - Bug in :func:`pandas.io.json.json_normalize` where location specified by `record_path` doesn't point to an array. (:issue:`26284`)
 - :func:`pandas.read_hdf` has a more explicit error message when loading an
   unsupported HDF file (:issue:`9539`)
+- Bug in :meth:`~DataFrame.to_parquet` was not raising ``PermissionError`` when writing to a private s3 bucket with invalid creds. (:issue:`27679`)
+- Bug in :meth:`~DataFrame.to_csv` was silently failing when writing to an invalid s3 bucket. (:issue:`32486`)
 
 Plotting
 ^^^^^^^^

pandas/io/formats/csvs.py (+3 -1)

@@ -62,7 +62,7 @@ def __init__(
         # Extract compression mode as given, if dict
         compression, self.compression_args = get_compression_method(compression)
 
-        self.path_or_buf, _, _, _ = get_filepath_or_buffer(
+        self.path_or_buf, _, _, self.should_close = get_filepath_or_buffer(
             path_or_buf, encoding=encoding, compression=compression, mode=mode
         )
         self.sep = sep
@@ -223,6 +223,8 @@ def save(self) -> None:
                 f.close()
                 for _fh in handles:
                     _fh.close()
+            elif self.should_close:
+                f.close()
 
     def _save_header(self):
         writer = self.writer
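With self.should_close captured, save() now closes a handle it received from get_filepath_or_buffer even when no local file handle was opened, so upload errors propagate to the caller. A usage sketch of the fixed behavior (bucket name taken from the new test below; assumes s3fs is installed and no such bucket exists):

import pandas as pd

df = pd.DataFrame({"a": [1, 2]})
try:
    # Before this commit, the s3fs handle was never closed, so nothing was
    # flushed and no error was raised (GH 32486); the close() added above
    # uploads the buffer and surfaces the failure.
    df.to_csv("s3://an_s3_bucket_data_doesnt_exit/not_real.csv")
except FileNotFoundError as err:
    print(err)  # "The specified bucket does not exist" (from s3fs/botocore)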

pandas/io/parquet.py (+3 -1)

@@ -92,7 +92,7 @@ def write(
         **kwargs,
     ):
         self.validate_dataframe(df)
-        path, _, _, _ = get_filepath_or_buffer(path, mode="wb")
+        path, _, _, should_close = get_filepath_or_buffer(path, mode="wb")
 
         from_pandas_kwargs: Dict[str, Any] = {"schema": kwargs.pop("schema", None)}
         if index is not None:
@@ -109,6 +109,8 @@ def write(
             )
         else:
             self.api.parquet.write_table(table, path, compression=compression, **kwargs)
+        if should_close:
+            path.close()
 
     def read(self, path, columns=None, **kwargs):
         path, _, _, should_close = get_filepath_or_buffer(path)
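The same pattern applies to the pyarrow engine: after write_table fills the s3fs handle, the new close() flushes it, so to_parquet raises on a private bucket with invalid credentials (GH 27679) instead of appearing to succeed. A sketch under the same assumptions (the bucket name here is a placeholder, not from the commit):

import pandas as pd

df = pd.DataFrame({"a": [1, 2]})
try:
    # "some-private-bucket" is hypothetical; with invalid credentials this
    # now raises rather than silently dropping the write.
    df.to_parquet("s3://some-private-bucket/not_real.parquet")
except (PermissionError, FileNotFoundError) as err:
    print(err)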

pandas/tests/io/parser/test_network.py (+18 -2)

@@ -54,8 +54,8 @@ def tips_df(datapath):
 @pytest.mark.usefixtures("s3_resource")
 @td.skip_if_not_us_locale()
 class TestS3:
+    @td.skip_if_no("s3fs")
     def test_parse_public_s3_bucket(self, tips_df):
-        pytest.importorskip("s3fs")
 
         # more of an integration test due to the not-public contents portion
         # can probably mock this though.
@@ -159,7 +159,7 @@ def test_parse_public_s3_bucket_nrows_python(self, tips_df):
         assert not df.empty
         tm.assert_frame_equal(tips_df.iloc[:10], df)
 
-    def test_s3_fails(self):
+    def test_read_s3_fails(self):
         with pytest.raises(IOError):
             read_csv("s3://nyqpug/asdf.csv")
 
@@ -168,6 +168,22 @@ def test_s3_fails(self):
         with pytest.raises(IOError):
             read_csv("s3://cant_get_it/file.csv")
 
+    def test_write_s3_csv_fails(self, tips_df):
+        # GH 32486
+        # Attempting to write to an invalid S3 path should raise
+        with pytest.raises(
+            FileNotFoundError, match="The specified bucket does not exist"
+        ):
+            tips_df.to_csv("s3://an_s3_bucket_data_doesnt_exit/not_real.csv")
+
+    @td.skip_if_no("pyarrow")
+    def test_write_s3_parquet_fails(self, tips_df):
+        # GH 27679
+        with pytest.raises(
+            FileNotFoundError, match="The specified bucket does not exist"
+        ):
+            tips_df.to_parquet("s3://an_s3_bucket_data_doesnt_exit/not_real.parquet")
+
     def test_read_csv_handles_boto_s3_object(self, s3_resource, tips_file):
         # see gh-16135
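Aside: the move from pytest.importorskip("s3fs") in the test body to the @td.skip_if_no("s3fs") decorator keeps the skip condition visible in the test signature. A minimal, self-contained sketch of an equivalent decorator (plain pytest, not pandas' actual _test_decorators module):

import importlib.util

import pytest

def skip_if_no(package: str):
    # Skip the decorated test when `package` cannot be imported.
    has_pkg = importlib.util.find_spec(package) is not None
    return pytest.mark.skipif(not has_pkg, reason=f"requires {package}")

@skip_if_no("s3fs")
def test_needs_s3fs():
    import s3fs  # safe: the test is skipped entirely when s3fs is absent

    assert s3fs is not None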

pandas/tests/io/test_gcs.py (+9 -23)

@@ -56,7 +56,15 @@ def open(*args):
 
     monkeypatch.setattr("gcsfs.GCSFileSystem", MockGCSFileSystem)
     df1.to_csv("gs://test/test.csv", index=True)
-    df2 = read_csv(StringIO(s.getvalue()), parse_dates=["dt"], index_col=0)
+
+    def mock_get_filepath_or_buffer(*args, **kwargs):
+        return StringIO(df1.to_csv()), None, None, False
+
+    monkeypatch.setattr(
+        "pandas.io.gcs.get_filepath_or_buffer", mock_get_filepath_or_buffer
+    )
+
+    df2 = read_csv("gs://test/test.csv", parse_dates=["dt"], index_col=0)
 
     tm.assert_frame_equal(df1, df2)
 
@@ -86,28 +94,6 @@ def open(self, path, mode="r", *args):
     )
 
 
-@td.skip_if_no("gcsfs")
-def test_gcs_get_filepath_or_buffer(monkeypatch):
-    df1 = DataFrame(
-        {
-            "int": [1, 3],
-            "float": [2.0, np.nan],
-            "str": ["t", "s"],
-            "dt": date_range("2018-06-18", periods=2),
-        }
-    )
-
-    def mock_get_filepath_or_buffer(*args, **kwargs):
-        return (StringIO(df1.to_csv(index=False)), None, None, False)
-
-    monkeypatch.setattr(
-        "pandas.io.gcs.get_filepath_or_buffer", mock_get_filepath_or_buffer
-    )
-    df2 = read_csv("gs://test/test.csv", parse_dates=["dt"])
-
-    tm.assert_frame_equal(df1, df2)
-
-
 @td.skip_if_installed("gcsfs")
 def test_gcs_not_present_exception():
     with pytest.raises(ImportError) as e:
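The standalone test_gcs_get_filepath_or_buffer test is folded into the round-trip test above: read_csv("gs://test/test.csv") is now served by monkeypatching pandas.io.gcs.get_filepath_or_buffer, with False in the should_close slot because the mock hands back a caller-owned StringIO. A self-contained sketch of that monkeypatch pattern, with hypothetical storage/read_remote names standing in for pandas internals (runnable under pytest):

from io import StringIO

class storage:
    # Stand-in for the module attribute being patched; the real target is
    # "pandas.io.gcs.get_filepath_or_buffer".
    @staticmethod
    def get_filepath_or_buffer(path, **kwargs):
        raise ConnectionError("would hit the network")

def read_remote(path):
    buf, _, _, should_close = storage.get_filepath_or_buffer(path)
    try:
        return buf.read()
    finally:
        if should_close:
            buf.close()

def test_read_remote_is_mocked(monkeypatch):
    # monkeypatch is pytest's built-in fixture; the lambda mirrors the
    # (buffer, encoding, compression, should_close) return shape.
    monkeypatch.setattr(
        storage,
        "get_filepath_or_buffer",
        lambda path, **kwargs: (StringIO("a,b\n1,2\n"), None, None, False),
    )
    assert read_remote("gs://test/test.csv") == "a,b\n1,2\n"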
