diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
index fa19a49b7ff45..5c36bd83adcce 100644
--- a/doc/source/whatsnew/v1.5.0.rst
+++ b/doc/source/whatsnew/v1.5.0.rst
@@ -232,6 +232,8 @@ I/O
 ^^^
 - Bug in :meth:`DataFrame.to_stata` where no error is raised if the :class:`DataFrame` contains ``-np.inf`` (:issue:`45350`)
 - Bug in :meth:`DataFrame.info` where a new line at the end of the output is omitted when called on an empty :class:`DataFrame` (:issue:`45494`)
+- Bug in :meth:`DataFrame.to_parquet` when ``engine="pyarrow"`` where a partially written file was left on disk when a column of an unsupported datatype was passed (:issue:`44914`)
+-
 
 Period
 ^^^^^^
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index 4880c7730ff07..c5bfbd2b6b35d 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -180,6 +180,16 @@ def write(
             mode="wb",
             is_dir=partition_cols is not None,
         )
+        if isinstance(path_or_handle, io.BufferedWriter):
+            # Pass the file name rather than the open handle so that a failed
+            # write does not leave a partially written file behind (GH 44914).
+            # A writer without a usable path name (e.g. one opened from a file
+            # descriptor, whose name is an int) is passed through unchanged.
+            handle_name = getattr(path_or_handle, "name", None)
+            if isinstance(handle_name, bytes):
+                handle_name = handle_name.decode()
+            if isinstance(handle_name, str):
+                path_or_handle = handle_name
+
         try:
             if partition_cols is not None:
                 # writes to multiple files under the given path
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index b60ff00f9d59e..93cc2fd5100c8 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -721,6 +721,27 @@ def test_unsupported(self, pa):
         # older pyarrows raise ArrowInvalid
         self.check_external_error_on_write(df, pa, pyarrow.ArrowException)
 
+    def test_unsupported_float16(self, pa):
+        # #44847, #44914
+        # Not able to write float 16 column using pyarrow.
+        data = np.arange(2, 10, dtype=np.float16)
+        df = pd.DataFrame(data=data, columns=["fp16"])
+        self.check_external_error_on_write(df, pa, pyarrow.ArrowException)
+
+    @pytest.mark.parametrize("path_type", [str, pathlib.Path])
+    def test_unsupported_float16_cleanup(self, pa, path_type):
+        # #44847, #44914
+        # Not able to write float 16 column using pyarrow.
+        # Tests cleanup by pyarrow in case of an error
+        data = np.arange(2, 10, dtype=np.float16)
+        df = pd.DataFrame(data=data, columns=["fp16"])
+
+        with tm.ensure_clean() as path_str:
+            path = path_type(path_str)
+            with tm.external_error_raised(pyarrow.ArrowException):
+                df.to_parquet(path=path, engine=pa)
+            assert not os.path.isfile(path)
+
     def test_categorical(self, pa):
         # supported in >= 0.7.0