From 05eb0808b1c508e7a76e087201844ffc4a9fb1da Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 6 Jul 2022 15:20:14 -0700 Subject: [PATCH] BUG: Return Float64 for read_parquet(use_nullable_dtypes=True) --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/io/parquet.py | 2 ++ pandas/tests/io/test_parquet.py | 17 ++++++++++++++++- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 0b450fab53137..c70acc0a0b18c 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -948,7 +948,7 @@ I/O - Bug in :func:`read_sas` with RLE-compressed SAS7BDAT files that contain 0x40 control bytes (:issue:`31243`) - Bug in :func:`read_sas` that scrambled column names (:issue:`31243`) - Bug in :func:`read_sas` with RLE-compressed SAS7BDAT files that contain 0x00 control bytes (:issue:`47099`) -- +- Bug in :func:`read_parquet` with ``use_nullable_dtypes=True`` where ``float64`` dtype was returned instead of nullable ``Float64`` dtype (:issue:`45694`) Period ^^^^^^ diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index cbf3bcc9278d5..d28309cda6788 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -231,6 +231,8 @@ def read( self.api.uint64(): pd.UInt64Dtype(), self.api.bool_(): pd.BooleanDtype(), self.api.string(): pd.StringDtype(), + self.api.float32(): pd.Float32Dtype(), + self.api.float64(): pd.Float64Dtype(), } to_pandas_kwargs["types_mapper"] = mapping.get manager = get_option("mode.data_manager") diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 5b899079dfffd..64e4a15a42061 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -626,6 +626,9 @@ def test_use_nullable_dtypes(self, engine, request): "d": pyarrow.array([True, False, True, None]), # Test that nullable dtypes used even in absence of nulls "e": pyarrow.array([1, 2, 3, 4], "int64"), + # GH 45694 + "f": pyarrow.array([1.0, 2.0, 3.0, None], "float32"), + "g": pyarrow.array([1.0, 2.0, 3.0, None], "float64"), } ) with tm.ensure_clean() as path: @@ -642,6 +645,8 @@ def test_use_nullable_dtypes(self, engine, request): "c": pd.array(["a", "b", "c", None], dtype="string"), "d": pd.array([True, False, True, None], dtype="boolean"), "e": pd.array([1, 2, 3, 4], dtype="Int64"), + "f": pd.array([1.0, 2.0, 3.0, None], dtype="Float32"), + "g": pd.array([1.0, 2.0, 3.0, None], dtype="Float64"), } ) if engine == "fastparquet": @@ -672,7 +677,17 @@ def test_read_empty_array(self, pa, dtype): "value": pd.array([], dtype=dtype), } ) - check_round_trip(df, pa, read_kwargs={"use_nullable_dtypes": True}) + # GH 45694 + expected = None + if dtype == "float": + expected = pd.DataFrame( + { + "value": pd.array([], dtype="Float64"), + } + ) + check_round_trip( + df, pa, read_kwargs={"use_nullable_dtypes": True}, expected=expected + ) @pytest.mark.filterwarnings("ignore:CategoricalBlock is deprecated:DeprecationWarning")