Skip to content

Commit 954994b

Browse files
mroeschke authored and yehoshuadimarsky committed
BUG: Return Float64 for read_parquet(use_nullable_dtypes=True) (pandas-dev#47619)
1 parent 8ab784f commit 954994b

File tree

3 files changed

+19
-2
lines changed

3 files changed

+19
-2
lines changed

doc/source/whatsnew/v1.5.0.rst

+1 -1
Original file line number | Diff line number | Diff line change
@@ -948,7 +948,7 @@ I/O
948948
- Bug in :func:`read_sas` with RLE-compressed SAS7BDAT files that contain 0x40 control bytes (:issue:`31243`)
949949
- Bug in :func:`read_sas` that scrambled column names (:issue:`31243`)
950950
- Bug in :func:`read_sas` with RLE-compressed SAS7BDAT files that contain 0x00 control bytes (:issue:`47099`)
951-
-
951+
- Bug in :func:`read_parquet` with ``use_nullable_dtypes=True`` where ``float64`` dtype was returned instead of nullable ``Float64`` dtype (:issue:`45694`)
952952

953953
Period
954954
^^^^^^

pandas/io/parquet.py

+2
Original file line number | Diff line number | Diff line change
@@ -231,6 +231,8 @@ def read(
231231
self.api.uint64(): pd.UInt64Dtype(),
232232
self.api.bool_(): pd.BooleanDtype(),
233233
self.api.string(): pd.StringDtype(),
234+
self.api.float32(): pd.Float32Dtype(),
235+
self.api.float64(): pd.Float64Dtype(),
234236
}
235237
to_pandas_kwargs["types_mapper"] = mapping.get
236238
manager = get_option("mode.data_manager")

pandas/tests/io/test_parquet.py

+16 -1
Original file line number | Diff line number | Diff line change
@@ -626,6 +626,9 @@ def test_use_nullable_dtypes(self, engine, request):
626626
"d": pyarrow.array([True, False, True, None]),
627627
# Test that nullable dtypes used even in absence of nulls
628628
"e": pyarrow.array([1, 2, 3, 4], "int64"),
629+
# GH 45694
630+
"f": pyarrow.array([1.0, 2.0, 3.0, None], "float32"),
631+
"g": pyarrow.array([1.0, 2.0, 3.0, None], "float64"),
629632
}
630633
)
631634
with tm.ensure_clean() as path:
@@ -642,6 +645,8 @@ def test_use_nullable_dtypes(self, engine, request):
642645
"c": pd.array(["a", "b", "c", None], dtype="string"),
643646
"d": pd.array([True, False, True, None], dtype="boolean"),
644647
"e": pd.array([1, 2, 3, 4], dtype="Int64"),
648+
"f": pd.array([1.0, 2.0, 3.0, None], dtype="Float32"),
649+
"g": pd.array([1.0, 2.0, 3.0, None], dtype="Float64"),
645650
}
646651
)
647652
if engine == "fastparquet":
@@ -672,7 +677,17 @@ def test_read_empty_array(self, pa, dtype):
672677
"value": pd.array([], dtype=dtype),
673678
}
674679
)
675-
check_round_trip(df, pa, read_kwargs={"use_nullable_dtypes": True})
680+
# GH 45694
681+
expected = None
682+
if dtype == "float":
683+
expected = pd.DataFrame(
684+
{
685+
"value": pd.array([], dtype="Float64"),
686+
}
687+
)
688+
check_round_trip(
689+
df, pa, read_kwargs={"use_nullable_dtypes": True}, expected=expected
690+
)
676691

677692

678693
@pytest.mark.filterwarnings("ignore:CategoricalBlock is deprecated:DeprecationWarning")

0 commit comments

Comments
 (0)