Skip to content

Commit 5b2f4a5

Browse files
authored
BUG: DataFrame[dt64].quantile(axis=1) when empty returning f8 (#45294)
1 parent a377f03 commit 5b2f4a5

File tree

3 files changed

+44
-13
lines changed

3 files changed

+44
-13
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,7 @@ Categorical
118118

119119
Datetimelike
120120
^^^^^^^^^^^^
121+
- Bug in :meth:`DataFrame.quantile` with datetime-like dtypes and no rows incorrectly returning ``float64`` dtype instead of retaining datetime-like dtype (:issue:`41544`)
121122
- Bug in :func:`to_datetime` with sequences of ``np.str_`` objects incorrectly raising (:issue:`32264`)
122123
- Bug in :class:`Timestamp` construction when passing datetime components as positional arguments and ``tzinfo`` as a keyword argument incorrectly raising (:issue:`31929`)
123124
-

pandas/core/frame.py

+20-5
Original file line number | Diff line number | Diff line change
@@ -123,6 +123,7 @@
123123
is_object_dtype,
124124
is_scalar,
125125
is_sequence,
126+
needs_i8_conversion,
126127
pandas_dtype,
127128
)
128129
from pandas.core.dtypes.dtypes import ExtensionDtype
@@ -10462,27 +10463,41 @@ def quantile(
1046210463
Name: 0.5, dtype: object
1046310464
"""
1046410465
validate_percentile(q)
10466+
axis = self._get_axis_number(axis)
1046510467

1046610468
if not is_list_like(q):
1046710469
# BlockManager.quantile expects listlike, so we wrap and unwrap here
10468-
res = self.quantile(
10470+
res_df = self.quantile(
1046910471
[q], axis=axis, numeric_only=numeric_only, interpolation=interpolation
1047010472
)
10471-
return res.iloc[0]
10473+
res = res_df.iloc[0]
10474+
if axis == 1 and len(self) == 0:
10475+
# GH#41544 try to get an appropriate dtype
10476+
dtype = find_common_type(list(self.dtypes))
10477+
if needs_i8_conversion(dtype):
10478+
return res.astype(dtype)
10479+
return res
1047210480

1047310481
q = Index(q, dtype=np.float64)
1047410482
data = self._get_numeric_data() if numeric_only else self
10475-
axis = self._get_axis_number(axis)
1047610483

1047710484
if axis == 1:
1047810485
data = data.T
1047910486

1048010487
if len(data.columns) == 0:
1048110488
# GH#23925 _get_numeric_data may have dropped all columns
1048210489
cols = Index([], name=self.columns.name)
10490+
10491+
dtype = np.float64
10492+
if axis == 1:
10493+
# GH#41544 try to get an appropriate dtype
10494+
cdtype = find_common_type(list(self.dtypes))
10495+
if needs_i8_conversion(cdtype):
10496+
dtype = cdtype
10497+
1048310498
if is_list_like(q):
10484-
return self._constructor([], index=q, columns=cols)
10485-
return self._constructor_sliced([], index=cols, name=q, dtype=np.float64)
10499+
return self._constructor([], index=q, columns=cols, dtype=dtype)
10500+
return self._constructor_sliced([], index=cols, name=q, dtype=dtype)
1048610501

1048710502
res = data._mgr.quantile(qs=q, axis=1, interpolation=interpolation)
1048810503

pandas/tests/frame/methods/test_quantile.py

+23-8
Original file line number | Diff line number | Diff line change
@@ -293,6 +293,28 @@ def test_quantile_datetime(self):
293293
expected = DataFrame(index=[0.5])
294294
tm.assert_frame_equal(result, expected)
295295

296+
@pytest.mark.parametrize(
297+
"dtype",
298+
[
299+
"datetime64[ns]",
300+
"datetime64[ns, US/Pacific]",
301+
"timedelta64[ns]",
302+
"Period[D]",
303+
],
304+
)
305+
def test_quantile_dt64_empty(self, dtype):
306+
# GH#41544
307+
df = DataFrame(columns=["a", "b"], dtype=dtype)
308+
309+
res = df.quantile(0.5, axis=1, numeric_only=False)
310+
expected = Series([], index=[], name=0.5, dtype=dtype)
311+
tm.assert_series_equal(res, expected)
312+
313+
# no columns in result, so no dtype preservation
314+
res = df.quantile([0.5], axis=1, numeric_only=False)
315+
expected = DataFrame(index=[0.5])
316+
tm.assert_frame_equal(res, expected)
317+
296318
def test_quantile_invalid(self, datetime_frame):
297319
msg = "percentiles should all be in the interval \\[0, 1\\]"
298320
for invalid in [-1, 2, [0.5, -1], [0.5, 2]]:
@@ -722,14 +744,7 @@ def test_empty_numeric(self, dtype, expected_data, expected_index, axis):
722744
@pytest.mark.parametrize(
723745
"dtype, expected_data, expected_index, axis, expected_dtype",
724746
[
725-
pytest.param(
726-
"datetime64[ns]",
727-
[],
728-
[],
729-
1,
730-
"datetime64[ns]",
731-
marks=pytest.mark.xfail(reason="#GH 41544"),
732-
),
747+
["datetime64[ns]", [], [], 1, "datetime64[ns]"],
733748
["datetime64[ns]", [pd.NaT, pd.NaT], ["a", "b"], 0, "datetime64[ns]"],
734749
],
735750
)

0 commit comments

Comments
 (0)