Skip to content

BUG: dtype of DataFrame.idxmax/idxmin incorrect for empty frames #53296

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
May 22, 2023
Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,7 @@ Reshaping
^^^^^^^^^
- Bug in :func:`crosstab` when ``dropna=False`` would not keep ``np.nan`` in the result (:issue:`10772`)
- Bug in :meth:`DataFrame.agg` and :meth:`Series.agg` on non-unique columns would return incorrect type when dict-like argument passed in (:issue:`51099`)
- Bug in :meth:`DataFrame.idxmin` and :meth:`DataFrame.idxmax`, where the axis dtype would be lost for empty frames (:issue:`53265`)
- Bug in :meth:`DataFrame.merge` not merging correctly when having ``MultiIndex`` with single level (:issue:`52331`)
- Bug in :meth:`DataFrame.stack` losing extension dtypes when columns is a :class:`MultiIndex` and frame contains mixed dtypes (:issue:`45740`)
- Bug in :meth:`DataFrame.transpose` inferring dtype for object column (:issue:`51546`)
Expand Down
10 changes: 10 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -11161,6 +11161,11 @@ def idxmin(
self, axis: Axis = 0, skipna: bool = True, numeric_only: bool = False
) -> Series:
axis = self._get_axis_number(axis)

if self.empty and len(self.axes[axis]):
axis_dtype = self.axes[axis].dtype
return self._constructor_sliced(dtype=axis_dtype)

if numeric_only:
data = self._get_numeric_data()
else:
Expand All @@ -11186,6 +11191,11 @@ def idxmax(
self, axis: Axis = 0, skipna: bool = True, numeric_only: bool = False
) -> Series:
axis = self._get_axis_number(axis)

if self.empty and len(self.axes[axis]):
axis_dtype = self.axes[axis].dtype
return self._constructor_sliced(dtype=axis_dtype)

if numeric_only:
data = self._get_numeric_data()
else:
Expand Down
24 changes: 24 additions & 0 deletions pandas/tests/frame/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -964,6 +964,18 @@ def test_idxmin(self, float_frame, int_frame, skipna, axis):
expected = df.apply(Series.idxmin, axis=axis, skipna=skipna)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("axis", [0, 1])
def test_idxmin_empty(self, index, skipna, axis):
    # GH53265
    # Attach the labels to the axis being reduced; the frame is built with
    # labels only, so there is no data to reduce over.
    label_kwarg = "index" if axis == 0 else "columns"
    empty_df = DataFrame(**{label_kwarg: index})

    # The result must be an empty Series carrying the dtype of those labels.
    tm.assert_series_equal(
        empty_df.idxmin(axis=axis, skipna=skipna),
        Series(dtype=index.dtype),
    )

@pytest.mark.parametrize("numeric_only", [True, False])
def test_idxmin_numeric_only(self, numeric_only):
df = DataFrame({"a": [2, 3, 1], "b": [2, 1, 1], "c": list("xyx")})
Expand Down Expand Up @@ -992,6 +1004,18 @@ def test_idxmax(self, float_frame, int_frame, skipna, axis):
expected = df.apply(Series.idxmax, axis=axis, skipna=skipna)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("axis", [0, 1])
def test_idxmax_empty(self, index, skipna, axis):
    # GH53265
    # Attach the labels to the axis being reduced; the frame is built with
    # labels only, so there is no data to reduce over.
    label_kwarg = "index" if axis == 0 else "columns"
    empty_df = DataFrame(**{label_kwarg: index})

    # The result must be an empty Series carrying the dtype of those labels.
    tm.assert_series_equal(
        empty_df.idxmax(axis=axis, skipna=skipna),
        Series(dtype=index.dtype),
    )

@pytest.mark.parametrize("numeric_only", [True, False])
def test_idxmax_numeric_only(self, numeric_only):
df = DataFrame({"a": [2, 3, 1], "b": [2, 1, 1], "c": list("xyx")})
Expand Down