From 6bdb7f4016ca7f9249c2361a3702ab3787753932 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 17 Jul 2023 09:15:36 -0700 Subject: [PATCH] BUG: idxmin/max dtype --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/arrays/categorical.py | 3 +++ pandas/core/frame.py | 16 +++++++--------- pandas/core/series.py | 6 ++++-- pandas/tests/frame/test_reductions.py | 2 ++ pandas/tests/reductions/test_reductions.py | 18 ++++++++++++++++++ 6 files changed, 35 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index b7cc254d5c7e5..812a790e945f4 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -497,6 +497,7 @@ Missing ^^^^^^^ - Bug in :meth:`DataFrame.interpolate` failing to fill across multiblock data when ``method`` is "pad", "ffill", "bfill", or "backfill" (:issue:`53898`) - Bug in :meth:`DataFrame.interpolate` ignoring ``inplace`` when :class:`DataFrame` is empty (:issue:`53199`) +- Bug in :meth:`Series.idxmin`, :meth:`Series.idxmax`, :meth:`DataFrame.idxmin`, :meth:`DataFrame.idxmax` with a :class:`DatetimeIndex` index containing ``NaT`` incorrectly returning ``NaN`` instead of ``NaT`` (:issue:`43587`) - Bug in :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` failing to raise on invalid ``downcast`` keyword, which can be only ``None`` or "infer" (:issue:`53103`) - Bug in :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` with complex dtype incorrectly failing to fill ``NaN`` entries (:issue:`53635`) - diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 33cd5fe147d2e..8eef81bc65f20 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2323,6 +2323,9 @@ def _reduce( self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs ): result = super()._reduce(name, skipna=skipna, keepdims=keepdims, **kwargs) + if name in ["argmax", "argmin"]: + # don't wrap in Categorical! + return result if keepdims: return type(self)(result, dtype=self.dtype) else: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a469f4965117b..317fe9e56d847 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -11212,13 +11212,12 @@ def idxmin( nanops.nanargmin, "argmin", axis=axis, skipna=skipna, numeric_only=False ) indices = res._values - - # indices will always be 1d array since axis is not None and - # values is a 2d array for DataFrame # indices will always be np.ndarray since axis is not N index = data._get_axis(axis) - result = [index[i] if i >= 0 else np.nan for i in indices] + result = algorithms.take( + index._values, indices, allow_fill=True, fill_value=index._na_value + ) final_result = data._constructor_sliced(result, index=data._get_agg_axis(axis)) return final_result.__finalize__(self, method="idxmin") @@ -11241,13 +11240,12 @@ def idxmax( nanops.nanargmax, "argmax", axis=axis, skipna=skipna, numeric_only=False ) indices = res._values - - # indices will always be 1d array since axis is not None and - # values is a 2d array for DataFrame - assert isinstance(indices, (np.ndarray, ExtensionArray)) # for mypy + # indices will always be 1d array since axis is not None index = data._get_axis(axis) - result = [index[i] if i >= 0 else np.nan for i in indices] + result = algorithms.take( + index._values, indices, allow_fill=True, fill_value=index._na_value + ) final_result = data._constructor_sliced(result, index=data._get_agg_axis(axis)) return final_result.__finalize__(self, method="idxmax") diff --git a/pandas/core/series.py b/pandas/core/series.py index f17a633259816..c528fa6fda1e2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2532,7 +2532,8 @@ def idxmin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab axis = self._get_axis_number(axis) i = self.argmin(axis, skipna, *args, **kwargs) if i == -1: - return np.nan + # GH#43587 give correct NA value for Index. + return self.index._na_value return self.index[i] def idxmax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashable: @@ -2602,7 +2603,8 @@ def idxmax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab axis = self._get_axis_number(axis) i = self.argmax(axis, skipna, *args, **kwargs) if i == -1: - return np.nan + # GH#43587 give correct NA value for Index. + return self.index._na_value return self.index[i] def round(self, decimals: int = 0, *args, **kwargs) -> Series: diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 555d8f1b18797..c5a775fc4a9c4 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -970,6 +970,7 @@ def test_idxmin(self, float_frame, int_frame, skipna, axis): for df in [frame, int_frame]: result = df.idxmin(axis=axis, skipna=skipna) expected = df.apply(Series.idxmin, axis=axis, skipna=skipna) + expected = expected.astype(df.index.dtype) tm.assert_series_equal(result, expected) @pytest.mark.parametrize("axis", [0, 1]) @@ -1010,6 +1011,7 @@ def test_idxmax(self, float_frame, int_frame, skipna, axis): for df in [frame, int_frame]: result = df.idxmax(axis=axis, skipna=skipna) expected = df.apply(Series.idxmax, axis=axis, skipna=skipna) + expected = expected.astype(df.index.dtype) tm.assert_series_equal(result, expected) @pytest.mark.parametrize("axis", [0, 1]) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 83b9a83c0a6a2..4c29d464dedd6 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -808,6 +808,24 @@ def test_numpy_argmax(self): with pytest.raises(ValueError, match=msg): np.argmax(s, out=data) + def test_idxmin_dt64index(self): + # GH#43587 should have NaT instead of NaN + ser = Series( + [1.0, 2.0, np.nan], index=DatetimeIndex(["NaT", "2015-02-08", "NaT"]) + ) + res = ser.idxmin(skipna=False) + assert res is NaT + res = ser.idxmax(skipna=False) + assert res is NaT + + df = ser.to_frame() + res = df.idxmin(skipna=False) + assert res.dtype == "M8[ns]" + assert res.isna().all() + res = df.idxmax(skipna=False) + assert res.dtype == "M8[ns]" + assert res.isna().all() + def test_idxmin(self): # test idxmin # _check_stat_op approach can not be used here because of isna check.