From 6bdb7f4016ca7f9249c2361a3702ab3787753932 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 17 Jul 2023 09:15:36 -0700
Subject: [PATCH] BUG: idxmin/max dtype

---
 doc/source/whatsnew/v2.1.0.rst             |  1 +
 pandas/core/arrays/categorical.py          |  3 +++
 pandas/core/frame.py                       | 16 +++++++---------
 pandas/core/series.py                      |  6 ++++--
 pandas/tests/frame/test_reductions.py      |  2 ++
 pandas/tests/reductions/test_reductions.py | 18 ++++++++++++++++++
 6 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index b7cc254d5c7e5..812a790e945f4 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -497,6 +497,7 @@ Missing
 ^^^^^^^
 - Bug in :meth:`DataFrame.interpolate` failing to fill across multiblock data when ``method`` is "pad", "ffill", "bfill", or "backfill" (:issue:`53898`)
 - Bug in :meth:`DataFrame.interpolate` ignoring ``inplace`` when :class:`DataFrame` is empty (:issue:`53199`)
+- Bug in :meth:`Series.idxmin`, :meth:`Series.idxmax`, :meth:`DataFrame.idxmin`, :meth:`DataFrame.idxmax` with a :class:`DatetimeIndex` index containing ``NaT`` incorrectly returning ``NaN`` instead of ``NaT`` (:issue:`43587`)
 - Bug in :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` failing to raise on invalid ``downcast`` keyword, which can be only ``None`` or "infer" (:issue:`53103`)
 - Bug in :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` with complex dtype incorrectly failing to fill ``NaN`` entries (:issue:`53635`)
 -
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 33cd5fe147d2e..8eef81bc65f20 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2323,6 +2323,9 @@ def _reduce(
         self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
     ):
         result = super()._reduce(name, skipna=skipna, keepdims=keepdims, **kwargs)
+        if name in ["argmax", "argmin"]:
+            # don't wrap in Categorical!
+            return result
         if keepdims:
             return type(self)(result, dtype=self.dtype)
         else:
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index a469f4965117b..317fe9e56d847 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -11212,13 +11212,12 @@ def idxmin(
             nanops.nanargmin, "argmin", axis=axis, skipna=skipna, numeric_only=False
         )
         indices = res._values
-
-        # indices will always be 1d array since axis is not None and
-        # values is a 2d array for DataFrame
         # indices will always be np.ndarray since axis is not N
 
         index = data._get_axis(axis)
-        result = [index[i] if i >= 0 else np.nan for i in indices]
+        result = algorithms.take(
+            index._values, indices, allow_fill=True, fill_value=index._na_value
+        )
         final_result = data._constructor_sliced(result, index=data._get_agg_axis(axis))
         return final_result.__finalize__(self, method="idxmin")
 
@@ -11241,13 +11240,12 @@ def idxmax(
             nanops.nanargmax, "argmax", axis=axis, skipna=skipna, numeric_only=False
         )
         indices = res._values
-
-        # indices will always be 1d array since axis is not None and
-        # values is a 2d array for DataFrame
-        assert isinstance(indices, (np.ndarray, ExtensionArray))  # for mypy
+        # indices will always be 1d array since axis is not None
 
         index = data._get_axis(axis)
-        result = [index[i] if i >= 0 else np.nan for i in indices]
+        result = algorithms.take(
+            index._values, indices, allow_fill=True, fill_value=index._na_value
+        )
         final_result = data._constructor_sliced(result, index=data._get_agg_axis(axis))
         return final_result.__finalize__(self, method="idxmax")
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
index f17a633259816..c528fa6fda1e2 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2532,7 +2532,8 @@ def idxmin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab
         axis = self._get_axis_number(axis)
         i = self.argmin(axis, skipna, *args, **kwargs)
         if i == -1:
-            return np.nan
+            # GH#43587 give correct NA value for Index.
+            return self.index._na_value
         return self.index[i]
 
     def idxmax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashable:
@@ -2602,7 +2603,8 @@ def idxmax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab
         axis = self._get_axis_number(axis)
         i = self.argmax(axis, skipna, *args, **kwargs)
         if i == -1:
-            return np.nan
+            # GH#43587 give correct NA value for Index.
+            return self.index._na_value
         return self.index[i]
 
     def round(self, decimals: int = 0, *args, **kwargs) -> Series:
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index 555d8f1b18797..c5a775fc4a9c4 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -970,6 +970,7 @@ def test_idxmin(self, float_frame, int_frame, skipna, axis):
         for df in [frame, int_frame]:
             result = df.idxmin(axis=axis, skipna=skipna)
             expected = df.apply(Series.idxmin, axis=axis, skipna=skipna)
+            expected = expected.astype(df.index.dtype)
             tm.assert_series_equal(result, expected)
 
     @pytest.mark.parametrize("axis", [0, 1])
@@ -1010,6 +1011,7 @@ def test_idxmax(self, float_frame, int_frame, skipna, axis):
         for df in [frame, int_frame]:
             result = df.idxmax(axis=axis, skipna=skipna)
             expected = df.apply(Series.idxmax, axis=axis, skipna=skipna)
+            expected = expected.astype(df.index.dtype)
             tm.assert_series_equal(result, expected)
 
     @pytest.mark.parametrize("axis", [0, 1])
diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py
index 83b9a83c0a6a2..4c29d464dedd6 100644
--- a/pandas/tests/reductions/test_reductions.py
+++ b/pandas/tests/reductions/test_reductions.py
@@ -808,6 +808,24 @@ def test_numpy_argmax(self):
         with pytest.raises(ValueError, match=msg):
             np.argmax(s, out=data)
 
+    def test_idxmin_dt64index(self):
+        # GH#43587 should have NaT instead of NaN
+        ser = Series(
+            [1.0, 2.0, np.nan], index=DatetimeIndex(["NaT", "2015-02-08", "NaT"])
+        )
+        res = ser.idxmin(skipna=False)
+        assert res is NaT
+        res = ser.idxmax(skipna=False)
+        assert res is NaT
+
+        df = ser.to_frame()
+        res = df.idxmin(skipna=False)
+        assert res.dtype == "M8[ns]"
+        assert res.isna().all()
+        res = df.idxmax(skipna=False)
+        assert res.dtype == "M8[ns]"
+        assert res.isna().all()
+
     def test_idxmin(self):
         # test idxmin
         # _check_stat_op approach can not be used here because of isna check.