Skip to content

BUG: idxmin/max dtype #54166

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,7 @@ Missing
^^^^^^^
- Bug in :meth:`DataFrame.interpolate` failing to fill across multiblock data when ``method`` is "pad", "ffill", "bfill", or "backfill" (:issue:`53898`)
- Bug in :meth:`DataFrame.interpolate` ignoring ``inplace`` when :class:`DataFrame` is empty (:issue:`53199`)
- Bug in :meth:`Series.idxmin`, :meth:`Series.idxmax`, :meth:`DataFrame.idxmin`, :meth:`DataFrame.idxmax` with a :class:`DatetimeIndex` index containing ``NaT`` incorrectly returning ``NaN`` instead of ``NaT`` (:issue:`43587`)
- Bug in :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` failing to raise on invalid ``downcast`` keyword, which can be only ``None`` or "infer" (:issue:`53103`)
- Bug in :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` with complex dtype incorrectly failing to fill ``NaN`` entries (:issue:`53635`)
-
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2323,6 +2323,9 @@ def _reduce(
self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
):
result = super()._reduce(name, skipna=skipna, keepdims=keepdims, **kwargs)
if name in ["argmax", "argmin"]:
# don't wrap in Categorical!
return result
if keepdims:
return type(self)(result, dtype=self.dtype)
else:
Expand Down
16 changes: 7 additions & 9 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -11235,13 +11235,12 @@ def idxmin(
nanops.nanargmin, "argmin", axis=axis, skipna=skipna, numeric_only=False
)
indices = res._values

# indices will always be 1d array since axis is not None and
# values is a 2d array for DataFrame
# indices will always be np.ndarray since axis is not None

index = data._get_axis(axis)
result = [index[i] if i >= 0 else np.nan for i in indices]
result = algorithms.take(
index._values, indices, allow_fill=True, fill_value=index._na_value
)
final_result = data._constructor_sliced(result, index=data._get_agg_axis(axis))
return final_result.__finalize__(self, method="idxmin")

Expand All @@ -11264,13 +11263,12 @@ def idxmax(
nanops.nanargmax, "argmax", axis=axis, skipna=skipna, numeric_only=False
)
indices = res._values

# indices will always be 1d array since axis is not None and
# values is a 2d array for DataFrame
assert isinstance(indices, (np.ndarray, ExtensionArray)) # for mypy
# indices will always be 1d array since axis is not None

index = data._get_axis(axis)
result = [index[i] if i >= 0 else np.nan for i in indices]
result = algorithms.take(
index._values, indices, allow_fill=True, fill_value=index._na_value
)
final_result = data._constructor_sliced(result, index=data._get_agg_axis(axis))
return final_result.__finalize__(self, method="idxmax")

Expand Down
6 changes: 4 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2532,7 +2532,8 @@ def idxmin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab
axis = self._get_axis_number(axis)
i = self.argmin(axis, skipna, *args, **kwargs)
if i == -1:
return np.nan
# GH#43587 give correct NA value for Index.
return self.index._na_value
return self.index[i]

def idxmax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashable:
Expand Down Expand Up @@ -2602,7 +2603,8 @@ def idxmax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab
axis = self._get_axis_number(axis)
i = self.argmax(axis, skipna, *args, **kwargs)
if i == -1:
return np.nan
# GH#43587 give correct NA value for Index.
return self.index._na_value
return self.index[i]

def round(self, decimals: int = 0, *args, **kwargs) -> Series:
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/frame/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -970,6 +970,7 @@ def test_idxmin(self, float_frame, int_frame, skipna, axis):
for df in [frame, int_frame]:
result = df.idxmin(axis=axis, skipna=skipna)
expected = df.apply(Series.idxmin, axis=axis, skipna=skipna)
expected = expected.astype(df.index.dtype)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("axis", [0, 1])
Expand Down Expand Up @@ -1009,6 +1010,7 @@ def test_idxmax(self, float_frame, int_frame, skipna, axis):
for df in [frame, int_frame]:
result = df.idxmax(axis=axis, skipna=skipna)
expected = df.apply(Series.idxmax, axis=axis, skipna=skipna)
expected = expected.astype(df.index.dtype)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("axis", [0, 1])
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/reductions/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -809,6 +809,24 @@ def test_numpy_argmax(self):
with pytest.raises(ValueError, match=msg):
np.argmax(s, out=data)

def test_idxmin_dt64index(self):
# GH#43587 should have NaT instead of NaN
ser = Series(
[1.0, 2.0, np.nan], index=DatetimeIndex(["NaT", "2015-02-08", "NaT"])
)
res = ser.idxmin(skipna=False)
assert res is NaT
res = ser.idxmax(skipna=False)
assert res is NaT

df = ser.to_frame()
res = df.idxmin(skipna=False)
assert res.dtype == "M8[ns]"
assert res.isna().all()
res = df.idxmax(skipna=False)
assert res.dtype == "M8[ns]"
assert res.isna().all()

def test_idxmin(self):
# test idxmin
# _check_stat_op approach can not be used here because of isna check.
Expand Down