Skip to content

Commit 8b1f996

Browse files
jbrockmendel authored and ukarroum committed
BUG: DataFrame.min/max dt64 with skipna=False (pandas-dev#37425)
1 parent e4015e7 commit 8b1f996

File tree

5 files changed

+71
-24
lines changed

5 files changed

+71
-24
lines changed

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,7 @@ Numeric
404404
- Bug in :class:`IntervalArray` comparisons with :class:`Series` not returning :class:`Series` (:issue:`36908`)
405405
- Bug in :class:`DataFrame` allowing arithmetic operations with list of array-likes with undefined results. Behavior changed to raising ``ValueError`` (:issue:`36702`)
406406
- Bug in :meth:`DataFrame.std` with ``timedelta64`` dtype and ``skipna=False`` (:issue:`37392`)
407+
- Bug in :meth:`DataFrame.min` and :meth:`DataFrame.max` with ``datetime64`` dtype and ``skipna=False`` (:issue:`36907`)
407408

408409
Conversion
409410
^^^^^^^^^^

pandas/compat/numpy/function.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -387,20 +387,20 @@ def validate_resampler_func(method: str, args, kwargs) -> None:
387387
raise TypeError("too many arguments passed in")
388388

389389

390-
def validate_minmax_axis(axis: Optional[int]) -> None:
390+
def validate_minmax_axis(axis: Optional[int], ndim: int = 1) -> None:
391391
"""
392392
Ensure that the axis argument passed to min, max, argmin, or argmax is zero
393393
or None, as otherwise it will be incorrectly ignored.
394394
395395
Parameters
396396
----------
397397
axis : int or None
398+
ndim : int, default 1
398399
399400
Raises
400401
------
401402
ValueError
402403
"""
403-
ndim = 1 # hard-coded for Index
404404
if axis is None:
405405
return
406406
if axis >= ndim or (axis < 0 and ndim + axis < 0):

pandas/core/arrays/datetimelike.py

+33-21
Original file line numberDiff line numberDiff line change
@@ -1264,13 +1264,24 @@ def min(self, axis=None, skipna=True, *args, **kwargs):
12641264
Series.min : Return the minimum value in a Series.
12651265
"""
12661266
nv.validate_min(args, kwargs)
1267-
nv.validate_minmax_axis(axis)
1267+
nv.validate_minmax_axis(axis, self.ndim)
12681268

1269-
result = nanops.nanmin(self.asi8, skipna=skipna, mask=self.isna())
1270-
if isna(result):
1271-
# Period._from_ordinal does not handle np.nan gracefully
1272-
return NaT
1273-
return self._box_func(result)
1269+
if is_period_dtype(self.dtype):
1270+
# pass datetime64 values to nanops to get correct NaT semantics
1271+
result = nanops.nanmin(
1272+
self._ndarray.view("M8[ns]"), axis=axis, skipna=skipna
1273+
)
1274+
if result is NaT:
1275+
return NaT
1276+
result = result.view("i8")
1277+
if axis is None or self.ndim == 1:
1278+
return self._box_func(result)
1279+
return self._from_backing_data(result)
1280+
1281+
result = nanops.nanmin(self._ndarray, axis=axis, skipna=skipna)
1282+
if lib.is_scalar(result):
1283+
return self._box_func(result)
1284+
return self._from_backing_data(result)
12741285

12751286
def max(self, axis=None, skipna=True, *args, **kwargs):
12761287
"""
@@ -1286,23 +1297,24 @@ def max(self, axis=None, skipna=True, *args, **kwargs):
12861297
# TODO: skipna is broken with max.
12871298
# See https://github.com/pandas-dev/pandas/issues/24265
12881299
nv.validate_max(args, kwargs)
1289-
nv.validate_minmax_axis(axis)
1290-
1291-
mask = self.isna()
1292-
if skipna:
1293-
values = self[~mask].asi8
1294-
elif mask.any():
1295-
return NaT
1296-
else:
1297-
values = self.asi8
1300+
nv.validate_minmax_axis(axis, self.ndim)
12981301

1299-
if not len(values):
1300-
# short-circuit for empty max / min
1301-
return NaT
1302+
if is_period_dtype(self.dtype):
1303+
# pass datetime64 values to nanops to get correct NaT semantics
1304+
result = nanops.nanmax(
1305+
self._ndarray.view("M8[ns]"), axis=axis, skipna=skipna
1306+
)
1307+
if result is NaT:
1308+
return result
1309+
result = result.view("i8")
1310+
if axis is None or self.ndim == 1:
1311+
return self._box_func(result)
1312+
return self._from_backing_data(result)
13021313

1303-
result = nanops.nanmax(values, skipna=skipna)
1304-
# Don't have to worry about NA `result`, since no NA went in.
1305-
return self._box_func(result)
1314+
result = nanops.nanmax(self._ndarray, axis=axis, skipna=skipna)
1315+
if lib.is_scalar(result):
1316+
return self._box_func(result)
1317+
return self._from_backing_data(result)
13061318

13071319
def mean(self, skipna=True, axis: Optional[int] = 0):
13081320
"""

pandas/core/nanops.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,7 @@ def _wrap_results(result, dtype: DtypeObj, fill_value=None):
340340
if result == fill_value:
341341
result = np.nan
342342
if tz is not None:
343+
# we get here e.g. via nanmean when we call it on a DTA[tz]
343344
result = Timestamp(result, tz=tz)
344345
elif isna(result):
345346
result = np.datetime64("NaT", "ns")
@@ -919,10 +920,13 @@ def reduction(
919920
mask: Optional[np.ndarray] = None,
920921
) -> Dtype:
921922

923+
orig_values = values
922924
values, mask, dtype, dtype_max, fill_value = _get_values(
923925
values, skipna, fill_value_typ=fill_value_typ, mask=mask
924926
)
925927

928+
datetimelike = orig_values.dtype.kind in ["m", "M"]
929+
926930
if (axis is not None and values.shape[axis] == 0) or values.size == 0:
927931
try:
928932
result = getattr(values, meth)(axis, dtype=dtype_max)
@@ -933,7 +937,12 @@ def reduction(
933937
result = getattr(values, meth)(axis)
934938

935939
result = _wrap_results(result, dtype, fill_value)
936-
return _maybe_null_out(result, axis, mask, values.shape)
940+
result = _maybe_null_out(result, axis, mask, values.shape)
941+
942+
if datetimelike and not skipna:
943+
result = _mask_datetimelike_result(result, axis, mask, orig_values)
944+
945+
return result
937946

938947
return reduction
939948

pandas/tests/frame/test_analytics.py

+25
Original file line numberDiff line numberDiff line change
@@ -1169,6 +1169,31 @@ def test_min_max_dt64_with_NaT(self):
11691169
exp = Series([pd.NaT], index=["foo"])
11701170
tm.assert_series_equal(res, exp)
11711171

1172+
def test_min_max_dt64_with_NaT_skipna_false(self, tz_naive_fixture):
1173+
# GH#36907
1174+
tz = tz_naive_fixture
1175+
df = DataFrame(
1176+
{
1177+
"a": [
1178+
Timestamp("2020-01-01 08:00:00", tz=tz),
1179+
Timestamp("1920-02-01 09:00:00", tz=tz),
1180+
],
1181+
"b": [Timestamp("2020-02-01 08:00:00", tz=tz), pd.NaT],
1182+
}
1183+
)
1184+
1185+
res = df.min(axis=1, skipna=False)
1186+
expected = Series([df.loc[0, "a"], pd.NaT])
1187+
assert expected.dtype == df["a"].dtype
1188+
1189+
tm.assert_series_equal(res, expected)
1190+
1191+
res = df.max(axis=1, skipna=False)
1192+
expected = Series([df.loc[0, "b"], pd.NaT])
1193+
assert expected.dtype == df["a"].dtype
1194+
1195+
tm.assert_series_equal(res, expected)
1196+
11721197
def test_min_max_dt64_api_consistency_with_NaT(self):
11731198
# Calling the following sum functions returned an error for dataframes but
11741199
# returned NaT for series. These tests check that the API is consistent in

0 commit comments

Comments
 (0)