diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
index 82cd54182a33d..64e5f78d961d1 100644
--- a/pandas/core/arrays/timedeltas.py
+++ b/pandas/core/arrays/timedeltas.py
@@ -381,15 +381,15 @@ def sum(
         nv.validate_sum(
             (), dict(dtype=dtype, out=out, keepdims=keepdims, initial=initial)
         )
-        if not len(self):
-            return NaT
-        if not skipna and self._hasnans:
+        if not self.size and (self.ndim == 1 or axis is None):
             return NaT
 
         result = nanops.nansum(
             self._data, axis=axis, skipna=skipna, min_count=min_count
         )
-        return Timedelta(result)
+        if is_scalar(result):
+            return Timedelta(result)
+        return self._from_backing_data(result)
 
     def std(
         self,
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index f2354f649b1e3..83399a87e5667 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -327,7 +327,10 @@ def _na_ok_dtype(dtype: DtypeObj) -> bool:
 
 def _wrap_results(result, dtype: DtypeObj, fill_value=None):
     """ wrap our results if needed """
-    if is_datetime64_any_dtype(dtype):
+    if result is NaT:
+        pass
+
+    elif is_datetime64_any_dtype(dtype):
         if fill_value is None:
             # GH#24293
             fill_value = iNaT
@@ -498,18 +501,45 @@ def nansum(
     >>> nanops.nansum(s)
     3.0
     """
+    orig_values = values
+
     values, mask, dtype, dtype_max, _ = _get_values(
         values, skipna, fill_value=0, mask=mask
     )
     dtype_sum = dtype_max
+    datetimelike = False
     if is_float_dtype(dtype):
         dtype_sum = dtype
     elif is_timedelta64_dtype(dtype):
+        datetimelike = True
         dtype_sum = np.float64
+
     the_sum = values.sum(axis, dtype=dtype_sum)
     the_sum = _maybe_null_out(the_sum, axis, mask, values.shape, min_count=min_count)
 
-    return _wrap_results(the_sum, dtype)
+    the_sum = _wrap_results(the_sum, dtype)
+    if datetimelike and not skipna:
+        the_sum = _mask_datetimelike_result(the_sum, axis, mask, orig_values)
+    return the_sum
+
+
+def _mask_datetimelike_result(
+    result: Union[np.ndarray, np.datetime64, np.timedelta64],
+    axis: Optional[int],
+    mask: Optional[np.ndarray],
+    orig_values: np.ndarray,
+):
+    if mask is None:
+        mask = isna(orig_values)
+    if isinstance(result, np.ndarray):
+        # we need to apply the mask
+        result = result.astype("i8").view(orig_values.dtype)
+        axis_mask = mask.any(axis=axis)
+        result[axis_mask] = iNaT
+    else:
+        if mask.any():
+            result = NaT
+    return result
 
 
 @disallow(PeriodDtype)
@@ -544,21 +574,25 @@ def nanmean(
     >>> nanops.nanmean(s)
     1.5
     """
+    orig_values = values
+
     values, mask, dtype, dtype_max, _ = _get_values(
         values, skipna, fill_value=0, mask=mask
    )
     dtype_sum = dtype_max
     dtype_count = np.float64
+
     # not using needs_i8_conversion because that includes period
-    if (
-        is_integer_dtype(dtype)
-        or is_datetime64_any_dtype(dtype)
-        or is_timedelta64_dtype(dtype)
-    ):
+    datetimelike = False
+    if dtype.kind in ["m", "M"]:
+        datetimelike = True
+        dtype_sum = np.float64
+    elif is_integer_dtype(dtype):
         dtype_sum = np.float64
     elif is_float_dtype(dtype):
         dtype_sum = dtype
         dtype_count = dtype
+
     count = _get_counts(values.shape, mask, axis, dtype=dtype_count)
     the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum))
 
@@ -573,7 +607,10 @@
     else:
         the_mean = the_sum / count if count > 0 else np.nan
 
-    return _wrap_results(the_mean, dtype)
+    the_mean = _wrap_results(the_mean, dtype)
+    if datetimelike and not skipna:
+        the_mean = _mask_datetimelike_result(the_mean, axis, mask, orig_values)
+    return the_mean
 
 
 @bottleneck_switch()
@@ -639,16 +676,37 @@ def get_median(x):
         # empty set so return nans of shape "everything but the passed axis"
         # since "axis" is where the reduction would occur if we had a nonempty
         # array
-        shp = np.array(values.shape)
-        dims = np.arange(values.ndim)
-        ret = np.empty(shp[dims != axis])
-        ret.fill(np.nan)
+        ret = get_empty_reduction_result(values.shape, axis, np.float_, np.nan)
         return _wrap_results(ret, dtype)
 
     # otherwise return a scalar value
     return _wrap_results(get_median(values) if notempty else np.nan, dtype)
 
 
+def get_empty_reduction_result(
+    shape: Tuple[int, ...], axis: int, dtype: np.dtype, fill_value: Any
+) -> np.ndarray:
+    """
+    The result from a reduction on an empty ndarray.
+
+    Parameters
+    ----------
+    shape : Tuple[int]
+    axis : int
+    dtype : np.dtype
+    fill_value : Any
+
+    Returns
+    -------
+    np.ndarray
+    """
+    shp = np.array(shape)
+    dims = np.arange(len(shape))
+    ret = np.empty(shp[dims != axis], dtype=dtype)
+    ret.fill(fill_value)
+    return ret
+
+
 def _get_counts_nanvar(
     value_counts: Tuple[int],
     mask: Optional[np.ndarray],
diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py
index b3b8f4d55e4de..a09e85010318c 100644
--- a/pandas/tests/arrays/test_timedeltas.py
+++ b/pandas/tests/arrays/test_timedeltas.py
@@ -251,6 +251,30 @@ def test_npsum(self):
         assert isinstance(result, pd.Timedelta)
         assert result == expected
 
+    def test_sum_2d_skipna_false(self):
+        arr = np.arange(8).astype(np.int64).view("m8[s]").astype("m8[ns]").reshape(4, 2)
+        arr[-1, -1] = "Nat"
+
+        tda = TimedeltaArray(arr)
+
+        result = tda.sum(skipna=False)
+        assert result is pd.NaT
+
+        result = tda.sum(axis=0, skipna=False)
+        expected = pd.TimedeltaIndex([pd.Timedelta(seconds=12), pd.NaT])._values
+        tm.assert_timedelta_array_equal(result, expected)
+
+        result = tda.sum(axis=1, skipna=False)
+        expected = pd.TimedeltaIndex(
+            [
+                pd.Timedelta(seconds=1),
+                pd.Timedelta(seconds=5),
+                pd.Timedelta(seconds=9),
+                pd.NaT,
+            ]
+        )._values
+        tm.assert_timedelta_array_equal(result, expected)
+
     def test_std(self):
         tdi = pd.TimedeltaIndex(["0H", "4H", "NaT", "4H", "0H", "2H"])
         arr = tdi.array
diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index ee136533b0775..a1f45324a920f 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -1273,6 +1273,32 @@ def test_preserve_timezone(self, initial: str, method):
         tm.assert_series_equal(result, expected)
 
 
+def test_sum_timedelta64_skipna_false():
+    # GH#17235
+    arr = np.arange(8).astype(np.int64).view("m8[s]").reshape(4, 2)
+    arr[-1, -1] = "Nat"
+
+    df = pd.DataFrame(arr)
+
+    result = df.sum(skipna=False)
+    expected = pd.Series([pd.Timedelta(seconds=12), pd.NaT])
+    tm.assert_series_equal(result, expected)
+
+    result = df.sum(axis=0, skipna=False)
+    tm.assert_series_equal(result, expected)
+
+    result = df.sum(axis=1, skipna=False)
+    expected = pd.Series(
+        [
+            pd.Timedelta(seconds=1),
+            pd.Timedelta(seconds=5),
+            pd.Timedelta(seconds=9),
+            pd.NaT,
+        ]
+    )
+    tm.assert_series_equal(result, expected)
+
+
 def test_mixed_frame_with_integer_sum():
     # https://github.com/pandas-dev/pandas/issues/34520
     df = pd.DataFrame([["a", 1]], columns=list("ab"))
diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py
index c45e4508c6153..da474f2c2978c 100644
--- a/pandas/tests/test_nanops.py
+++ b/pandas/tests/test_nanops.py
@@ -1005,6 +1005,23 @@ def test_nanmean(self, tz):
 
         result = nanops.nanmean(obj)
         assert result == expected
 
+    @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
+    def test_nanmean_skipna_false(self, dtype):
+        arr = np.arange(12).astype(np.int64).view(dtype).reshape(4, 3)
+
+        arr[-1, -1] = "NaT"
+
+        result = nanops.nanmean(arr, skipna=False)
+        assert result is pd.NaT
+
+        result = nanops.nanmean(arr, axis=0, skipna=False)
+        expected = np.array([4, 5, "NaT"], dtype=arr.dtype)
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = nanops.nanmean(arr, axis=1, skipna=False)
+        expected = np.array([arr[0, 1], arr[1, 1], arr[2, 1], arr[-1, -1]])
+        tm.assert_numpy_array_equal(result, expected)
+
 
 def test_use_bottleneck():
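
For reference, a minimal usage sketch of the behavior the new tests exercise (assumes a pandas build that includes this patch; the printed reprs are illustrative, not exact output):

    import numpy as np
    import pandas as pd

    # 4x2 timedelta64 block with a NaT in the last row/column
    arr = np.arange(8).astype(np.int64).view("m8[s]").astype("m8[ns]").reshape(4, 2)
    arr[-1, -1] = "NaT"
    df = pd.DataFrame(arr)

    # With skipna=False, any column (axis=0) or row (axis=1) containing NaT
    # now reduces to NaT instead of the NaT being skipped.
    print(df.sum(axis=0, skipna=False))  # expected: 12 seconds, NaT
    print(df.sum(axis=1, skipna=False))  # expected: 1s, 5s, 9s, NaT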