Skip to content

Commit 5859c15

Browse files
jbrockmendel authored and JulianWgs committed
BUG: DataFrame[td64].sum(skipna=False) (pandas-dev#37148)
* BUG: DataFrame[td64].sum(skipna=False)
* annotate, privatize
* annotate
* calculate mask in mask_datetimelike_result
1 parent ad00bfc commit 5859c15

File tree

5 files changed

+141
-16
lines changed

5 files changed

+141
-16
lines changed

pandas/core/arrays/timedeltas.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -381,15 +381,15 @@ def sum(
381381
nv.validate_sum(
382382
(), dict(dtype=dtype, out=out, keepdims=keepdims, initial=initial)
383383
)
384-
if not len(self):
385-
return NaT
386-
if not skipna and self._hasnans:
384+
if not self.size and (self.ndim == 1 or axis is None):
387385
return NaT
388386

389387
result = nanops.nansum(
390388
self._data, axis=axis, skipna=skipna, min_count=min_count
391389
)
392-
return Timedelta(result)
390+
if is_scalar(result):
391+
return Timedelta(result)
392+
return self._from_backing_data(result)
393393

394394
def std(
395395
self,

pandas/core/nanops.py

+70 -12
Original file line number | Diff line number | Diff line change
@@ -327,7 +327,10 @@ def _na_ok_dtype(dtype: DtypeObj) -> bool:
327327

328328
def _wrap_results(result, dtype: DtypeObj, fill_value=None):
329329
""" wrap our results if needed """
330-
if is_datetime64_any_dtype(dtype):
330+
if result is NaT:
331+
pass
332+
333+
elif is_datetime64_any_dtype(dtype):
331334
if fill_value is None:
332335
# GH#24293
333336
fill_value = iNaT
@@ -498,18 +501,45 @@ def nansum(
498501
>>> nanops.nansum(s)
499502
3.0
500503
"""
504+
orig_values = values
505+
501506
values, mask, dtype, dtype_max, _ = _get_values(
502507
values, skipna, fill_value=0, mask=mask
503508
)
504509
dtype_sum = dtype_max
510+
datetimelike = False
505511
if is_float_dtype(dtype):
506512
dtype_sum = dtype
507513
elif is_timedelta64_dtype(dtype):
514+
datetimelike = True
508515
dtype_sum = np.float64
516+
509517
the_sum = values.sum(axis, dtype=dtype_sum)
510518
the_sum = _maybe_null_out(the_sum, axis, mask, values.shape, min_count=min_count)
511519

512-
return _wrap_results(the_sum, dtype)
520+
the_sum = _wrap_results(the_sum, dtype)
521+
if datetimelike and not skipna:
522+
the_sum = _mask_datetimelike_result(the_sum, axis, mask, orig_values)
523+
return the_sum
524+
525+
526+
def _mask_datetimelike_result(
527+
result: Union[np.ndarray, np.datetime64, np.timedelta64],
528+
axis: Optional[int],
529+
mask: Optional[np.ndarray],
530+
orig_values: np.ndarray,
531+
):
532+
if mask is None:
533+
mask = isna(orig_values)
534+
if isinstance(result, np.ndarray):
535+
# we need to apply the mask
536+
result = result.astype("i8").view(orig_values.dtype)
537+
axis_mask = mask.any(axis=axis)
538+
result[axis_mask] = iNaT
539+
else:
540+
if mask.any():
541+
result = NaT
542+
return result
513543

514544

515545
@disallow(PeriodDtype)
@@ -544,21 +574,25 @@ def nanmean(
544574
>>> nanops.nanmean(s)
545575
1.5
546576
"""
577+
orig_values = values
578+
547579
values, mask, dtype, dtype_max, _ = _get_values(
548580
values, skipna, fill_value=0, mask=mask
549581
)
550582
dtype_sum = dtype_max
551583
dtype_count = np.float64
584+
552585
# not using needs_i8_conversion because that includes period
553-
if (
554-
is_integer_dtype(dtype)
555-
or is_datetime64_any_dtype(dtype)
556-
or is_timedelta64_dtype(dtype)
557-
):
586+
datetimelike = False
587+
if dtype.kind in ["m", "M"]:
588+
datetimelike = True
589+
dtype_sum = np.float64
590+
elif is_integer_dtype(dtype):
558591
dtype_sum = np.float64
559592
elif is_float_dtype(dtype):
560593
dtype_sum = dtype
561594
dtype_count = dtype
595+
562596
count = _get_counts(values.shape, mask, axis, dtype=dtype_count)
563597
the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum))
564598

@@ -573,7 +607,10 @@ def nanmean(
573607
else:
574608
the_mean = the_sum / count if count > 0 else np.nan
575609

576-
return _wrap_results(the_mean, dtype)
610+
the_mean = _wrap_results(the_mean, dtype)
611+
if datetimelike and not skipna:
612+
the_mean = _mask_datetimelike_result(the_mean, axis, mask, orig_values)
613+
return the_mean
577614

578615

579616
@bottleneck_switch()
@@ -639,16 +676,37 @@ def get_median(x):
639676
# empty set so return nans of shape "everything but the passed axis"
640677
# since "axis" is where the reduction would occur if we had a nonempty
641678
# array
642-
shp = np.array(values.shape)
643-
dims = np.arange(values.ndim)
644-
ret = np.empty(shp[dims != axis])
645-
ret.fill(np.nan)
679+
ret = get_empty_reduction_result(values.shape, axis, np.float_, np.nan)
646680
return _wrap_results(ret, dtype)
647681

648682
# otherwise return a scalar value
649683
return _wrap_results(get_median(values) if notempty else np.nan, dtype)
650684

651685

686+
def get_empty_reduction_result(
687+
shape: Tuple[int, ...], axis: int, dtype: np.dtype, fill_value: Any
688+
) -> np.ndarray:
689+
"""
690+
The result from a reduction on an empty ndarray.
691+
692+
Parameters
693+
----------
694+
shape : Tuple[int]
695+
axis : int
696+
dtype : np.dtype
697+
fill_value : Any
698+
699+
Returns
700+
-------
701+
np.ndarray
702+
"""
703+
shp = np.array(shape)
704+
dims = np.arange(len(shape))
705+
ret = np.empty(shp[dims != axis], dtype=dtype)
706+
ret.fill(fill_value)
707+
return ret
708+
709+
652710
def _get_counts_nanvar(
653711
value_counts: Tuple[int],
654712
mask: Optional[np.ndarray],

pandas/tests/arrays/test_timedeltas.py

+24
Original file line number | Diff line number | Diff line change
@@ -251,6 +251,30 @@ def test_npsum(self):
251251
assert isinstance(result, pd.Timedelta)
252252
assert result == expected
253253

254+
def test_sum_2d_skipna_false(self):
255+
arr = np.arange(8).astype(np.int64).view("m8[s]").astype("m8[ns]").reshape(4, 2)
256+
arr[-1, -1] = "Nat"
257+
258+
tda = TimedeltaArray(arr)
259+
260+
result = tda.sum(skipna=False)
261+
assert result is pd.NaT
262+
263+
result = tda.sum(axis=0, skipna=False)
264+
expected = pd.TimedeltaIndex([pd.Timedelta(seconds=12), pd.NaT])._values
265+
tm.assert_timedelta_array_equal(result, expected)
266+
267+
result = tda.sum(axis=1, skipna=False)
268+
expected = pd.TimedeltaIndex(
269+
[
270+
pd.Timedelta(seconds=1),
271+
pd.Timedelta(seconds=5),
272+
pd.Timedelta(seconds=9),
273+
pd.NaT,
274+
]
275+
)._values
276+
tm.assert_timedelta_array_equal(result, expected)
277+
254278
def test_std(self):
255279
tdi = pd.TimedeltaIndex(["0H", "4H", "NaT", "4H", "0H", "2H"])
256280
arr = tdi.array

pandas/tests/frame/test_analytics.py

+26
Original file line number | Diff line number | Diff line change
@@ -1228,6 +1228,32 @@ def test_frame_any_with_timedelta(self):
12281228
tm.assert_series_equal(result, expected)
12291229

12301230

1231+
def test_sum_timedelta64_skipna_false():
1232+
# GH#17235
1233+
arr = np.arange(8).astype(np.int64).view("m8[s]").reshape(4, 2)
1234+
arr[-1, -1] = "Nat"
1235+
1236+
df = pd.DataFrame(arr)
1237+
1238+
result = df.sum(skipna=False)
1239+
expected = pd.Series([pd.Timedelta(seconds=12), pd.NaT])
1240+
tm.assert_series_equal(result, expected)
1241+
1242+
result = df.sum(axis=0, skipna=False)
1243+
tm.assert_series_equal(result, expected)
1244+
1245+
result = df.sum(axis=1, skipna=False)
1246+
expected = pd.Series(
1247+
[
1248+
pd.Timedelta(seconds=1),
1249+
pd.Timedelta(seconds=5),
1250+
pd.Timedelta(seconds=9),
1251+
pd.NaT,
1252+
]
1253+
)
1254+
tm.assert_series_equal(result, expected)
1255+
1256+
12311257
def test_mixed_frame_with_integer_sum():
12321258
# https://github.com/pandas-dev/pandas/issues/34520
12331259
df = DataFrame([["a", 1]], columns=list("ab"))

pandas/tests/test_nanops.py

+17
Original file line number | Diff line number | Diff line change
@@ -1005,6 +1005,23 @@ def test_nanmean(self, tz):
10051005
result = nanops.nanmean(obj)
10061006
assert result == expected
10071007

1008+
@pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
1009+
def test_nanmean_skipna_false(self, dtype):
1010+
arr = np.arange(12).astype(np.int64).view(dtype).reshape(4, 3)
1011+
1012+
arr[-1, -1] = "NaT"
1013+
1014+
result = nanops.nanmean(arr, skipna=False)
1015+
assert result is pd.NaT
1016+
1017+
result = nanops.nanmean(arr, axis=0, skipna=False)
1018+
expected = np.array([4, 5, "NaT"], dtype=arr.dtype)
1019+
tm.assert_numpy_array_equal(result, expected)
1020+
1021+
result = nanops.nanmean(arr, axis=1, skipna=False)
1022+
expected = np.array([arr[0, 1], arr[1, 1], arr[2, 1], arr[-1, -1]])
1023+
tm.assert_numpy_array_equal(result, expected)
1024+
10081025

10091026
def test_use_bottleneck():
10101027

0 commit comments

Comments
 (0)