Skip to content

BUG: DataFrame[td64].sum(skipna=False) #37148

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Oct 24, 2020
8 changes: 4 additions & 4 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,15 +381,15 @@ def sum(
nv.validate_sum(
(), dict(dtype=dtype, out=out, keepdims=keepdims, initial=initial)
)
if not len(self):
return NaT
if not skipna and self._hasnans:
if not self.size and (self.ndim == 1 or axis is None):
return NaT

result = nanops.nansum(
self._data, axis=axis, skipna=skipna, min_count=min_count
)
return Timedelta(result)
if is_scalar(result):
return Timedelta(result)
return self._from_backing_data(result)

def std(
self,
Expand Down
66 changes: 54 additions & 12 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,10 @@ def _na_ok_dtype(dtype: DtypeObj) -> bool:

def _wrap_results(result, dtype: DtypeObj, fill_value=None):
""" wrap our results if needed """
if is_datetime64_any_dtype(dtype):
if result is NaT:
pass

elif is_datetime64_any_dtype(dtype):
if fill_value is None:
# GH#24293
fill_value = iNaT
Expand Down Expand Up @@ -498,18 +501,40 @@ def nansum(
>>> nanops.nansum(s)
3.0
"""
orig_values = values

values, mask, dtype, dtype_max, _ = _get_values(
values, skipna, fill_value=0, mask=mask
)
dtype_sum = dtype_max
datetimelike = False
if is_float_dtype(dtype):
dtype_sum = dtype
elif is_timedelta64_dtype(dtype):
datetimelike = True
dtype_sum = np.float64
if mask is None and not skipna:
mask = isna(orig_values)

the_sum = values.sum(axis, dtype=dtype_sum)
the_sum = _maybe_null_out(the_sum, axis, mask, values.shape, min_count=min_count)

return _wrap_results(the_sum, dtype)
the_sum = _wrap_results(the_sum, dtype)
if datetimelike and not skipna:
the_sum = mask_datetimelike_result(the_sum, axis, mask, orig_values.dtype)
return the_sum


def mask_datetimelike_result(result, axis, mask, orig_dtype):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add type annotations to this function?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will do

if isinstance(result, np.ndarray):
# we need to apply the mask
result = result.astype("i8").view(orig_dtype)
axis_mask = mask.any(axis=axis)
result[axis_mask] = iNaT
else:
if mask.any():
result = NaT
return result


@disallow(PeriodDtype)
Expand Down Expand Up @@ -544,21 +569,27 @@ def nanmean(
>>> nanops.nanmean(s)
1.5
"""
orig_values = values

values, mask, dtype, dtype_max, _ = _get_values(
values, skipna, fill_value=0, mask=mask
)
dtype_sum = dtype_max
dtype_count = np.float64

# not using needs_i8_conversion because that includes period
if (
is_integer_dtype(dtype)
or is_datetime64_any_dtype(dtype)
or is_timedelta64_dtype(dtype)
):
datetimelike = False
if dtype.kind in ["m", "M"]:
datetimelike = True
if mask is None and not skipna:
mask = isna(orig_values)
dtype_sum = np.float64
elif is_integer_dtype(dtype):
dtype_sum = np.float64
elif is_float_dtype(dtype):
dtype_sum = dtype
dtype_count = dtype

count = _get_counts(values.shape, mask, axis, dtype=dtype_count)
the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum))

Expand All @@ -573,7 +604,10 @@ def nanmean(
else:
the_mean = the_sum / count if count > 0 else np.nan

return _wrap_results(the_mean, dtype)
the_mean = _wrap_results(the_mean, dtype)
if datetimelike and not skipna:
the_mean = mask_datetimelike_result(the_mean, axis, mask, orig_values.dtype)
return the_mean


@bottleneck_switch()
Expand Down Expand Up @@ -639,16 +673,24 @@ def get_median(x):
# empty set so return nans of shape "everything but the passed axis"
# since "axis" is where the reduction would occur if we had a nonempty
# array
shp = np.array(values.shape)
dims = np.arange(values.ndim)
ret = np.empty(shp[dims != axis])
ret.fill(np.nan)
ret = get_empty_reduction_result(values.shape, axis, np.float_, np.nan)
return _wrap_results(ret, dtype)

# otherwise return a scalar value
return _wrap_results(get_median(values) if notempty else np.nan, dtype)


def get_empty_reduction_result(shape, axis: int, dtype, fill_value) -> np.ndarray:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add type annotations for dtype and fill_value?

"""
The result from a reduction on an empty ndarray.
"""
shp = np.array(shape)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a Parameters section to the docstring?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure, just pushed

dims = np.arange(len(shape))
ret = np.empty(shp[dims != axis], dtype=dtype)
ret.fill(fill_value)
return ret


def _get_counts_nanvar(
value_counts: Tuple[int],
mask: Optional[np.ndarray],
Expand Down
24 changes: 24 additions & 0 deletions pandas/tests/arrays/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,30 @@ def test_npsum(self):
assert isinstance(result, pd.Timedelta)
assert result == expected

def test_sum_2d_skipna_false(self):
    # Build a 4x2 timedelta block holding 0s..7s (converted to nanosecond
    # resolution) and overwrite the bottom-right entry with a missing value.
    values = (
        np.arange(8).astype(np.int64).view("m8[s]").astype("m8[ns]").reshape(4, 2)
    )
    values[-1, -1] = "Nat"
    tda = TimedeltaArray(values)

    # Reducing over everything with skipna=False must yield NaT.
    total = tda.sum(skipna=False)
    assert total is pd.NaT

    # Column-wise: only the second column contains the missing entry.
    by_column = tda.sum(axis=0, skipna=False)
    column_expected = pd.TimedeltaIndex([pd.Timedelta(seconds=12), pd.NaT])._values
    tm.assert_timedelta_array_equal(by_column, column_expected)

    # Row-wise: only the last row contains the missing entry.
    by_row = tda.sum(axis=1, skipna=False)
    row_entries = [
        pd.Timedelta(seconds=1),
        pd.Timedelta(seconds=5),
        pd.Timedelta(seconds=9),
        pd.NaT,
    ]
    row_expected = pd.TimedeltaIndex(row_entries)._values
    tm.assert_timedelta_array_equal(by_row, row_expected)

def test_std(self):
tdi = pd.TimedeltaIndex(["0H", "4H", "NaT", "4H", "0H", "2H"])
arr = tdi.array
Expand Down
26 changes: 26 additions & 0 deletions pandas/tests/frame/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -1273,6 +1273,32 @@ def test_preserve_timezone(self, initial: str, method):
tm.assert_series_equal(result, expected)


def test_sum_timedelta64_skipna_false():
    # GH#17235
    # 4x2 frame of timedeltas 0s..7s with the bottom-right cell set to missing.
    values = np.arange(8).astype(np.int64).view("m8[s]").reshape(4, 2)
    values[-1, -1] = "Nat"
    frame = pd.DataFrame(values)

    # Default axis and explicit axis=0 must agree: the second column holds
    # the missing cell, so its sum is NaT when skipna=False.
    column_expected = pd.Series([pd.Timedelta(seconds=12), pd.NaT])

    default_axis_sum = frame.sum(skipna=False)
    tm.assert_series_equal(default_axis_sum, column_expected)

    axis0_sum = frame.sum(axis=0, skipna=False)
    tm.assert_series_equal(axis0_sum, column_expected)

    # Row-wise: only the last row holds the missing cell.
    row_entries = [
        pd.Timedelta(seconds=1),
        pd.Timedelta(seconds=5),
        pd.Timedelta(seconds=9),
        pd.NaT,
    ]
    row_expected = pd.Series(row_entries)
    axis1_sum = frame.sum(axis=1, skipna=False)
    tm.assert_series_equal(axis1_sum, row_expected)


def test_mixed_frame_with_integer_sum():
# https://github.com/pandas-dev/pandas/issues/34520
df = pd.DataFrame([["a", 1]], columns=list("ab"))
Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/test_nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1005,6 +1005,23 @@ def test_nanmean(self, tz):
result = nanops.nanmean(obj)
assert result == expected

@pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
def test_nanmean_skipna_false(self, dtype):
    # 4x3 datetimelike block built from the integers 0..11, with the
    # bottom-right entry replaced by a missing value.
    data = np.arange(12).astype(np.int64).view(dtype).reshape(4, 3)
    data[-1, -1] = "NaT"

    # Reducing over everything with skipna=False must yield NaT.
    overall = nanops.nanmean(data, skipna=False)
    assert overall is pd.NaT

    # Column-wise: only the last column contains the missing entry.
    column_means = nanops.nanmean(data, axis=0, skipna=False)
    column_expected = np.array([4, 5, "NaT"], dtype=data.dtype)
    tm.assert_numpy_array_equal(column_means, column_expected)

    # Row-wise: each complete row averages to its middle element; the last
    # row contains the missing entry.
    row_means = nanops.nanmean(data, axis=1, skipna=False)
    row_expected = np.array(
        [data[0, 1], data[1, 1], data[2, 1], data[-1, -1]]
    )
    tm.assert_numpy_array_equal(row_means, row_expected)


def test_use_bottleneck():

Expand Down