From f6e7b53f9853c29fdc7775a1b06f4f559849ae04 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 24 Oct 2020 17:34:06 -0700 Subject: [PATCH 1/3] BUG: DataFrame.std(skipna=False) with td64 dtype --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/nanops.py | 2 +- pandas/tests/arrays/test_timedeltas.py | 12 ++++++++++-- pandas/tests/frame/test_analytics.py | 15 +++++++++++++++ 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index a9b4ad2e5374a..422bbbf14c791 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -402,6 +402,7 @@ Numeric - Bug in :class:`DataFrame` arithmetic ops incorrectly accepting keyword arguments (:issue:`36843`) - Bug in :class:`IntervalArray` comparisons with :class:`Series` not returning :class:`Series` (:issue:`36908`) - Bug in :class:`DataFrame` allowing arithmetic operations with list of array-likes with undefined results. Behavior changed to raising ``ValueError`` (:issue:`36702`) +- Bug in :meth:`DataFrame.std`` with ``timedelta64`` dtype and ``skipna=False`` (:issue:`??`) Conversion ^^^^^^^^^^ diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 83399a87e5667..b101da196fdd8 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -228,7 +228,7 @@ def _maybe_get_mask( # Boolean data cannot contain nulls, so signal via mask being None return None - if skipna: + if skipna or needs_i8_conversion(values.dtype): mask = isna(values) return mask diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 976f5d0c90f19..f67a554a435ef 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -3,6 +3,7 @@ import pandas as pd import pandas._testing as tm +from pandas.core import nanops from pandas.core.arrays import TimedeltaArray @@ -288,12 +289,19 @@ def test_std(self): assert isinstance(result, pd.Timedelta) assert result == expected + 
result = nanops.nanstd(np.asarray(arr), skipna=True) + assert isinstance(result, pd.Timedelta) + assert result == expected + result = arr.std(skipna=False) assert result is pd.NaT result = tdi.std(skipna=False) assert result is pd.NaT + result = nanops.nanstd(np.asarray(arr), skipna=False) + assert result is pd.NaT + def test_median(self): tdi = pd.TimedeltaIndex(["0H", "3H", "NaT", "5H06m", "0H", "2H"]) arr = tdi.array @@ -307,8 +315,8 @@ def test_median(self): assert isinstance(result, pd.Timedelta) assert result == expected - result = arr.std(skipna=False) + result = arr.median(skipna=False) assert result is pd.NaT - result = tdi.std(skipna=False) + result = tdi.median(skipna=False) assert result is pd.NaT diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index ddca67306d804..f030ef272a631 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -753,6 +753,21 @@ def test_operators_timedelta64(self): assert df["off1"].dtype == "timedelta64[ns]" assert df["off2"].dtype == "timedelta64[ns]" + def test_std_timedelta64_skipna_false(self): + tdi = pd.timedelta_range("1 Day", periods=10) + df = DataFrame({"A": tdi, "B": tdi}) + df.iloc[-2, -1] = pd.NaT + + result = df.std(skipna=False) + expected = Series( + [df["A"].std(), pd.NaT], index=["A", "B"], dtype="timedelta64[ns]" + ) + tm.assert_series_equal(result, expected) + + result = df.std(axis=1, skipna=False) + expected = Series([pd.Timedelta(0)] * 8 + [pd.NaT, pd.Timedelta(0)]) + tm.assert_series_equal(result, expected) + def test_sum_corner(self): empty_frame = DataFrame() From 384e3e8fae74f8ca6729d1d82ff8cda2d23687f6 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 24 Oct 2020 17:37:06 -0700 Subject: [PATCH 2/3] GH ref --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/tests/frame/test_analytics.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 
422bbbf14c791..f54fa9d98a592 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -402,7 +402,7 @@ Numeric - Bug in :class:`DataFrame` arithmetic ops incorrectly accepting keyword arguments (:issue:`36843`) - Bug in :class:`IntervalArray` comparisons with :class:`Series` not returning :class:`Series` (:issue:`36908`) - Bug in :class:`DataFrame` allowing arithmetic operations with list of array-likes with undefined results. Behavior changed to raising ``ValueError`` (:issue:`36702`) -- Bug in :meth:`DataFrame.std`` with ``timedelta64`` dtype and ``skipna=False`` (:issue:`??`) +- Bug in :meth:`DataFrame.std` with ``timedelta64`` dtype and ``skipna=False`` (:issue:`37392`) Conversion ^^^^^^^^^^ diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index f030ef272a631..ee4da37ce10f3 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -754,6 +754,7 @@ def test_operators_timedelta64(self): assert df["off2"].dtype == "timedelta64[ns]" def test_std_timedelta64_skipna_false(self): + # GH#37392 tdi = pd.timedelta_range("1 Day", periods=10) df = DataFrame({"A": tdi, "B": tdi}) df.iloc[-2, -1] = pd.NaT From 28fd04528bb877f5ba5a94726609c828d7f77c31 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 24 Oct 2020 19:34:44 -0700 Subject: [PATCH 3/3] CLN: remove unnecessary branches --- pandas/core/nanops.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index b101da196fdd8..c7b6e132f9a74 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -381,18 +381,16 @@ def _na_for_min_count( if is_numeric_dtype(values): values = values.astype("float64") fill_value = na_value_for_dtype(values.dtype) + if fill_value is NaT: + fill_value = values.dtype.type("NaT", "ns") if values.ndim == 1: return fill_value else: assert axis is not None # assertion to make mypy happy result_shape = 
values.shape[:axis] + values.shape[axis + 1 :] - # calling np.full with dtype parameter throws an ValueError when called - # with dtype=np.datetime64 and and fill_value=pd.NaT - try: - result = np.full(result_shape, fill_value, dtype=values.dtype) - except ValueError: - result = np.full(result_shape, fill_value) + + result = np.full(result_shape, fill_value, dtype=values.dtype) return result @@ -526,11 +524,9 @@ def nansum( def _mask_datetimelike_result( result: Union[np.ndarray, np.datetime64, np.timedelta64], axis: Optional[int], - mask: Optional[np.ndarray], + mask: np.ndarray, orig_values: np.ndarray, ): - if mask is None: - mask = isna(orig_values) if isinstance(result, np.ndarray): # we need to apply the mask result = result.astype("i8").view(orig_values.dtype)