From 1bc513a6cddf0b67c244ff16cea9ac29c945a508 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 18 Sep 2020 21:36:45 +0200 Subject: [PATCH 1/7] Fix bug with mixed dtypes and axis equals 1 --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/core/window/rolling.py | 4 ++++ pandas/tests/window/test_rolling.py | 15 +++++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 3992e697db7e4..36f1e32e0a251 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -336,7 +336,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupby.tshift` failing to raise ``ValueError`` when a frequency cannot be inferred for the index of a group (:issue:`35937`) - Bug in :meth:`DataFrame.groupby` does not always maintain column index name for ``any``, ``all``, ``bfill``, ``ffill``, ``shift`` (:issue:`29764`) - Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`) -- +- Bug in :meth:`Rolling.sum()` returned wrong values when dtypes where mixed between float and integer and axis was equal to one (:issue:`20649`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 21a7164411fb7..13b448c3b07b3 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -25,6 +25,7 @@ from pandas._typing import ArrayLike, Axis, FrameOrSeries, FrameOrSeriesUnion from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv +from pandas.core.dtypes.cast import infer_dtype_from from pandas.util._decorators import Appender, Substitution, cache_readonly, doc from pandas.core.dtypes.common import ( @@ -490,6 +491,9 @@ def _apply_blockwise( return self._apply_series(homogeneous_func) obj = self._create_data(self._selected_obj) + if self.axis == 1: + obj = obj.astype(infer_dtype_from(obj.values)[0], copy=False) + obj._mgr = obj._mgr.consolidate() mgr = obj._mgr def hfunc(bvalues: ArrayLike) -> ArrayLike: diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 88afcec0f7bf4..0063913a75b84 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -771,3 +771,18 @@ def test_rolling_numerical_too_large_numbers(): index=dates, ) tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + ("func", "value"), + [("sum", 2.0), ("max", 1.0), ("min", 1.0), ("mean", 1.0), ("median", 1.0)], +) +def test_rolling_mixed_dtypes_axis_1(func, value): + # GH: 20649 + df = pd.DataFrame(1, index=[1, 2], columns=["a", "b", "c"]) + df["c"] = 1.0 + result = getattr(df.rolling(window=2, min_periods=1, axis=1), func)() + expected = pd.DataFrame( + {"a": [1.0, 1.0], "b": [value, value], "c": [value, value]}, index=[1, 2] + ) + tm.assert_frame_equal(result, expected) From a9bb0ba0fc2edd254f1fdc2fdb7fe5babc6397e4 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 18 Sep 2020 22:15:33 +0200 Subject: [PATCH 2/7] Add test and move conversion --- pandas/core/window/rolling.py | 7 +++---- pandas/tests/window/test_rolling.py | 12 ++++++++++++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 13b448c3b07b3..e11e6bff7cb4f 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -244,7 +244,9 @@ def _create_data(self, obj: FrameOrSeries) -> FrameOrSeries: if self.on is not None and not isinstance(self.on, Index): if obj.ndim == 2: obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False) - + if self.axis == 1: + obj = obj.astype("float", copy=False) + obj._mgr = obj._mgr.consolidate() return obj def _gotitem(self, key, ndim, subset=None): @@ -491,9 +493,6 @@ def _apply_blockwise( return self._apply_series(homogeneous_func) obj = self._create_data(self._selected_obj) - if self.axis == 1: - obj = obj.astype(infer_dtype_from(obj.values)[0], copy=False) - obj._mgr = obj._mgr.consolidate() mgr = obj._mgr def hfunc(bvalues: ArrayLike) -> ArrayLike: diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 0063913a75b84..91a31bf37388b 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -786,3 +786,15 @@ def test_rolling_mixed_dtypes_axis_1(func, value): {"a": [1.0, 1.0], "b": [value, value], "c": [value, value]}, index=[1, 2] ) tm.assert_frame_equal(result, expected) + + +def test_rolling_axis_one_with_nan(): + # GH: 35596 + df = pd.DataFrame([[0, 1, 2, 4, np.nan, np.nan, np.nan], + [0, 1, 2, np.nan, np.nan, np.nan, np.nan], + [0, 2, 2, np.nan, 2, np.nan, 1]]) + result = df.rolling(window=7, min_periods=1, axis='columns').sum() + expected = pd.DataFrame([[0.0, 1.0, 3.0, 7.0, 7.0, 7.0, 7.0], + [0.0, 1.0, 3.0, 3.0, 3.0, 3.0, 3.0], + [0.0, 2.0, 4.0, 4.0, 6.0, 6.0, 7.0]]) + tm.assert_frame_equal(result, expected) From 58b9fed9d8986ab785f3e42fca6884b158085622 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 18 Sep 2020 22:16:25 +0200 Subject: [PATCH 3/7] Run black pandas --- pandas/tests/window/test_rolling.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 91a31bf37388b..a186627f328ef 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -790,11 +790,19 @@ def test_rolling_mixed_dtypes_axis_1(func, value): def test_rolling_axis_one_with_nan(): # GH: 35596 - df = pd.DataFrame([[0, 1, 2, 4, np.nan, np.nan, np.nan], - [0, 1, 2, np.nan, np.nan, np.nan, np.nan], - [0, 2, 2, np.nan, 2, np.nan, 1]]) - result = df.rolling(window=7, min_periods=1, axis='columns').sum() - expected = pd.DataFrame([[0.0, 1.0, 3.0, 7.0, 7.0, 7.0, 7.0], - [0.0, 1.0, 3.0, 3.0, 3.0, 3.0, 3.0], - [0.0, 2.0, 4.0, 4.0, 6.0, 6.0, 7.0]]) + df = pd.DataFrame( + [ + [0, 1, 2, 4, np.nan, np.nan, np.nan], + [0, 1, 2, np.nan, np.nan, np.nan, np.nan], + [0, 2, 2, np.nan, 2, np.nan, 1], + ] + ) + result = df.rolling(window=7, min_periods=1, axis="columns").sum() + expected = pd.DataFrame( + [ + [0.0, 1.0, 3.0, 7.0, 7.0, 7.0, 7.0], + [0.0, 1.0, 3.0, 3.0, 3.0, 3.0, 3.0], + [0.0, 2.0, 4.0, 4.0, 6.0, 6.0, 7.0], + ] + ) tm.assert_frame_equal(result, expected) From e7d7c965d58297703280d90256066e026575435a Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 18 Sep 2020 22:44:37 +0200 Subject: [PATCH 4/7] Delete unused import --- pandas/core/window/rolling.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index e11e6bff7cb4f..cf1ac31f56535 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -25,7 +25,6 @@ from pandas._typing import ArrayLike, Axis, FrameOrSeries, FrameOrSeriesUnion from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv -from pandas.core.dtypes.cast import infer_dtype_from from pandas.util._decorators import Appender, Substitution, cache_readonly, doc from pandas.core.dtypes.common import ( From c4e83c6e49c0d839372abda874441bfd630897ff Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 18 Sep 2020 22:52:27 +0200 Subject: [PATCH 5/7] Coerce to float64 --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index cf1ac31f56535..c09fb35190b38 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -244,7 +244,7 @@ def _create_data(self, obj: FrameOrSeries) -> FrameOrSeries: if obj.ndim == 2: obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False) if self.axis == 1: - obj = obj.astype("float", copy=False) + obj = obj.astype("float64", copy=False) obj._mgr = obj._mgr.consolidate() return obj From 1b263c67b00fd69e0cdd9b7fddc6bbca7ef6ca04 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 18 Sep 2020 23:49:32 +0200 Subject: [PATCH 6/7] Add review comments --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/core/window/rolling.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 36f1e32e0a251..0aa82fb936a95 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -336,7 +336,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupby.tshift` failing to raise ``ValueError`` when a frequency cannot be inferred for the index of a group (:issue:`35937`) - Bug in :meth:`DataFrame.groupby` does not always maintain column index name for ``any``, ``all``, ``bfill``, ``ffill``, ``shift`` (:issue:`29764`) - Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`) -- Bug in :meth:`Rolling.sum()` returned wrong values when dtypes where mixed between float and integer and axis was equal to one (:issue:`20649`) +- Bug in :meth:`Rolling.sum()` returned wrong values when dtypes where mixed between float and integer and axis was equal to one (:issue:`20649`, :issue:`35596`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index c09fb35190b38..99348898e0e1e 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -244,6 +244,8 @@ def _create_data(self, obj: FrameOrSeries) -> FrameOrSeries: if obj.ndim == 2: obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False) if self.axis == 1: + # GH: 20649 in case of mixed dtype and axis=1 we have to convert everything + # to float to calculate the complete row at once obj = obj.astype("float64", copy=False) obj._mgr = obj._mgr.consolidate() return obj From fb85045056fc9cd773e714287a7a88ac55e19023 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 19 Sep 2020 21:14:06 +0200 Subject: [PATCH 7/7] Exclude string dtypes and add tests --- pandas/core/window/rolling.py | 4 +++- pandas/tests/window/test_rolling.py | 13 +++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 99348898e0e1e..06c3ad23f904f 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -245,7 +245,9 @@ def _create_data(self, obj: FrameOrSeries) -> FrameOrSeries: obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False) if self.axis == 1: # GH: 20649 in case of mixed dtype and axis=1 we have to convert everything - # to float to calculate the complete row at once + # to float to calculate the complete row at once. We exclude all non-numeric + # dtypes. + obj = obj.select_dtypes(include=["integer", "float"], exclude=["timedelta"]) obj = obj.astype("float64", copy=False) obj._mgr = obj._mgr.consolidate() return obj diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index a186627f328ef..4dfa0287bbb03 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -806,3 +806,16 @@ def test_rolling_axis_one_with_nan(): ] ) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "value", + ["test", pd.to_datetime("2019-12-31"), pd.to_timedelta("1 days 06:05:01.00003")], +) +def test_rolling_axis_1_non_numeric_dtypes(value): + # GH: 20649 + df = pd.DataFrame({"a": [1, 2]}) + df["b"] = value + result = df.rolling(window=2, min_periods=1, axis=1).sum() + expected = pd.DataFrame({"a": [1.0, 2.0]}) + tm.assert_frame_equal(result, expected)