From 9a6228a93dc165098cb39f6cf5764e1bb1293da1 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 14 Jul 2023 17:32:07 -0700 Subject: [PATCH 1/3] BUG: groupby shift fill_value, freq followup --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/core/groupby/groupby.py | 2 +- .../tests/groupby/test_groupby_shift_diff.py | 18 ++++++++++++++++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index b7cc254d5c7e5..cfc752a54c740 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -612,7 +612,7 @@ Other - Bug in :func:`assert_almost_equal` now throwing assertion error for two unequal sets (:issue:`51727`) - Bug in :func:`assert_frame_equal` checks category dtypes even when asked not to check index type (:issue:`52126`) - Bug in :meth:`DataFrame.reindex` with a ``fill_value`` that should be inferred with a :class:`ExtensionDtype` incorrectly inferring ``object`` dtype (:issue:`52586`) -- Bug in :meth:`DataFrame.shift` and :meth:`Series.shift` when passing both "freq" and "fill_value" silently ignoring "fill_value" instead of raising ``ValueError`` (:issue:`53832`) +- Bug in :meth:`DataFrame.shift`, :meth:`Series.shift`, and :meth:`DataFrameGroupBy.shift` when passing both "freq" and "fill_value" silently ignoring "fill_value" instead of raising ``ValueError`` (:issue:`53832`) - Bug in :meth:`DataFrame.shift` with ``axis=1`` on a :class:`DataFrame` with a single :class:`ExtensionDtype` column giving incorrect results (:issue:`53832`) - Bug in :meth:`Series.align`, :meth:`DataFrame.align`, :meth:`Series.reindex`, :meth:`DataFrame.reindex`, :meth:`Series.interpolate`, :meth:`DataFrame.interpolate`, incorrectly failing to raise with method="asfreq" (:issue:`53620`) - Bug in :meth:`Series.map` when giving a callable to an empty series, the returned series had ``object`` dtype. 
It now keeps the original dtype (:issue:`52384`) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 85ec8c1b86374..8a02580e01114 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -4865,7 +4865,7 @@ def shift( periods: int = 1, freq=None, axis: Axis | lib.NoDefault = lib.no_default, - fill_value=None, + fill_value=lib.no_default, ): """ Shift each group by periods observations. diff --git a/pandas/tests/groupby/test_groupby_shift_diff.py b/pandas/tests/groupby/test_groupby_shift_diff.py index 656471b2f6eb0..a449114c4ac2d 100644 --- a/pandas/tests/groupby/test_groupby_shift_diff.py +++ b/pandas/tests/groupby/test_groupby_shift_diff.py @@ -7,6 +7,7 @@ Series, Timedelta, Timestamp, + date_range, ) import pandas._testing as tm @@ -154,3 +155,20 @@ def test_multindex_empty_shift_with_fill(): shifted_with_fill = df.groupby(["a", "b"]).shift(1, fill_value=0) tm.assert_frame_equal(shifted, shifted_with_fill) tm.assert_index_equal(shifted.index, shifted_with_fill.index) + + +def test_shift_periods_freq(): + data = {"a": [1, 2, 3, 4, 5, 6], "b": [0, 0, 0, 1, 1, 1]} + df = DataFrame(data, index=date_range(start="20100101", periods=6)) + result = df.groupby(df.index).shift(periods=-2, freq="D") + expected = DataFrame(data, index=date_range(start="2009-12-30", periods=6)) + tm.assert_frame_equal(result, expected) + + +def test_shift_disallow_freq_and_fill_value(): + # GH 53832 + data = {"a": [1, 2, 3, 4, 5, 6], "b": [0, 0, 0, 1, 1, 1]} + df = DataFrame(data, index=date_range(start="20100101", periods=6)) + msg = "Cannot pass both 'freq' and 'fill_value' to (Series|DataFrame).shift" + with pytest.raises(ValueError, match=msg): + df.groupby(df.index).shift(periods=-2, freq="D", fill_value="1") From dd64d9deb7d118b93e4d2315ad919366eb41d8e0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 17 Jul 2023 11:32:53 -0700 Subject: [PATCH 2/3] gh ref, docstring note --- 
pandas/core/groupby/groupby.py | 3 +++ pandas/tests/groupby/test_groupby_shift_diff.py | 1 + 2 files changed, 4 insertions(+) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 2c91487e24c47..3863e90381b7a 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -4934,6 +4934,9 @@ def shift( fill_value : optional The scalar value to use for newly introduced missing values. + .. versionchanged:: 2.1.0 + Will raise a ``ValueError`` if ``freq`` is provided too. + Returns ------- Series or DataFrame diff --git a/pandas/tests/groupby/test_groupby_shift_diff.py b/pandas/tests/groupby/test_groupby_shift_diff.py index a449114c4ac2d..cec8ea9d351cf 100644 --- a/pandas/tests/groupby/test_groupby_shift_diff.py +++ b/pandas/tests/groupby/test_groupby_shift_diff.py @@ -158,6 +158,7 @@ def test_multindex_empty_shift_with_fill(): def test_shift_periods_freq(): + # GH 54093 data = {"a": [1, 2, 3, 4, 5, 6], "b": [0, 0, 0, 1, 1, 1]} df = DataFrame(data, index=date_range(start="20100101", periods=6)) result = df.groupby(df.index).shift(periods=-2, freq="D") From 177dc919ca4badd8e3d2df5f42b938b2902d03d6 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 17 Jul 2023 15:57:29 -0700 Subject: [PATCH 3/3] Change no_default to None for _reindex_with_indexers --- pandas/core/groupby/groupby.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 3863e90381b7a..9d87a28093371 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -4994,6 +4994,8 @@ def shift( f = lambda x: x.shift(periods, freq, axis, fill_value) return self._python_apply_general(f, self._selected_obj, is_transform=True) + if fill_value is lib.no_default: + fill_value = None ids, _, ngroups = self.grouper.group_info res_indexer = np.zeros(len(ids), dtype=np.int64)