Skip to content

Commit b7ce336

Browse files
smithto1 authored and feefladder committed
BUG: groupby.shift returns different columns when fill_value is specified (pandas-dev#41858)
1 parent 06b8a79 commit b7ce336

File tree

4 files changed

+21
-3
lines changed

4 files changed

+21
-3
lines changed

asv_bench/benchmarks/groupby.py

+12
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,18 @@ def time_category_size(self):
369369
self.draws.groupby(self.cats).size()
370370

371371

372+
class Shift:
373+
def setup(self):
374+
N = 18
375+
self.df = DataFrame({"g": ["a", "b"] * 9, "v": list(range(N))})
376+
377+
def time_defaults(self):
378+
self.df.groupby("g").shift()
379+
380+
def time_fill_value(self):
381+
self.df.groupby("g").shift(fill_value=99)
382+
383+
372384
class FillNA:
373385
def setup(self):
374386
N = 100

doc/source/whatsnew/v1.4.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ Performance improvements
168168
- Performance improvement in :meth:`.GroupBy.sample`, especially when ``weights`` argument provided (:issue:`34483`)
169169
- Performance improvement in :meth:`.GroupBy.transform` for user-defined functions (:issue:`41598`)
170170
- Performance improvement in constructing :class:`DataFrame` objects (:issue:`42631`)
171+
- Performance improvement in :meth:`.GroupBy.shift` when ``fill_value`` argument is provided (:issue:`26615`)
171172

172173
.. ---------------------------------------------------------------------------
173174
@@ -262,6 +263,7 @@ Groupby/resample/rolling
262263
- Fixed bug in :meth:`SeriesGroupBy.apply` where passing an unrecognized string argument failed to raise ``TypeError`` when the underlying ``Series`` is empty (:issue:`42021`)
263264
- Bug in :meth:`Series.rolling.apply`, :meth:`DataFrame.rolling.apply`, :meth:`Series.expanding.apply` and :meth:`DataFrame.expanding.apply` with ``engine="numba"`` where ``*args`` were being cached with the user passed function (:issue:`42287`)
264265
- Bug in :meth:`DataFrame.groupby.rolling.var` would calculate the rolling variance only on the first group (:issue:`42442`)
266+
- Bug in :meth:`.GroupBy.shift` that would return the grouping columns if ``fill_value`` was not None (:issue:`41556`)
265267

266268
Reshaping
267269
^^^^^^^^^

pandas/core/groupby/groupby.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -2822,6 +2822,7 @@ def _get_cythonized_result(
28222822
result_is_index: bool = False,
28232823
pre_processing=None,
28242824
post_processing=None,
2825+
fill_value=None,
28252826
**kwargs,
28262827
):
28272828
"""
@@ -2872,6 +2873,8 @@ def _get_cythonized_result(
28722873
second argument, i.e. the signature should be
28732874
(ndarray, Type). If `needs_nullable=True`, a third argument should be
28742875
`nullable`, to allow for processing specific to nullable values.
2876+
fill_value : any, default None
2877+
The scalar value to use for newly introduced missing values.
28752878
**kwargs : dict
28762879
Extra arguments to be passed back to Cython funcs
28772880
@@ -2947,7 +2950,7 @@ def blk_func(values: ArrayLike) -> ArrayLike:
29472950
result = result.reshape(-1)
29482951

29492952
if result_is_index:
2950-
result = algorithms.take_nd(values, result)
2953+
result = algorithms.take_nd(values, result, fill_value=fill_value)
29512954

29522955
if post_processing:
29532956
pp_kwargs = {}
@@ -3023,7 +3026,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):
30233026
tshift : Shift the time index, using the index’s frequency
30243027
if available.
30253028
"""
3026-
if freq is not None or axis != 0 or not isna(fill_value):
3029+
if freq is not None or axis != 0:
30273030
return self.apply(lambda x: x.shift(periods, freq, axis, fill_value))
30283031

30293032
return self._get_cythonized_result(
@@ -3033,6 +3036,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):
30333036
needs_ngroups=True,
30343037
result_is_index=True,
30353038
periods=periods,
3039+
fill_value=fill_value,
30363040
)
30373041

30383042
@final

pandas/tests/groupby/test_groupby_shift_diff.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def test_group_shift_with_fill_value():
5555
columns=["Z"],
5656
index=None,
5757
)
58-
result = g.shift(-1, fill_value=0)[["Z"]]
58+
result = g.shift(-1, fill_value=0)
5959

6060
tm.assert_frame_equal(result, expected)
6161

0 commit comments

Comments
 (0)