Skip to content

Commit da2a451

Browse files
Merge remote-tracking branch 'upstream/master' into fix-39904
2 parents 101c10c + ab599f3 commit da2a451

File tree

15 files changed

+141
-58
lines changed

15 files changed

+141
-58
lines changed

doc/source/user_guide/window.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ be calculated with :meth:`~Rolling.apply` by specifying a separate column of wei
101101
102102
All windowing operations support a ``min_periods`` argument that dictates the minimum amount of
103103
non-``np.nan`` values a window must have; otherwise, the resulting value is ``np.nan``.
104-
``min_peridos`` defaults to 1 for time-based windows and ``window`` for fixed windows
104+
``min_periods`` defaults to 1 for time-based windows and ``window`` for fixed windows
105105

106106
.. ipython:: python
107107

doc/source/whatsnew/v1.3.0.rst

+32
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,38 @@ cast to ``dtype=object`` (:issue:`38709`)
302302
ser2
303303
304304
305+
.. _whatsnew_130.notable_bug_fixes.rolling_groupby_column:
306+
307+
GroupBy.rolling no longer returns grouped-by column in values
308+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
309+
310+
The group-by column will now be dropped from the result of a
311+
``groupby.rolling`` operation (:issue:`32262`)
312+
313+
.. ipython:: python
314+
315+
df = pd.DataFrame({"A": [1, 1, 2, 3], "B": [0, 1, 2, 3]})
316+
df
317+
318+
*Previous behavior*:
319+
320+
.. code-block:: ipython
321+
322+
In [1]: df.groupby("A").rolling(2).sum()
323+
Out[1]:
324+
A B
325+
A
326+
1 0 NaN NaN
327+
1 2.0 1.0
328+
2 2 NaN NaN
329+
3 3 NaN NaN
330+
331+
*New behavior*:
332+
333+
.. ipython:: python
334+
335+
df.groupby("A").rolling(2).sum()
336+
305337
.. _whatsnew_130.notable_bug_fixes.rolling_var_precision:
306338

307339
Removed artificial truncation in rolling variance and standard deviation

pandas/_libs/groupby.pyx

+6-11
Original file line numberDiff line numberDiff line change
@@ -681,18 +681,17 @@ group_mean_float64 = _group_mean['double']
681681

682682
@cython.wraparound(False)
683683
@cython.boundscheck(False)
684-
def _group_ohlc(floating[:, ::1] out,
685-
int64_t[::1] counts,
686-
ndarray[floating, ndim=2] values,
687-
const intp_t[:] labels,
688-
Py_ssize_t min_count=-1):
684+
def group_ohlc(floating[:, ::1] out,
685+
int64_t[::1] counts,
686+
ndarray[floating, ndim=2] values,
687+
const intp_t[:] labels,
688+
Py_ssize_t min_count=-1):
689689
"""
690690
Only aggregates on axis=0
691691
"""
692692
cdef:
693693
Py_ssize_t i, j, N, K, lab
694-
floating val, count
695-
Py_ssize_t ngroups = len(counts)
694+
floating val
696695

697696
assert min_count == -1, "'min_count' only used in add and prod"
698697

@@ -727,10 +726,6 @@ def _group_ohlc(floating[:, ::1] out,
727726
out[lab, 3] = val
728727

729728

730-
group_ohlc_float32 = _group_ohlc['float']
731-
group_ohlc_float64 = _group_ohlc['double']
732-
733-
734729
@cython.boundscheck(False)
735730
@cython.wraparound(False)
736731
def group_quantile(ndarray[float64_t] out,

pandas/core/frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -542,7 +542,7 @@ class DataFrame(NDFrame, OpsMixin):
542542
>>> from dataclasses import make_dataclass
543543
>>> Point = make_dataclass("Point", [("x", int), ("y", int)])
544544
>>> pd.DataFrame([Point(0, 0), Point(0, 3), Point(2, 3)])
545-
x y
545+
x y
546546
0 0 0
547547
1 0 3
548548
2 2 3

pandas/core/groupby/generic.py

+8-9
Original file line numberDiff line numberDiff line change
@@ -355,15 +355,15 @@ def _aggregate_multiple_funcs(self, arg):
355355
# TODO: index should not be Optional - see GH 35490
356356
def _wrap_series_output(
357357
self,
358-
output: Mapping[base.OutputKey, Union[Series, np.ndarray]],
358+
output: Mapping[base.OutputKey, Union[Series, ArrayLike]],
359359
index: Optional[Index],
360360
) -> FrameOrSeriesUnion:
361361
"""
362362
Wraps the output of a SeriesGroupBy operation into the expected result.
363363
364364
Parameters
365365
----------
366-
output : Mapping[base.OutputKey, Union[Series, np.ndarray]]
366+
output : Mapping[base.OutputKey, Union[Series, np.ndarray, ExtensionArray]]
367367
Data to wrap.
368368
index : pd.Index or None
369369
Index to apply to the output.
@@ -420,14 +420,14 @@ def _wrap_aggregated_output(
420420
return self._reindex_output(result)
421421

422422
def _wrap_transformed_output(
423-
self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]]
423+
self, output: Mapping[base.OutputKey, Union[Series, ArrayLike]]
424424
) -> Series:
425425
"""
426426
Wraps the output of a SeriesGroupBy aggregation into the expected result.
427427
428428
Parameters
429429
----------
430-
output : dict[base.OutputKey, Union[Series, np.ndarray]]
430+
output : dict[base.OutputKey, Union[Series, np.ndarray, ExtensionArray]]
431431
Dict with a sole key of 0 and a value of the result values.
432432
433433
Returns
@@ -1119,6 +1119,7 @@ def cast_agg_result(result, values: ArrayLike, how: str) -> ArrayLike:
11191119

11201120
if isinstance(values, Categorical) and isinstance(result, np.ndarray):
11211121
# If the Categorical op didn't raise, it is dtype-preserving
1122+
# We get here with how="first", "last", "min", "max"
11221123
result = type(values)._from_sequence(result.ravel(), dtype=values.dtype)
11231124
# Note this will have result.dtype == dtype from above
11241125

@@ -1195,9 +1196,7 @@ def array_func(values: ArrayLike) -> ArrayLike:
11951196
assert how == "ohlc"
11961197
raise
11971198

1198-
# error: Incompatible types in assignment (expression has type
1199-
# "ExtensionArray", variable has type "ndarray")
1200-
result = py_fallback(values) # type: ignore[assignment]
1199+
result = py_fallback(values)
12011200

12021201
return cast_agg_result(result, values, how)
12031202

@@ -1753,14 +1752,14 @@ def _wrap_aggregated_output(
17531752
return self._reindex_output(result)
17541753

17551754
def _wrap_transformed_output(
1756-
self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]]
1755+
self, output: Mapping[base.OutputKey, Union[Series, ArrayLike]]
17571756
) -> DataFrame:
17581757
"""
17591758
Wraps the output of DataFrameGroupBy transformations into the expected result.
17601759
17611760
Parameters
17621761
----------
1763-
output : Mapping[base.OutputKey, Union[Series, np.ndarray]]
1762+
output : Mapping[base.OutputKey, Union[Series, np.ndarray, ExtensionArray]]
17641763
Data to wrap.
17651764
17661765
Returns

pandas/core/groupby/groupby.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,6 @@ class providing the base-class of operations.
8484
import pandas.core.algorithms as algorithms
8585
from pandas.core.arrays import (
8686
Categorical,
87-
DatetimeArray,
8887
ExtensionArray,
8988
)
9089
from pandas.core.base import (
@@ -1026,7 +1025,7 @@ def _cumcount_array(self, ascending: bool = True):
10261025
def _cython_transform(
10271026
self, how: str, numeric_only: bool = True, axis: int = 0, **kwargs
10281027
):
1029-
output: Dict[base.OutputKey, np.ndarray] = {}
1028+
output: Dict[base.OutputKey, ArrayLike] = {}
10301029

10311030
for idx, obj in enumerate(self._iterate_slices()):
10321031
name = obj.name
@@ -1054,7 +1053,7 @@ def _wrap_aggregated_output(
10541053
):
10551054
raise AbstractMethodError(self)
10561055

1057-
def _wrap_transformed_output(self, output: Mapping[base.OutputKey, np.ndarray]):
1056+
def _wrap_transformed_output(self, output: Mapping[base.OutputKey, ArrayLike]):
10581057
raise AbstractMethodError(self)
10591058

10601059
def _wrap_applied_output(self, data, keys, values, not_indexed_same: bool = False):
@@ -1099,7 +1098,7 @@ def _agg_general(
10991098
def _cython_agg_general(
11001099
self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1
11011100
):
1102-
output: Dict[base.OutputKey, Union[np.ndarray, DatetimeArray]] = {}
1101+
output: Dict[base.OutputKey, ArrayLike] = {}
11031102
# Ideally we would be able to enumerate self._iterate_slices and use
11041103
# the index from enumeration as the key of output, but ohlc in particular
11051104
# returns a (n x 4) array. Output requires 1D ndarrays as values, so we

pandas/core/groupby/ops.py

+16-13
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import pandas._libs.groupby as libgroupby
3232
import pandas._libs.reduction as libreduction
3333
from pandas._typing import (
34+
ArrayLike,
3435
DtypeObj,
3536
F,
3637
FrameOrSeries,
@@ -485,6 +486,12 @@ def _get_cython_func_and_vals(
485486
func = _get_cython_function(kind, how, values.dtype, is_numeric)
486487
else:
487488
raise
489+
else:
490+
if values.dtype.kind in ["i", "u"]:
491+
if how in ["ohlc"]:
492+
# The output may still include nans, so we have to cast
493+
values = ensure_float64(values)
494+
488495
return func, values
489496

490497
@final
@@ -524,7 +531,7 @@ def _disallow_invalid_ops(
524531
@final
525532
def _ea_wrap_cython_operation(
526533
self, kind: str, values, how: str, axis: int, min_count: int = -1, **kwargs
527-
) -> np.ndarray:
534+
) -> ArrayLike:
528535
"""
529536
If we have an ExtensionArray, unwrap, call _cython_operation, and
530537
re-wrap if appropriate.
@@ -576,7 +583,7 @@ def _ea_wrap_cython_operation(
576583
@final
577584
def _cython_operation(
578585
self, kind: str, values, how: str, axis: int, min_count: int = -1, **kwargs
579-
) -> np.ndarray:
586+
) -> ArrayLike:
580587
"""
581588
Returns the values of a cython operation.
582589
"""
@@ -683,11 +690,11 @@ def _cython_operation(
683690
# e.g. if we are int64 and need to restore to datetime64/timedelta64
684691
# "rank" is the only member of cython_cast_blocklist we get here
685692
dtype = maybe_cast_result_dtype(orig_values.dtype, how)
686-
# error: Incompatible types in assignment (expression has type
687-
# "Union[ExtensionArray, ndarray]", variable has type "ndarray")
688-
result = maybe_downcast_to_dtype(result, dtype) # type: ignore[assignment]
693+
op_result = maybe_downcast_to_dtype(result, dtype)
694+
else:
695+
op_result = result
689696

690-
return result
697+
return op_result
691698

692699
@final
693700
def _aggregate(
@@ -784,14 +791,10 @@ def _aggregate_series_pure_python(self, obj: Series, func: F):
784791
counts[label] = group.shape[0]
785792
result[label] = res
786793

787-
result = lib.maybe_convert_objects(result, try_float=False)
788-
# error: Incompatible types in assignment (expression has type
789-
# "Union[ExtensionArray, ndarray]", variable has type "ndarray")
790-
result = maybe_cast_result( # type: ignore[assignment]
791-
result, obj, numeric_only=True
792-
)
794+
out = lib.maybe_convert_objects(result, try_float=False)
795+
out = maybe_cast_result(out, obj, numeric_only=True)
793796

794-
return result, counts
797+
return out, counts
795798

796799

797800
class BinGrouper(BaseGrouper):

pandas/core/missing.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -861,7 +861,4 @@ def _rolling_window(a: np.ndarray, window: int):
861861
# https://stackoverflow.com/a/6811241
862862
shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
863863
strides = a.strides + (a.strides[-1],)
864-
# error: Module has no attribute "stride_tricks"
865-
return np.lib.stride_tricks.as_strided( # type: ignore[attr-defined]
866-
a, shape=shape, strides=strides
867-
)
864+
return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)

pandas/core/strings/accessor.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1925,13 +1925,13 @@ def get_dummies(self, sep="|"):
19251925
Examples
19261926
--------
19271927
>>> pd.Series(['a|b', 'a', 'a|c']).str.get_dummies()
1928-
a b c
1928+
a b c
19291929
0 1 1 0
19301930
1 1 0 0
19311931
2 1 0 1
19321932
19331933
>>> pd.Series(['a|b', np.nan, 'a|c']).str.get_dummies()
1934-
a b c
1934+
a b c
19351935
0 1 1 0
19361936
1 0 0 0
19371937
2 1 0 1

pandas/core/window/rolling.py

+4
Original file line numberDiff line numberDiff line change
@@ -558,6 +558,10 @@ def __init__(
558558
if _grouper is None:
559559
raise ValueError("Must pass a Grouper object.")
560560
self._grouper = _grouper
561+
# GH 32262: It's convention to keep the grouping column in
562+
# groupby.<agg_func>, but unexpected to users in
563+
# groupby.rolling.<agg_func>
564+
obj = obj.drop(columns=self._grouper.names, errors="ignore")
561565
super().__init__(obj, *args, **kwargs)
562566

563567
def _apply(

pandas/plotting/_matplotlib/core.py

+24-11
Original file line numberDiff line numberDiff line change
@@ -579,13 +579,24 @@ def legend_title(self) -> Optional[str]:
579579
stringified = map(pprint_thing, self.data.columns.names)
580580
return ",".join(stringified)
581581

582-
def _add_legend_handle(self, handle, label, index=None):
583-
if label is not None:
584-
if self.mark_right and index is not None:
585-
if self.on_right(index):
586-
label = label + " (right)"
587-
self.legend_handles.append(handle)
588-
self.legend_labels.append(label)
582+
def _mark_right_label(self, label: str, index: int) -> str:
583+
"""
584+
Append ``(right)`` to the label of a line if it's plotted on the right axis.
585+
586+
Note that ``(right)`` is only appended when ``subplots=False``.
587+
"""
588+
if not self.subplots and self.mark_right and self.on_right(index):
589+
label += " (right)"
590+
return label
591+
592+
def _append_legend_handles_labels(self, handle: Artist, label: str) -> None:
593+
"""
594+
Append current handle and label to ``legend_handles`` and ``legend_labels``.
595+
596+
These will be used to make the legend.
597+
"""
598+
self.legend_handles.append(handle)
599+
self.legend_labels.append(label)
589600

590601
def _make_legend(self):
591602
ax, leg, handle = self._get_ax_legend_handle(self.axes[0])
@@ -1078,7 +1089,7 @@ def _make_plot(self):
10781089
cbar.ax.set_yticklabels(self.data[c].cat.categories)
10791090

10801091
if label is not None:
1081-
self._add_legend_handle(scatter, label)
1092+
self._append_legend_handles_labels(scatter, label)
10821093
else:
10831094
self.legend = False
10841095

@@ -1170,6 +1181,7 @@ def _make_plot(self):
11701181
kwds = dict(kwds, **errors)
11711182

11721183
label = pprint_thing(label) # .encode('utf-8')
1184+
label = self._mark_right_label(label, index=i)
11731185
kwds["label"] = label
11741186

11751187
newlines = plotf(
@@ -1182,7 +1194,7 @@ def _make_plot(self):
11821194
is_errorbar=is_errorbar,
11831195
**kwds,
11841196
)
1185-
self._add_legend_handle(newlines[0], label, index=i)
1197+
self._append_legend_handles_labels(newlines[0], label)
11861198

11871199
if self._is_ts_plot():
11881200

@@ -1458,6 +1470,7 @@ def _make_plot(self):
14581470
kwds = dict(kwds, **errors)
14591471

14601472
label = pprint_thing(label)
1473+
label = self._mark_right_label(label, index=i)
14611474

14621475
if (("yerr" in kwds) or ("xerr" in kwds)) and (kwds.get("ecolor") is None):
14631476
kwds["ecolor"] = mpl.rcParams["xtick.color"]
@@ -1508,7 +1521,7 @@ def _make_plot(self):
15081521
log=self.log,
15091522
**kwds,
15101523
)
1511-
self._add_legend_handle(rect, label, index=i)
1524+
self._append_legend_handles_labels(rect, label)
15121525

15131526
def _post_plot_logic(self, ax: Axes, data):
15141527
if self.use_index:
@@ -1620,4 +1633,4 @@ def blank_labeler(label, value):
16201633
# leglabels is used for legend labels
16211634
leglabels = labels if labels is not None else idx
16221635
for p, l in zip(patches, leglabels):
1623-
self._add_legend_handle(p, l)
1636+
self._append_legend_handles_labels(p, l)

0 commit comments

Comments
 (0)