Skip to content

Commit dccbd00

Browse files
make _GroupBy generic class
1 parent fbb3f06 commit dccbd00

File tree

2 files changed

+21
-63
lines changed

2 files changed

+21
-63
lines changed

pandas/core/groupby/generic.py

+3 -51
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@
3131

3232
from pandas._libs import Timestamp, lib
3333
from pandas._typing import FrameOrSeries
34-
from pandas.util._decorators import Appender, Substitution, doc
34+
from pandas.util._decorators import Appender, Substitution
3535

3636
from pandas.core.dtypes.cast import (
3737
maybe_convert_objects,
@@ -149,7 +149,7 @@ def pinner(cls):
149149

150150

151151
@pin_whitelisted_properties(Series, base.series_apply_whitelist)
152-
class SeriesGroupBy(GroupBy):
152+
class SeriesGroupBy(GroupBy[Series]):
153153
_apply_whitelist = base.series_apply_whitelist
154154

155155
def _iterate_slices(self) -> Iterable[Series]:
@@ -789,30 +789,6 @@ def count(self) -> Series:
789789
)
790790
return self._reindex_output(result, fill_value=0)
791791

792-
@doc(GroupBy.sum.__doc__)
793-
def sum(self, numeric_only: bool = True, min_count: int = 0) -> Series:
794-
return super().sum(numeric_only=numeric_only, min_count=min_count)
795-
796-
@doc(GroupBy.prod.__doc__)
797-
def prod(self, numeric_only: bool = True, min_count: int = 0) -> Series:
798-
return super().prod(numeric_only=numeric_only, min_count=min_count)
799-
800-
@doc(GroupBy.min.__doc__)
801-
def min(self, numeric_only: bool = False, min_count: int = -1) -> Series:
802-
return super().min(numeric_only=numeric_only, min_count=min_count)
803-
804-
@doc(GroupBy.max.__doc__)
805-
def max(self, numeric_only: bool = False, min_count: int = -1) -> Series:
806-
return super().max(numeric_only=numeric_only, min_count=min_count)
807-
808-
@doc(GroupBy.first.__doc__)
809-
def first(self, numeric_only: bool = False, min_count: int = -1) -> Series:
810-
return super().first(numeric_only=numeric_only, min_count=min_count)
811-
812-
@doc(GroupBy.last.__doc__)
813-
def last(self, numeric_only: bool = False, min_count: int = -1) -> Series:
814-
return super().last(numeric_only=numeric_only, min_count=min_count)
815-
816792
def _apply_to_column_groupbys(self, func):
817793
""" return a pass thru """
818794
return func(self)
@@ -837,7 +813,7 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None):
837813

838814

839815
@pin_whitelisted_properties(DataFrame, base.dataframe_apply_whitelist)
840-
class DataFrameGroupBy(GroupBy):
816+
class DataFrameGroupBy(GroupBy[DataFrame]):
841817

842818
_apply_whitelist = base.dataframe_apply_whitelist
843819

@@ -1887,30 +1863,6 @@ def groupby_series(obj, col=None):
18871863
results.index = ibase.default_index(len(results))
18881864
return results
18891865

1890-
@doc(GroupBy.sum.__doc__)
1891-
def sum(self, numeric_only: bool = True, min_count: int = 0) -> DataFrame:
1892-
return super().sum(numeric_only=numeric_only, min_count=min_count)
1893-
1894-
@doc(GroupBy.prod.__doc__)
1895-
def prod(self, numeric_only: bool = True, min_count: int = 0) -> DataFrame:
1896-
return super().prod(numeric_only=numeric_only, min_count=min_count)
1897-
1898-
@doc(GroupBy.min.__doc__)
1899-
def min(self, numeric_only: bool = False, min_count: int = -1) -> DataFrame:
1900-
return super().min(numeric_only=numeric_only, min_count=min_count)
1901-
1902-
@doc(GroupBy.max.__doc__)
1903-
def max(self, numeric_only: bool = False, min_count: int = -1) -> DataFrame:
1904-
return super().max(numeric_only=numeric_only, min_count=min_count)
1905-
1906-
@doc(GroupBy.first.__doc__)
1907-
def first(self, numeric_only: bool = False, min_count: int = -1) -> DataFrame:
1908-
return super().first(numeric_only=numeric_only, min_count=min_count)
1909-
1910-
@doc(GroupBy.last.__doc__)
1911-
def last(self, numeric_only: bool = False, min_count: int = -1) -> DataFrame:
1912-
return super().last(numeric_only=numeric_only, min_count=min_count)
1913-
19141866
boxplot = boxplot_frame_groupby
19151867

19161868

pandas/core/groupby/groupby.py

+18 -12
Original file line number | Diff line number | Diff line change
@@ -17,13 +17,15 @@ class providing the base-class of operations.
1717
Callable,
1818
Dict,
1919
FrozenSet,
20+
Generic,
2021
Hashable,
2122
Iterable,
2223
List,
2324
Mapping,
2425
Optional,
2526
Tuple,
2627
Type,
28+
TypeVar,
2729
Union,
2830
)
2931

@@ -376,13 +378,14 @@ def _group_selection_context(groupby):
376378
]
377379

378380

379-
class _GroupBy(PandasObject, SelectionMixin):
381+
class _GroupBy(PandasObject, SelectionMixin, Generic[FrameOrSeries]):
380382
_group_selection = None
381383
_apply_whitelist: FrozenSet[str] = frozenset()
384+
obj: FrameOrSeries
382385

383386
def __init__(
384387
self,
385-
obj: NDFrame,
388+
obj: FrameOrSeries,
386389
keys: Optional[_KeysArgType] = None,
387390
axis: int = 0,
388391
level=None,
@@ -1079,7 +1082,11 @@ def _apply_filter(self, indices, dropna):
10791082
return filtered
10801083

10811084

1082-
class GroupBy(_GroupBy):
1085+
# We require another typevar to track operations that expand dimensions, like ohlc
1086+
FrameOrSeries2 = TypeVar("FrameOrSeries2", bound=NDFrame)
1087+
1088+
1089+
class GroupBy(_GroupBy[FrameOrSeries]):
10831090
"""
10841091
Class for grouping and aggregating relational data.
10851092
@@ -1390,25 +1397,25 @@ def size(self):
13901397
return self._reindex_output(result, fill_value=0)
13911398

13921399
@doc(_agg_template, fname="sum", no=True, mc=0)
1393-
def sum(self, numeric_only: bool = True, min_count: int = 0):
1400+
def sum(self, numeric_only: bool = True, min_count: int = 0) -> FrameOrSeries:
13941401
return self._agg_general(
13951402
numeric_only=numeric_only, min_count=min_count, alias="add", npfunc=np.sum
13961403
)
13971404

13981405
@doc(_agg_template, fname="prod", no=True, mc=0)
1399-
def prod(self, numeric_only: bool = True, min_count: int = 0):
1406+
def prod(self, numeric_only: bool = True, min_count: int = 0) -> FrameOrSeries:
14001407
return self._agg_general(
14011408
numeric_only=numeric_only, min_count=min_count, alias="prod", npfunc=np.prod
14021409
)
14031410

14041411
@doc(_agg_template, fname="min", no=False, mc=-1)
1405-
def min(self, numeric_only: bool = False, min_count: int = -1):
1412+
def min(self, numeric_only: bool = False, min_count: int = -1) -> FrameOrSeries:
14061413
return self._agg_general(
14071414
numeric_only=numeric_only, min_count=min_count, alias="min", npfunc=np.min
14081415
)
14091416

14101417
@doc(_agg_template, fname="max", no=False, mc=-1)
1411-
def max(self, numeric_only: bool = False, min_count: int = -1):
1418+
def max(self, numeric_only: bool = False, min_count: int = -1) -> FrameOrSeries:
14121419
return self._agg_general(
14131420
numeric_only=numeric_only, min_count=min_count, alias="max", npfunc=np.max
14141421
)
@@ -1431,7 +1438,7 @@ def get_loc_notna(x, loc: int):
14311438
return get_loc_notna(x, loc=loc)
14321439

14331440
@doc(_agg_template, fname="first", no=False, mc=-1)
1434-
def first(self, numeric_only: bool = False, min_count: int = -1):
1441+
def first(self, numeric_only: bool = False, min_count: int = -1) -> FrameOrSeries:
14351442
first_compat = partial(self._get_loc, loc=0)
14361443

14371444
return self._agg_general(
@@ -1441,8 +1448,7 @@ def first(self, numeric_only: bool = False, min_count: int = -1):
14411448
npfunc=first_compat,
14421449
)
14431450

1444-
@doc(_agg_template, fname="last", no=False, mc=-1)
1445-
def last(self, numeric_only: bool = False, min_count: int = -1):
1451+
def last(self, numeric_only: bool = False, min_count: int = -1) -> FrameOrSeries:
14461452
last_compat = partial(self._get_loc, loc=-1)
14471453

14481454
return self._agg_general(
@@ -2467,8 +2473,8 @@ def tail(self, n=5):
24672473
return self._selected_obj[mask]
24682474

24692475
def _reindex_output(
2470-
self, output: FrameOrSeries, fill_value: Scalar = np.NaN
2471-
) -> FrameOrSeries:
2476+
self, output: FrameOrSeries2, fill_value: Scalar = np.NaN
2477+
) -> FrameOrSeries2:
24722478
"""
24732479
If we have categorical groupers, then we might want to make sure that
24742480
we have a fully re-indexed output to the levels. This means expanding

0 commit comments

Comments
 (0)