Skip to content

Commit 3c8030f

Browse files
topper-123 authored and jreback committed
DOC/CLN: move NDFrame.groupby to (DataFrame|Series).groupby (pandas-dev#30314)
1 parent ff26171 commit 3c8030f

File tree

4 files changed

+172
-82
lines changed

4 files changed

+172
-82
lines changed

pandas/core/frame.py

+77
Original file line number | Diff line number | Diff line change
@@ -100,6 +100,7 @@
100100
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin as DatetimeLikeArray
101101
from pandas.core.arrays.sparse import SparseFrameAccessor
102102
from pandas.core.generic import NDFrame, _shared_docs
103+
from pandas.core.groupby import generic as groupby_generic
103104
from pandas.core.indexes import base as ibase
104105
from pandas.core.indexes.api import Index, ensure_index, ensure_index_from_sequences
105106
from pandas.core.indexes.datetimes import DatetimeIndex
@@ -5601,6 +5602,82 @@ def update(
56015602

56025603
# ----------------------------------------------------------------------
56035604
# Data reshaping
5605+
@Appender(
5606+
"""
5607+
Examples
5608+
--------
5609+
>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
5610+
... 'Parrot', 'Parrot'],
5611+
... 'Max Speed': [380., 370., 24., 26.]})
5612+
>>> df
5613+
Animal Max Speed
5614+
0 Falcon 380.0
5615+
1 Falcon 370.0
5616+
2 Parrot 24.0
5617+
3 Parrot 26.0
5618+
>>> df.groupby(['Animal']).mean()
5619+
Max Speed
5620+
Animal
5621+
Falcon 375.0
5622+
Parrot 25.0
5623+
5624+
**Hierarchical Indexes**
5625+
5626+
We can groupby different levels of a hierarchical index
5627+
using the `level` parameter:
5628+
5629+
>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
5630+
... ['Captive', 'Wild', 'Captive', 'Wild']]
5631+
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
5632+
>>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
5633+
... index=index)
5634+
>>> df
5635+
Max Speed
5636+
Animal Type
5637+
Falcon Captive 390.0
5638+
Wild 350.0
5639+
Parrot Captive 30.0
5640+
Wild 20.0
5641+
>>> df.groupby(level=0).mean()
5642+
Max Speed
5643+
Animal
5644+
Falcon 370.0
5645+
Parrot 25.0
5646+
>>> df.groupby(level="Type").mean()
5647+
Max Speed
5648+
Type
5649+
Captive 210.0
5650+
Wild 185.0
5651+
"""
5652+
)
5653+
@Appender(_shared_docs["groupby"] % _shared_doc_kwargs)
5654+
def groupby(
5655+
self,
5656+
by=None,
5657+
axis=0,
5658+
level=None,
5659+
as_index: bool = True,
5660+
sort: bool = True,
5661+
group_keys: bool = True,
5662+
squeeze: bool = False,
5663+
observed: bool = False,
5664+
) -> "groupby_generic.DataFrameGroupBy":
5665+
# NOTE: no literal docstring here on purpose -- the user-facing docstring is
# assembled by the stacked Appender decorators (shared "groupby" text first,
# then the DataFrame-specific Examples section).
# At least one grouping source is required: `by` or `level`.
5666+
if level is None and by is None:
5667+
raise TypeError("You have to supply one of 'by' and 'level'")
5668+
axis = self._get_axis_number(axis)  # resolved via the object's own helper before being forwarded
5669+
# Construct the DataFrame-specific groupby object directly; note that the
# public `by` argument is forwarded under the name `keys`.
5670+
return groupby_generic.DataFrameGroupBy(
5671+
obj=self,
5672+
keys=by,
5673+
axis=axis,
5674+
level=level,
5675+
as_index=as_index,
5676+
sort=sort,
5677+
group_keys=group_keys,
5678+
squeeze=squeeze,
5679+
observed=observed,
5680+
)
56045681

56055682
_shared_docs[
56065683
"pivot"

pandas/core/generic.py

+7 -79
Original file line number | Diff line number | Diff line change
@@ -7301,19 +7301,10 @@ def clip(
73017301

73027302
return result
73037303

7304-
def groupby(
7305-
self,
7306-
by=None,
7307-
axis=0,
7308-
level=None,
7309-
as_index: bool_t = True,
7310-
sort: bool_t = True,
7311-
group_keys: bool_t = True,
7312-
squeeze: bool_t = False,
7313-
observed: bool_t = False,
7314-
):
7315-
"""
7316-
Group DataFrame or Series using a mapper or by a Series of columns.
7304+
_shared_docs[
7305+
"groupby"
7306+
] = """
7307+
Group %(klass)s using a mapper or by a Series of columns.
73177308
73187309
A groupby operation involves some combination of splitting the
73197310
object, applying a function, and combining the results. This can be
@@ -7358,9 +7349,8 @@ def groupby(
73587349
73597350
Returns
73607351
-------
7361-
DataFrameGroupBy or SeriesGroupBy
7362-
Depends on the calling object and returns groupby object that
7363-
contains information about the groups.
7352+
%(klass)sGroupBy
7353+
Returns a groupby object that contains information about the groups.
73647354
73657355
See Also
73667356
--------
@@ -7371,69 +7361,7 @@ def groupby(
73717361
-----
73727362
See the `user guide
73737363
<http://pandas.pydata.org/pandas-docs/stable/groupby.html>`_ for more.
7374-
7375-
Examples
7376-
--------
7377-
>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
7378-
... 'Parrot', 'Parrot'],
7379-
... 'Max Speed': [380., 370., 24., 26.]})
7380-
>>> df
7381-
Animal Max Speed
7382-
0 Falcon 380.0
7383-
1 Falcon 370.0
7384-
2 Parrot 24.0
7385-
3 Parrot 26.0
7386-
>>> df.groupby(['Animal']).mean()
7387-
Max Speed
7388-
Animal
7389-
Falcon 375.0
7390-
Parrot 25.0
7391-
7392-
**Hierarchical Indexes**
7393-
7394-
We can groupby different levels of a hierarchical index
7395-
using the `level` parameter:
7396-
7397-
>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
7398-
... ['Captive', 'Wild', 'Captive', 'Wild']]
7399-
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
7400-
>>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
7401-
... index=index)
7402-
>>> df
7403-
Max Speed
7404-
Animal Type
7405-
Falcon Captive 390.0
7406-
Wild 350.0
7407-
Parrot Captive 30.0
7408-
Wild 20.0
7409-
>>> df.groupby(level=0).mean()
7410-
Max Speed
7411-
Animal
7412-
Falcon 370.0
7413-
Parrot 25.0
7414-
>>> df.groupby(level=1).mean()
7415-
Max Speed
7416-
Type
7417-
Captive 210.0
7418-
Wild 185.0
7419-
"""
7420-
from pandas.core.groupby.groupby import get_groupby
7421-
7422-
if level is None and by is None:
7423-
raise TypeError("You have to supply one of 'by' and 'level'")
7424-
axis = self._get_axis_number(axis)
7425-
7426-
return get_groupby(
7427-
self,
7428-
by=by,
7429-
axis=axis,
7430-
level=level,
7431-
as_index=as_index,
7432-
sort=sort,
7433-
group_keys=group_keys,
7434-
squeeze=squeeze,
7435-
observed=observed,
7436-
)
7364+
"""
74377365

74387366
def asfreq(
74397367
self,

pandas/core/reshape/merge.py

+3 -1
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,7 @@
4141
from pandas.core.dtypes.missing import isna, na_value_for_dtype
4242

4343
from pandas import Categorical, Index, MultiIndex
44+
from pandas.core import groupby
4445
import pandas.core.algorithms as algos
4546
from pandas.core.arrays.categorical import _recode_for_categories
4647
import pandas.core.common as com
@@ -113,6 +114,7 @@ def _groupby_and_merge(
113114
by = [by]
114115

115116
lby = left.groupby(by, sort=False)
117+
rby: Optional[groupby.DataFrameGroupBy] = None
116118

117119
# if we can groupby the rhs
118120
# then we can get vastly better perf
@@ -132,7 +134,7 @@ def _groupby_and_merge(
132134
try:
133135
rby = right.groupby(by, sort=False)
134136
except KeyError:
135-
rby = None
137+
pass
136138

137139
for key, lhs in lby:
138140

pandas/core/series.py

+85 -2
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,7 @@
5959
is_empty_data,
6060
sanitize_array,
6161
)
62-
from pandas.core.generic import _shared_docs
62+
from pandas.core.groupby import generic as groupby_generic
6363
from pandas.core.indexers import maybe_convert_indices
6464
from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
6565
from pandas.core.indexes.api import (
@@ -1431,7 +1431,7 @@ def to_string(
14311431
"""
14321432
)
14331433
@Substitution(klass="Series")
1434-
@Appender(_shared_docs["to_markdown"])
1434+
@Appender(generic._shared_docs["to_markdown"])
14351435
def to_markdown(
14361436
self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs,
14371437
) -> Optional[str]:
@@ -1568,6 +1568,89 @@ def _set_name(self, name, inplace=False):
15681568
ser.name = name
15691569
return ser
15701570

1571+
@Appender(
1572+
"""
1573+
Examples
1574+
--------
1575+
>>> ser = pd.Series([390., 350., 30., 20.],
1576+
... index=['Falcon', 'Falcon', 'Parrot', 'Parrot'], name="Max Speed")
1577+
>>> ser
1578+
Falcon 390.0
1579+
Falcon 350.0
1580+
Parrot 30.0
1581+
Parrot 20.0
1582+
Name: Max Speed, dtype: float64
1583+
>>> ser.groupby(["a", "b", "a", "b"]).mean()
1584+
a 210.0
1585+
b 185.0
1586+
Name: Max Speed, dtype: float64
1587+
>>> ser.groupby(level=0).mean()
1588+
Falcon 370.0
1589+
Parrot 25.0
1590+
Name: Max Speed, dtype: float64
1591+
>>> ser.groupby(ser > 100).mean()
1592+
Max Speed
1593+
False 25.0
1594+
True 370.0
1595+
Name: Max Speed, dtype: float64
1596+
1597+
**Grouping by Indexes**
1598+
1599+
We can groupby different levels of a hierarchical index
1600+
using the `level` parameter:
1601+
1602+
>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
1603+
... ['Captive', 'Wild', 'Captive', 'Wild']]
1604+
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
1605+
>>> ser = pd.Series([390., 350., 30., 20.], index=index, name="Max Speed")
1606+
>>> ser
1607+
Animal Type
1608+
Falcon Captive 390.0
1609+
Wild 350.0
1610+
Parrot Captive 30.0
1611+
Wild 20.0
1612+
Name: Max Speed, dtype: float64
1613+
>>> ser.groupby(level=0).mean()
1614+
Animal
1615+
Falcon 370.0
1616+
Parrot 25.0
1617+
Name: Max Speed, dtype: float64
1618+
>>> ser.groupby(level="Type").mean()
1619+
Type
1620+
Captive 210.0
1621+
Wild 185.0
1622+
Name: Max Speed, dtype: float64
1623+
"""
1624+
)
1625+
@Appender(generic._shared_docs["groupby"] % _shared_doc_kwargs)
1626+
def groupby(
1627+
self,
1628+
by=None,
1629+
axis=0,
1630+
level=None,
1631+
as_index: bool = True,
1632+
sort: bool = True,
1633+
group_keys: bool = True,
1634+
squeeze: bool = False,
1635+
observed: bool = False,
1636+
) -> "groupby_generic.SeriesGroupBy":
1637+
# NOTE: no literal docstring here on purpose -- the user-facing docstring is
# assembled by the stacked Appender decorators (shared "groupby" text first,
# then the Series-specific Examples section).
# At least one grouping source is required: `by` or `level`.
1638+
if level is None and by is None:
1639+
raise TypeError("You have to supply one of 'by' and 'level'")
1640+
axis = self._get_axis_number(axis)  # resolved via the object's own helper before being forwarded
1641+
# Construct the Series-specific groupby object directly; note that the
# public `by` argument is forwarded under the name `keys`.
1642+
return groupby_generic.SeriesGroupBy(
1643+
obj=self,
1644+
keys=by,
1645+
axis=axis,
1646+
level=level,
1647+
as_index=as_index,
1648+
sort=sort,
1649+
group_keys=group_keys,
1650+
squeeze=squeeze,
1651+
observed=observed,
1652+
)
1653+
15711654
# ----------------------------------------------------------------------
15721655
# Statistics, overridden ndarray methods
15731656

0 commit comments

Comments
 (0)