Skip to content

Commit 903ab87

Browse files
topper-123 authored and hweecat committed
DOC/CLN: move NDFrame.groupby to (DataFrame|Series).groupby (pandas-dev#30314)
1 parent 57b489a commit 903ab87

File tree

4 files changed

+172
-82
lines changed

4 files changed

+172
-82
lines changed

pandas/core/frame.py

+77
Original file line number | Diff line number | Diff line change
@@ -100,6 +100,7 @@
100100
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin as DatetimeLikeArray
101101
from pandas.core.arrays.sparse import SparseFrameAccessor
102102
from pandas.core.generic import NDFrame, _shared_docs
103+
from pandas.core.groupby import generic as groupby_generic
103104
from pandas.core.indexes import base as ibase
104105
from pandas.core.indexes.api import Index, ensure_index, ensure_index_from_sequences
105106
from pandas.core.indexes.datetimes import DatetimeIndex
@@ -5606,6 +5607,82 @@ def update(
56065607

56075608
# ----------------------------------------------------------------------
56085609
# Data reshaping
5610+
@Appender(
5611+
"""
5612+
Examples
5613+
--------
5614+
>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
5615+
... 'Parrot', 'Parrot'],
5616+
... 'Max Speed': [380., 370., 24., 26.]})
5617+
>>> df
5618+
Animal Max Speed
5619+
0 Falcon 380.0
5620+
1 Falcon 370.0
5621+
2 Parrot 24.0
5622+
3 Parrot 26.0
5623+
>>> df.groupby(['Animal']).mean()
5624+
Max Speed
5625+
Animal
5626+
Falcon 375.0
5627+
Parrot 25.0
5628+
5629+
**Hierarchical Indexes**
5630+
5631+
We can groupby different levels of a hierarchical index
5632+
using the `level` parameter:
5633+
5634+
>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
5635+
... ['Captive', 'Wild', 'Captive', 'Wild']]
5636+
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
5637+
>>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
5638+
... index=index)
5639+
>>> df
5640+
Max Speed
5641+
Animal Type
5642+
Falcon Captive 390.0
5643+
Wild 350.0
5644+
Parrot Captive 30.0
5645+
Wild 20.0
5646+
>>> df.groupby(level=0).mean()
5647+
Max Speed
5648+
Animal
5649+
Falcon 370.0
5650+
Parrot 25.0
5651+
>>> df.groupby(level="Type").mean()
5652+
Max Speed
5653+
Type
5654+
Captive 210.0
5655+
Wild 185.0
5656+
"""
5657+
)
5658+
@Appender(_shared_docs["groupby"] % _shared_doc_kwargs)
5659+
def groupby(
5660+
self,
5661+
by=None,
5662+
axis=0,
5663+
level=None,
5664+
as_index: bool = True,
5665+
sort: bool = True,
5666+
group_keys: bool = True,
5667+
squeeze: bool = False,
5668+
observed: bool = False,
5669+
) -> "groupby_generic.DataFrameGroupBy":
5670+
5671+
if level is None and by is None:
5672+
raise TypeError("You have to supply one of 'by' and 'level'")
5673+
axis = self._get_axis_number(axis)
5674+
5675+
return groupby_generic.DataFrameGroupBy(
5676+
obj=self,
5677+
keys=by,
5678+
axis=axis,
5679+
level=level,
5680+
as_index=as_index,
5681+
sort=sort,
5682+
group_keys=group_keys,
5683+
squeeze=squeeze,
5684+
observed=observed,
5685+
)
56095686

56105687
_shared_docs[
56115688
"pivot"

pandas/core/generic.py

+7 -79
Original file line numberDiff line numberDiff line change
@@ -7300,19 +7300,10 @@ def clip(
73007300

73017301
return result
73027302

7303-
def groupby(
7304-
self,
7305-
by=None,
7306-
axis=0,
7307-
level=None,
7308-
as_index: bool_t = True,
7309-
sort: bool_t = True,
7310-
group_keys: bool_t = True,
7311-
squeeze: bool_t = False,
7312-
observed: bool_t = False,
7313-
):
7314-
"""
7315-
Group DataFrame or Series using a mapper or by a Series of columns.
7303+
_shared_docs[
7304+
"groupby"
7305+
] = """
7306+
Group %(klass)s using a mapper or by a Series of columns.
73167307
73177308
A groupby operation involves some combination of splitting the
73187309
object, applying a function, and combining the results. This can be
@@ -7357,9 +7348,8 @@ def groupby(
73577348
73587349
Returns
73597350
-------
7360-
DataFrameGroupBy or SeriesGroupBy
7361-
Depends on the calling object and returns groupby object that
7362-
contains information about the groups.
7351+
%(klass)sGroupBy
7352+
Returns a groupby object that contains information about the groups.
73637353
73647354
See Also
73657355
--------
@@ -7370,69 +7360,7 @@ def groupby(
73707360
-----
73717361
See the `user guide
73727362
<http://pandas.pydata.org/pandas-docs/stable/groupby.html>`_ for more.
7373-
7374-
Examples
7375-
--------
7376-
>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
7377-
... 'Parrot', 'Parrot'],
7378-
... 'Max Speed': [380., 370., 24., 26.]})
7379-
>>> df
7380-
Animal Max Speed
7381-
0 Falcon 380.0
7382-
1 Falcon 370.0
7383-
2 Parrot 24.0
7384-
3 Parrot 26.0
7385-
>>> df.groupby(['Animal']).mean()
7386-
Max Speed
7387-
Animal
7388-
Falcon 375.0
7389-
Parrot 25.0
7390-
7391-
**Hierarchical Indexes**
7392-
7393-
We can groupby different levels of a hierarchical index
7394-
using the `level` parameter:
7395-
7396-
>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
7397-
... ['Captive', 'Wild', 'Captive', 'Wild']]
7398-
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
7399-
>>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
7400-
... index=index)
7401-
>>> df
7402-
Max Speed
7403-
Animal Type
7404-
Falcon Captive 390.0
7405-
Wild 350.0
7406-
Parrot Captive 30.0
7407-
Wild 20.0
7408-
>>> df.groupby(level=0).mean()
7409-
Max Speed
7410-
Animal
7411-
Falcon 370.0
7412-
Parrot 25.0
7413-
>>> df.groupby(level=1).mean()
7414-
Max Speed
7415-
Type
7416-
Captive 210.0
7417-
Wild 185.0
7418-
"""
7419-
from pandas.core.groupby.groupby import get_groupby
7420-
7421-
if level is None and by is None:
7422-
raise TypeError("You have to supply one of 'by' and 'level'")
7423-
axis = self._get_axis_number(axis)
7424-
7425-
return get_groupby(
7426-
self,
7427-
by=by,
7428-
axis=axis,
7429-
level=level,
7430-
as_index=as_index,
7431-
sort=sort,
7432-
group_keys=group_keys,
7433-
squeeze=squeeze,
7434-
observed=observed,
7435-
)
7363+
"""
74367364

74377365
def asfreq(
74387366
self,

pandas/core/reshape/merge.py

+3 -1
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
from pandas.core.dtypes.missing import isna, na_value_for_dtype
4242

4343
from pandas import Categorical, Index, MultiIndex
44+
from pandas.core import groupby
4445
import pandas.core.algorithms as algos
4546
from pandas.core.arrays.categorical import _recode_for_categories
4647
import pandas.core.common as com
@@ -113,6 +114,7 @@ def _groupby_and_merge(
113114
by = [by]
114115

115116
lby = left.groupby(by, sort=False)
117+
rby: Optional[groupby.DataFrameGroupBy] = None
116118

117119
# if we can groupby the rhs
118120
# then we can get vastly better perf
@@ -132,7 +134,7 @@ def _groupby_and_merge(
132134
try:
133135
rby = right.groupby(by, sort=False)
134136
except KeyError:
135-
rby = None
137+
pass
136138

137139
for key, lhs in lby:
138140

pandas/core/series.py

+85 -2
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@
5959
is_empty_data,
6060
sanitize_array,
6161
)
62-
from pandas.core.generic import _shared_docs
62+
from pandas.core.groupby import generic as groupby_generic
6363
from pandas.core.indexers import maybe_convert_indices
6464
from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
6565
from pandas.core.indexes.api import (
@@ -1431,7 +1431,7 @@ def to_string(
14311431
"""
14321432
)
14331433
@Substitution(klass="Series")
1434-
@Appender(_shared_docs["to_markdown"])
1434+
@Appender(generic._shared_docs["to_markdown"])
14351435
def to_markdown(
14361436
self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs
14371437
) -> Optional[str]:
@@ -1568,6 +1568,89 @@ def _set_name(self, name, inplace=False):
15681568
ser.name = name
15691569
return ser
15701570

1571+
@Appender(
1572+
"""
1573+
Examples
1574+
--------
1575+
>>> ser = pd.Series([390., 350., 30., 20.],
1576+
... index=['Falcon', 'Falcon', 'Parrot', 'Parrot'], name="Max Speed")
1577+
>>> ser
1578+
Falcon 390.0
1579+
Falcon 350.0
1580+
Parrot 30.0
1581+
Parrot 20.0
1582+
Name: Max Speed, dtype: float64
1583+
>>> ser.groupby(["a", "b", "a", "b"]).mean()
1584+
a 210.0
1585+
b 185.0
1586+
Name: Max Speed, dtype: float64
1587+
>>> ser.groupby(level=0).mean()
1588+
Falcon 370.0
1589+
Parrot 25.0
1590+
Name: Max Speed, dtype: float64
1591+
>>> ser.groupby(ser > 100).mean()
1592+
Max Speed
1593+
False 25.0
1594+
True 370.0
1595+
Name: Max Speed, dtype: float64
1596+
1597+
**Grouping by Indexes**
1598+
1599+
We can groupby different levels of a hierarchical index
1600+
using the `level` parameter:
1601+
1602+
>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
1603+
... ['Captive', 'Wild', 'Captive', 'Wild']]
1604+
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
1605+
>>> ser = pd.Series([390., 350., 30., 20.], index=index, name="Max Speed")
1606+
>>> ser
1607+
Animal Type
1608+
Falcon Captive 390.0
1609+
Wild 350.0
1610+
Parrot Captive 30.0
1611+
Wild 20.0
1612+
Name: Max Speed, dtype: float64
1613+
>>> ser.groupby(level=0).mean()
1614+
Animal
1615+
Falcon 370.0
1616+
Parrot 25.0
1617+
Name: Max Speed, dtype: float64
1618+
>>> ser.groupby(level="Type").mean()
1619+
Type
1620+
Captive 210.0
1621+
Wild 185.0
1622+
Name: Max Speed, dtype: float64
1623+
"""
1624+
)
1625+
@Appender(generic._shared_docs["groupby"] % _shared_doc_kwargs)
1626+
def groupby(
1627+
self,
1628+
by=None,
1629+
axis=0,
1630+
level=None,
1631+
as_index: bool = True,
1632+
sort: bool = True,
1633+
group_keys: bool = True,
1634+
squeeze: bool = False,
1635+
observed: bool = False,
1636+
) -> "groupby_generic.SeriesGroupBy":
1637+
1638+
if level is None and by is None:
1639+
raise TypeError("You have to supply one of 'by' and 'level'")
1640+
axis = self._get_axis_number(axis)
1641+
1642+
return groupby_generic.SeriesGroupBy(
1643+
obj=self,
1644+
keys=by,
1645+
axis=axis,
1646+
level=level,
1647+
as_index=as_index,
1648+
sort=sort,
1649+
group_keys=group_keys,
1650+
squeeze=squeeze,
1651+
observed=observed,
1652+
)
1653+
15711654
# ----------------------------------------------------------------------
15721655
# Statistics, overridden ndarray methods
15731656

0 commit comments

Comments
 (0)