@@ -7034,8 +7034,12 @@ def clip_lower(self, threshold, axis=None, inplace=False):
7034
7034
def groupby (self , by = None , axis = 0 , level = None , as_index = True , sort = True ,
7035
7035
group_keys = True , squeeze = False , observed = False , ** kwargs ):
7036
7036
"""
7037
- Group series using mapper (dict or key function, apply given function
7038
- to group, return result as series) or by a series of columns.
7037
+ Group DataFrame or Series using a mapper or by a Series of columns.
7038
+
7039
+ A groupby operation involves some combination of splitting the
7040
+ object, applying a function, and combining the results. This can be
7041
+ used to group large amounts of data and compute operations on these
7042
+ groups.
7039
7043
7040
7044
Parameters
7041
7045
----------
@@ -7048,54 +7052,95 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
7048
7052
values are used as-is to determine the groups. A label or list of
7049
7053
labels may be passed to group by the columns in ``self``. Notice
7050
7054
that a tuple is interpreted as a (single) key.
7051
- axis : int, default 0
7055
+ axis : {0 or 'index', 1 or 'columns'}, default 0
7056
+ Split along rows (0) or columns (1).
7052
7057
level : int, level name, or sequence of such, default None
7053
7058
If the axis is a MultiIndex (hierarchical), group by a particular
7054
- level or levels
7055
- as_index : boolean , default True
7059
+ level or levels.
7060
+ as_index : bool , default True
7056
7061
For aggregated output, return object with group labels as the
7057
7062
index. Only relevant for DataFrame input. as_index=False is
7058
- effectively "SQL-style" grouped output
7059
- sort : boolean , default True
7063
+ effectively "SQL-style" grouped output.
7064
+ sort : bool , default True
7060
7065
Sort group keys. Get better performance by turning this off.
7061
7066
Note this does not influence the order of observations within each
7062
- group. groupby preserves the order of rows within each group.
7063
- group_keys : boolean , default True
7064
- When calling apply, add group keys to index to identify pieces
7065
- squeeze : boolean , default False
7066
- reduce the dimensionality of the return type if possible,
7067
- otherwise return a consistent type
7068
- observed : boolean , default False
7069
- This only applies if any of the groupers are Categoricals
7067
+ group. Groupby preserves the order of rows within each group.
7068
+ group_keys : bool , default True
7069
+ When calling apply, add group keys to index to identify pieces.
7070
+ squeeze : bool , default False
7071
+ Reduce the dimensionality of the return type if possible,
7072
+ otherwise return a consistent type.
7073
+ observed : bool , default False
7074
+ This only applies if any of the groupers are Categoricals.
7070
7075
If True: only show observed values for categorical groupers.
7071
7076
If False: show all values for categorical groupers.
7072
7077
7073
7078
.. versionadded:: 0.23.0
7074
7079
7080
+ **kwargs
7081
+ Optional, only accepts keyword argument 'mutated' and is passed
7082
+ to groupby.
7083
+
7075
7084
Returns
7076
7085
-------
7077
- GroupBy object
7086
+ DataFrameGroupBy or SeriesGroupBy
7087
+ Returns a groupby object that contains information about the
7088
+ groups; its type depends on the calling object.
7078
7089
7079
- Examples
7090
+ See Also
7080
7091
--------
7081
- DataFrame results
7082
-
7083
- >>> data.groupby(func, axis=0).mean()
7084
- >>> data.groupby(['col1', 'col2'])['col3'].mean()
7085
-
7086
- DataFrame with hierarchical index
7087
-
7088
- >>> data.groupby(['col1', 'col2']).mean()
7092
+ resample : Convenience method for frequency conversion and resampling
7093
+ of time series.
7089
7094
7090
7095
Notes
7091
7096
-----
7092
7097
See the `user guide
7093
7098
<http://pandas.pydata.org/pandas-docs/stable/groupby.html>`_ for more.
7094
7099
7095
- See also
7100
+ Examples
7096
7101
--------
7097
- resample : Convenience method for frequency conversion and resampling
7098
- of time series.
7102
+ >>> df = pd.DataFrame({'Animal' : ['Falcon', 'Falcon',
7103
+ ... 'Parrot', 'Parrot'],
7104
+ ... 'Max Speed' : [380., 370., 24., 26.]})
7105
+ >>> df
7106
+ Animal Max Speed
7107
+ 0 Falcon 380.0
7108
+ 1 Falcon 370.0
7109
+ 2 Parrot 24.0
7110
+ 3 Parrot 26.0
7111
+ >>> df.groupby(['Animal']).mean()
7112
+ Max Speed
7113
+ Animal
7114
+ Falcon 375.0
7115
+ Parrot 25.0
7116
+
7117
+ **Hierarchical Indexes**
7118
+
7119
+ We can group by different levels of a hierarchical index
7120
+ using the `level` parameter:
7121
+
7122
+ >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
7123
+ ... ['Captive', 'Wild', 'Captive', 'Wild']]
7124
+ >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
7125
+ >>> df = pd.DataFrame({'Max Speed' : [390., 350., 30., 20.]},
7126
+ ... index=index)
7127
+ >>> df
7128
+ Max Speed
7129
+ Animal Type
7130
+ Falcon Captive 390.0
7131
+ Wild 350.0
7132
+ Parrot Captive 30.0
7133
+ Wild 20.0
7134
+ >>> df.groupby(level=0).mean()
7135
+ Max Speed
7136
+ Animal
7137
+ Falcon 370.0
7138
+ Parrot 25.0
7139
+ >>> df.groupby(level=1).mean()
7140
+ Max Speed
7141
+ Type
7142
+ Captive 210.0
7143
+ Wild 185.0
7099
7144
"""
7100
7145
from pandas .core .groupby .groupby import groupby
7101
7146
0 commit comments