diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index cb045b08f3629..4b0143b3e1ced 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -55,28 +55,28 @@ class providing the base-class of operations. _apply_docs = dict( template=""" - Apply function ``func`` group-wise and combine the results together. + Apply function `func` group-wise and combine the results together. - The function passed to ``apply`` must take a {input} as its first - argument and return a dataframe, a series or a scalar. ``apply`` will + The function passed to `apply` must take a {input} as its first + argument and return a DataFrame, Series or scalar. `apply` will then take care of combining the results back together into a single - dataframe or series. ``apply`` is therefore a highly flexible + dataframe or series. `apply` is therefore a highly flexible grouping method. - While ``apply`` is a very flexible method, its downside is that - using it can be quite a bit slower than using more specific methods. - Pandas offers a wide range of method that will be much faster - than using ``apply`` for their specific purposes, so try to use them - before reaching for ``apply``. + While `apply` is a very flexible method, its downside is that + using it can be quite a bit slower than using more specific methods + like `agg` or `transform`. Pandas offers a wide range of methods that will + be much faster than using `apply` for their specific purposes, so try to + use them before reaching for `apply`. Parameters ---------- - func : function + func : callable A callable that takes a {input} as its first argument, and returns a dataframe, a series or a scalar. In addition the - callable may take positional and keyword arguments + callable may take positional and keyword arguments. args, kwargs : tuple and dict - Optional positional and keyword arguments to pass to ``func`` + Optional positional and keyword arguments to pass to `func`. 
Returns ------- @@ -84,9 +84,9 @@ class providing the base-class of operations. Notes ----- - In the current implementation ``apply`` calls func twice on the + In the current implementation `apply` calls `func` twice on the first group to decide whether it can take a fast or slow code - path. This can lead to unexpected behavior if func has + path. This can lead to unexpected behavior if `func` has side-effects, as they will take effect twice for the first group. @@ -98,38 +98,43 @@ class providing the base-class of operations. -------- pipe : Apply function to the full GroupBy object instead of to each group. - aggregate, transform + aggregate : Apply aggregate function to the GroupBy object. + transform : Apply function column-by-column to the GroupBy object. + Series.apply : Apply a function to a Series. + DataFrame.apply : Apply a function to each row or column of a DataFrame. """, dataframe_examples=""" - >>> df = pd.DataFrame({'A': 'a a b'.split(), 'B': [1,2,3], 'C': [4,6, 5]}) + >>> df = pd.DataFrame({'A': 'a a b'.split(), + ... 'B': [1,2,3], + ... 'C': [4,6, 5]}) >>> g = df.groupby('A') - From ``df`` above we can see that ``g`` has two groups, ``a``, ``b``. - Calling ``apply`` in various ways, we can get different grouping results: + Notice that ``g`` has two groups, ``a`` and ``b``. + Calling `apply` in various ways, we can get different grouping results: - Example 1: below the function passed to ``apply`` takes a dataframe as - its argument and returns a dataframe. ``apply`` combines the result for - each group together into a new dataframe: + Example 1: below the function passed to `apply` takes a DataFrame as + its argument and returns a DataFrame. `apply` combines the result for + each group together into a new DataFrame: - >>> g.apply(lambda x: x / x.sum()) + >>> g[['B', 'C']].apply(lambda x: x / x.sum()) B C 0 0.333333 0.4 1 0.666667 0.6 2 1.000000 1.0 - Example 2: The function passed to ``apply`` takes a dataframe as - its argument and returns a series. 
``apply`` combines the result for - each group together into a new dataframe: + Example 2: The function passed to `apply` takes a DataFrame as + its argument and returns a Series. `apply` combines the result for + each group together into a new DataFrame: - >>> g.apply(lambda x: x.max() - x.min()) + >>> g[['B', 'C']].apply(lambda x: x.max() - x.min()) B C A a 1 2 b 0 0 - Example 3: The function passed to ``apply`` takes a dataframe as - its argument and returns a scalar. ``apply`` combines the result for - each group together into a series, including setting the index as + Example 3: The function passed to `apply` takes a DataFrame as + its argument and returns a scalar. `apply` combines the result for + each group together into a Series, including setting the index as appropriate: >>> g.apply(lambda x: x.C.max() - x.B.min()) @@ -139,15 +144,15 @@ class providing the base-class of operations. dtype: int64 """, series_examples=""" - >>> ser = pd.Series([0, 1, 2], index='a a b'.split()) - >>> g = ser.groupby(ser.index) + >>> s = pd.Series([0, 1, 2], index='a a b'.split()) + >>> g = s.groupby(s.index) - From ``ser`` above we can see that ``g`` has two groups, ``a``, ``b``. - Calling ``apply`` in various ways, we can get different grouping results: + From ``s`` above we can see that ``g`` has two groups, ``a`` and ``b``. + Calling `apply` in various ways, we can get different grouping results: - Example 1: The function passed to ``apply`` takes a series as - its argument and returns a series. ``apply`` combines the result for - each group together into a new series: + Example 1: The function passed to `apply` takes a Series as + its argument and returns a Series. `apply` combines the result for + each group together into a new Series: >>> g.apply(lambda x: x*2 if x.name == 'b' else x/2) 0 0.0 @@ -155,9 +160,9 @@ class providing the base-class of operations. 
2 4.0 dtype: float64 - Example 2: The function passed to ``apply`` takes a series as - its argument and returns a scalar. ``apply`` combines the result for - each group together into a series, including setting the index as + Example 2: The function passed to `apply` takes a Series as + its argument and returns a scalar. `apply` combines the result for + each group together into a Series, including setting the index as appropriate: >>> g.apply(lambda x: x.max() - x.min()) @@ -167,12 +172,12 @@ class providing the base-class of operations. """) _pipe_template = """\ -Apply a function ``func`` with arguments to this %(klass)s object and return +Apply a function `func` with arguments to this %(klass)s object and return the function's result. %(versionadded)s -Use ``.pipe`` when you want to improve readability by chaining together +Use `.pipe` when you want to improve readability by chaining together functions that expect Series, DataFrames, GroupBy or Resampler objects. Instead of writing @@ -191,17 +196,17 @@ class providing the base-class of operations. ---------- func : callable or tuple of (callable, string) Function to apply to this %(klass)s object or, alternatively, - a ``(callable, data_keyword)`` tuple where ``data_keyword`` is a - string indicating the keyword of ``callable`` that expects the + a `(callable, data_keyword)` tuple where `data_keyword` is a + string indicating the keyword of `callable` that expects the %(klass)s object. args : iterable, optional - positional arguments passed into ``func``. + positional arguments passed into `func`. kwargs : dict, optional - a dictionary of keyword arguments passed into ``func``. + a dictionary of keyword arguments passed into `func`. Returns ------- -object : the return type of ``func``. +object : the return type of `func`. 
Notes ----- @@ -1442,7 +1447,7 @@ def nth(self, n, dropna=None): 2 3.0 2 5.0 - Specifying ``dropna`` allows count ignoring NaN + Specifying `dropna` allows ignoring ``NaN`` values when counting >>> g.nth(0, dropna='any') B @@ -1458,7 +1463,7 @@ def nth(self, n, dropna=None): 1 NaN 2 NaN - Specifying ``as_index=False`` in ``groupby`` keeps the original index. + Specifying `as_index=False` in `groupby` keeps the original index. >>> df.groupby('A', as_index=False).nth(1) A B diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index a1511b726c705..3070fa0e63c88 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -59,7 +59,7 @@ class Grouper(object): sort : boolean, default to False whether to sort the resulting labels - additional kwargs to control time-like groupers (when ``freq`` is passed) + additional kwargs to control time-like groupers (when `freq` is passed) closed : closed end of interval; 'left' or 'right' label : interval boundary to use for labeling; 'left' or 'right'