|
77 | 77 | pandas.Panel.%(name)s
|
78 | 78 | """
|
79 | 79 |
|
| 80 | +_apply_docs = dict( |
| 81 | + template=""" |
| 82 | + Apply function ``func`` group-wise and combine the results together. |
| 83 | +
|
| 84 | + The function passed to ``apply`` must take a {input} as its first |
| 85 | + argument and return a dataframe, a series or a scalar. ``apply`` will |
| 86 | + then take care of combining the results back together into a single |
| 87 | + dataframe or series. ``apply`` is therefore a highly flexible |
| 88 | + grouping method. |
| 89 | +
|
| 90 | + While ``apply`` is a very flexible method, its downside is that |
| 91 | + using it can be quite a bit slower than using more specific methods. |
| 92 | + Pandas offers a wide range of methods that will be much faster |
| 93 | + than using ``apply`` for their specific purposes, so try to use them |
| 94 | + before reaching for ``apply``. |
| 95 | +
|
| 96 | + Parameters |
| 97 | + ---------- |
| 98 | + func : function |
| 99 | + A callable that takes a {input} as its first argument, and |
| 100 | + returns a dataframe, a series or a scalar. In addition the |
| 101 | + callable may take positional and keyword arguments. |
| 102 | + args, kwargs : tuple and dict |
| 103 | + Optional positional and keyword arguments to pass to ``func``. |
| 104 | +
|
| 105 | + Returns |
| 106 | + ------- |
| 107 | + applied : Series or DataFrame |
| 108 | +
|
| 109 | + Notes |
| 110 | + ----- |
| 111 | + In the current implementation ``apply`` calls func twice on the |
| 112 | + first group to decide whether it can take a fast or slow code |
| 113 | + path. This can lead to unexpected behavior if func has |
| 114 | + side-effects, as they will take effect twice for the first |
| 115 | + group. |
| 116 | +
|
| 117 | + Examples |
| 118 | + -------- |
| 119 | + {examples} |
| 120 | +
|
| 121 | + See also |
| 122 | + -------- |
| 123 | + pipe : Apply function to the full GroupBy object instead of to each |
| 124 | + group. |
| 125 | + aggregate, transform |
| 126 | + """, |
| 127 | + dataframe_examples=""" |
| 128 | + >>> df = pd.DataFrame({'A': 'a a b'.split(), 'B': [1,2,3], 'C': [4,6, 5]}) |
| 129 | + >>> g = df.groupby('A') |
| 130 | +
|
| 131 | + From ``df`` above we can see that ``g`` has two groups, ``a`` and ``b``. |
| 132 | + Calling ``apply`` in various ways, we can get different grouping results: |
| 133 | +
|
| 134 | + Example 1: Below, the function passed to ``apply`` takes a dataframe as |
| 135 | + its argument and returns a dataframe. ``apply`` combines the result for |
| 136 | + each group together into a new dataframe: |
| 137 | +
|
| 138 | + >>> g.apply(lambda x: x / x.sum()) |
| 139 | + B C |
| 140 | + 0 0.333333 0.4 |
| 141 | + 1 0.666667 0.6 |
| 142 | + 2 1.000000 1.0 |
| 143 | +
|
| 144 | + Example 2: The function passed to ``apply`` takes a dataframe as |
| 145 | + its argument and returns a series. ``apply`` combines the result for |
| 146 | + each group together into a new dataframe: |
| 147 | +
|
| 148 | + >>> g.apply(lambda x: x.max() - x.min()) |
| 149 | + B C |
| 150 | + A |
| 151 | + a 1 2 |
| 152 | + b 0 0 |
| 153 | +
|
| 154 | + Example 3: The function passed to ``apply`` takes a dataframe as |
| 155 | + its argument and returns a scalar. ``apply`` combines the result for |
| 156 | + each group together into a series, including setting the index as |
| 157 | + appropriate: |
| 158 | +
|
| 159 | + >>> g.apply(lambda x: x.C.max() - x.B.min()) |
| 160 | + A |
| 161 | + a 5 |
| 162 | + b 2 |
| 163 | + dtype: int64 |
| 164 | + """, |
| 165 | + series_examples=""" |
| 166 | + >>> ser = pd.Series([0, 1, 2], index='a a b'.split()) |
| 167 | + >>> g = ser.groupby(ser.index) |
| 168 | +
|
| 169 | + From ``ser`` above we can see that ``g`` has two groups, ``a`` and ``b``. |
| 170 | + Calling ``apply`` in various ways, we can get different grouping results: |
| 171 | +
|
| 172 | + Example 1: The function passed to ``apply`` takes a series as |
| 173 | + its argument and returns a series. ``apply`` combines the result for |
| 174 | + each group together into a new series: |
| 175 | +
|
| 176 | + >>> g.apply(lambda x: x*2 if x.name == 'b' else x/2) |
| 177 | + a 0.0 |
| 178 | + a 0.5 |
| 179 | + b 4.0 |
| 180 | + dtype: float64 |
| 181 | +
|
| 182 | + Example 2: The function passed to ``apply`` takes a series as |
| 183 | + its argument and returns a scalar. ``apply`` combines the result for |
| 184 | + each group together into a series, including setting the index as |
| 185 | + appropriate: |
| 186 | +
|
| 187 | + >>> g.apply(lambda x: x.max() - x.min()) |
| 188 | + a 1 |
| 189 | + b 0 |
| 190 | + dtype: int64 |
| 191 | + """) |
| 192 | + |
80 | 193 | _transform_template = """
|
81 | 194 | Call function producing a like-indexed %(klass)s on each group and
|
82 | 195 | return a %(klass)s having the same indexes as the original object
|
|
144 | 257 |
|
145 | 258 | """
|
146 | 259 |
|
| 260 | + |
147 | 261 | # special case to prevent duplicate plots when catching exceptions when
|
148 | 262 | # forwarding methods from NDFrames
|
149 | 263 | _plotting_methods = frozenset(['plot', 'boxplot', 'hist'])
|
@@ -653,50 +767,10 @@ def __iter__(self):
|
653 | 767 | """
|
654 | 768 | return self.grouper.get_iterator(self.obj, axis=self.axis)
|
655 | 769 |
|
656 |
| - @Substitution(name='groupby') |
| 770 | + @Appender(_apply_docs['template'] |
| 771 | + .format(input="dataframe", |
| 772 | + examples=_apply_docs['dataframe_examples'])) |
657 | 773 | def apply(self, func, *args, **kwargs):
|
658 |
| - """ |
659 |
| - Apply function and combine results together in an intelligent way. |
660 |
| -
|
661 |
| - The split-apply-combine combination rules attempt to be as common |
662 |
| - sense based as possible. For example: |
663 |
| -
|
664 |
| - case 1: |
665 |
| - group DataFrame |
666 |
| - apply aggregation function (f(chunk) -> Series) |
667 |
| - yield DataFrame, with group axis having group labels |
668 |
| -
|
669 |
| - case 2: |
670 |
| - group DataFrame |
671 |
| - apply transform function ((f(chunk) -> DataFrame with same indexes) |
672 |
| - yield DataFrame with resulting chunks glued together |
673 |
| -
|
674 |
| - case 3: |
675 |
| - group Series |
676 |
| - apply function with f(chunk) -> DataFrame |
677 |
| - yield DataFrame with result of chunks glued together |
678 |
| -
|
679 |
| - Parameters |
680 |
| - ---------- |
681 |
| - func : function |
682 |
| -
|
683 |
| - Notes |
684 |
| - ----- |
685 |
| - See online documentation for full exposition on how to use apply. |
686 |
| -
|
687 |
| - In the current implementation apply calls func twice on the |
688 |
| - first group to decide whether it can take a fast or slow code |
689 |
| - path. This can lead to unexpected behavior if func has |
690 |
| - side-effects, as they will take effect twice for the first |
691 |
| - group. |
692 |
| -
|
693 |
| -
|
694 |
| - See also |
695 |
| - -------- |
696 |
| - pipe : Apply function to the full GroupBy object instead of to each |
697 |
| - group. |
698 |
| - aggregate, transform |
699 |
| - """ |
700 | 774 |
|
701 | 775 | func = self._is_builtin_func(func)
|
702 | 776 |
|
@@ -3013,6 +3087,12 @@ def _selection_name(self):
|
3013 | 3087 |
|
3014 | 3088 | """)
|
3015 | 3089 |
|
| 3090 | + @Appender(_apply_docs['template'] |
| 3091 | + .format(input='series', |
| 3092 | + examples=_apply_docs['series_examples'])) |
| 3093 | + def apply(self, func, *args, **kwargs): |
| 3094 | + return super(SeriesGroupBy, self).apply(func, *args, **kwargs) |
| 3095 | + |
3016 | 3096 | @Appender(_agg_doc)
|
3017 | 3097 | @Appender(_shared_docs['aggregate'] % dict(
|
3018 | 3098 | klass='Series',
|
|
0 commit comments