|
72 | 72 | GroupByPlot,
|
73 | 73 | _agg_template_frame,
|
74 | 74 | _agg_template_series,
|
75 |
| - _apply_docs, |
76 | 75 | _transform_template,
|
77 | 76 | )
|
78 | 77 | from pandas.core.indexes.api import (
|
@@ -214,12 +213,113 @@ def _get_data_to_aggregate(
|
214 | 213 | """
|
215 | 214 | )
|
216 | 215 |
|
217 |
| - @Appender( |
218 |
| - _apply_docs["template"].format( |
219 |
| - input="series", examples=_apply_docs["series_examples"] |
220 |
| - ) |
221 |
| - ) |
222 | 216 | def apply(self, func, *args, **kwargs) -> Series:
|
| 217 | + """ |
| 218 | + Apply function ``func`` group-wise and combine the results together. |
| 219 | +
|
| 220 | + The function passed to ``apply`` must take a series as its first |
| 221 | + argument and return a DataFrame, Series or scalar. ``apply`` will |
| 222 | + then take care of combining the results back together into a single |
| 223 | + dataframe or series. ``apply`` is therefore a highly flexible |
| 224 | + grouping method. |
| 225 | +
|
| 226 | + While ``apply`` is a very flexible method, its downside is that |
| 227 | + using it can be quite a bit slower than using more specific methods |
| 228 | + like ``agg`` or ``transform``. Pandas offers a wide range of methods that will |
| 229 | + be much faster than using ``apply`` for their specific purposes, so try to |
| 230 | + use them before reaching for ``apply``. |
| 231 | +
|
| 232 | + Parameters |
| 233 | + ---------- |
| 234 | + func : callable |
| 235 | + A callable that takes a series as its first argument, and |
| 236 | + returns a dataframe, a series or a scalar. In addition the |
| 237 | + callable may take positional and keyword arguments. |
| 238 | +
|
| 239 | + *args : tuple |
| 240 | + Optional positional arguments to pass to ``func``. |
| 241 | +
|
| 242 | + **kwargs : dict |
| 243 | + Optional keyword arguments to pass to ``func``. |
| 244 | +
|
| 245 | + Returns |
| 246 | + ------- |
| 247 | + Series or DataFrame |
| 248 | +
|
| 249 | + See Also |
| 250 | + -------- |
| 251 | + pipe : Apply function to the full GroupBy object instead of to each |
| 252 | + group. |
| 253 | + aggregate : Apply aggregate function to the GroupBy object. |
| 254 | + transform : Apply function column-by-column to the GroupBy object. |
| 255 | + Series.apply : Apply a function to a Series. |
| 256 | + DataFrame.apply : Apply a function to each row or column of a DataFrame. |
| 257 | +
|
| 258 | + Notes |
| 259 | + ----- |
| 260 | +
|
| 261 | + .. versionchanged:: 1.3.0 |
| 262 | +
|
| 263 | + The resulting dtype will reflect the return value of the passed ``func``, |
| 264 | + see the examples below. |
| 265 | +
|
| 266 | + Functions that mutate the passed object can produce unexpected |
| 267 | + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` |
| 268 | + for more details. |
| 269 | +
|
| 270 | + Examples |
| 271 | + -------- |
| 272 | + >>> s = pd.Series([0, 1, 2], index="a a b".split()) |
| 273 | + >>> g1 = s.groupby(s.index, group_keys=False) |
| 274 | + >>> g2 = s.groupby(s.index, group_keys=True) |
| 275 | +
|
| 276 | + From ``s`` above we can see that ``g1`` and ``g2`` each have two groups, |
| 277 | + ``a`` and ``b``. Notice that ``g1`` and ``g2`` only differ in their |
| 278 | + ``group_keys`` argument. Calling `apply` in various ways, |
| 279 | + we can get different grouping results: |
| 280 | +
|
| 281 | + Example 1: The function passed to `apply` takes a Series as |
| 282 | + its argument and returns a Series. `apply` combines the result for |
| 283 | + each group together into a new Series. |
| 284 | +
|
| 285 | + .. versionchanged:: 1.3.0 |
| 286 | +
|
| 287 | + The resulting dtype will reflect the return value of the passed ``func``. |
| 288 | +
|
| 289 | + >>> g1.apply(lambda x: x * 2 if x.name == "a" else x / 2) |
| 290 | + a 0.0 |
| 291 | + a 2.0 |
| 292 | + b 1.0 |
| 293 | + dtype: float64 |
| 294 | +
|
| 295 | + In the above, the groups are not part of the index. We can have them included |
| 296 | + by using ``g2`` where ``group_keys=True``: |
| 297 | +
|
| 298 | + >>> g2.apply(lambda x: x * 2 if x.name == "a" else x / 2) |
| 299 | + a a 0.0 |
| 300 | + a 2.0 |
| 301 | + b b 1.0 |
| 302 | + dtype: float64 |
| 303 | +
|
| 304 | + Example 2: The function passed to `apply` takes a Series as |
| 305 | + its argument and returns a scalar. `apply` combines the result for |
| 306 | + each group together into a Series, including setting the index as |
| 307 | + appropriate: |
| 308 | +
|
| 309 | + >>> g1.apply(lambda x: x.max() - x.min()) |
| 310 | + a 1 |
| 311 | + b 0 |
| 312 | + dtype: int64 |
| 313 | +
|
| 314 | + The ``group_keys`` argument has no effect here because the result is not |
| 315 | + like-indexed (i.e. :ref:`a transform <groupby.transform>`) when compared |
| 316 | + to the input. |
| 317 | +
|
| 318 | + >>> g2.apply(lambda x: x.max() - x.min()) |
| 319 | + a 1 |
| 320 | + b 0 |
| 321 | + dtype: int64 |
| 322 | + """ |
223 | 323 | return super().apply(func, *args, **kwargs)
|
224 | 324 |
|
225 | 325 | @doc(_agg_template_series, examples=_agg_examples_doc, klass="Series")
|
|
0 commit comments