Skip to content

Commit 627d1b6

Browse files
authored
DOC: Groupby transform should mention that parameter can be a string (#50029)
* feat: adding to the function docs that transform function parameter can take a string argument
* docs: updating docs for groupby
* doc: updating docs to fix PR comments
* fix: fixing failing doc check
* docs: updating docs according to PR comment
* docs: fixing PR comment
* docs: updating docs to better suit dataframes and series
* docs: resolving PR comments
* docs: fixing PR check failure
* docs: resolving PR comments
* docs: fixing issue with docstring validation
* docs: fixing doctest failures
* docs: fixing some issues in the docstrings checks
* docs: adding in some missing docs for doctest check
* docs: fixing doctest check failure
* docs: fixing docstring validation check failure
* docs: updating docs according to PR comment
1 parent 1f836f1 commit 627d1b6

File tree

2 files changed

+110
-46
lines changed

2 files changed

+110
-46
lines changed

pandas/core/groupby/generic.py

+100-2
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,51 @@ def _aggregate_named(self, func, *args, **kwargs):
427427

428428
return result
429429

430-
@Substitution(klass="Series")
430+
__examples_series_doc = dedent(
431+
"""
432+
>>> ser = pd.Series(
433+
... [390.0, 350.0, 30.0, 20.0],
434+
... index=["Falcon", "Falcon", "Parrot", "Parrot"],
435+
... name="Max Speed")
436+
>>> grouped = ser.groupby([1, 1, 2, 2])
437+
>>> grouped.transform(lambda x: (x - x.mean()) / x.std())
438+
Falcon 0.707107
439+
Falcon -0.707107
440+
Parrot 0.707107
441+
Parrot -0.707107
442+
Name: Max Speed, dtype: float64
443+
444+
Broadcast result of the transformation
445+
446+
>>> grouped.transform(lambda x: x.max() - x.min())
447+
Falcon 40.0
448+
Falcon 40.0
449+
Parrot 10.0
450+
Parrot 10.0
451+
Name: Max Speed, dtype: float64
452+
453+
>>> grouped.transform("mean")
454+
Falcon 370.0
455+
Falcon 370.0
456+
Parrot 25.0
457+
Parrot 25.0
458+
Name: Max Speed, dtype: float64
459+
460+
.. versionchanged:: 1.3.0
461+
462+
The resulting dtype will reflect the return value of the passed ``func``,
463+
for example:
464+
465+
>>> grouped.transform(lambda x: x.astype(int).max())
466+
Falcon 390
467+
Falcon 390
468+
Parrot 30
469+
Parrot 30
470+
Name: Max Speed, dtype: int64
471+
"""
472+
)
473+
474+
@Substitution(klass="Series", example=__examples_series_doc)
431475
@Appender(_transform_template)
432476
def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
433477
return self._transform(
@@ -1407,7 +1451,61 @@ def _transform_general(self, func, *args, **kwargs):
14071451
concatenated = concatenated.reindex(concat_index, axis=other_axis, copy=False)
14081452
return self._set_result_index_ordered(concatenated)
14091453

1410-
@Substitution(klass="DataFrame")
1454+
__examples_dataframe_doc = dedent(
1455+
"""
1456+
>>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
1457+
... 'foo', 'bar'],
1458+
... 'B' : ['one', 'one', 'two', 'three',
1459+
... 'two', 'two'],
1460+
... 'C' : [1, 5, 5, 2, 5, 5],
1461+
... 'D' : [2.0, 5., 8., 1., 2., 9.]})
1462+
>>> grouped = df.groupby('A')[['C', 'D']]
1463+
>>> grouped.transform(lambda x: (x - x.mean()) / x.std())
1464+
C D
1465+
0 -1.154701 -0.577350
1466+
1 0.577350 0.000000
1467+
2 0.577350 1.154701
1468+
3 -1.154701 -1.000000
1469+
4 0.577350 -0.577350
1470+
5 0.577350 1.000000
1471+
1472+
Broadcast result of the transformation
1473+
1474+
>>> grouped.transform(lambda x: x.max() - x.min())
1475+
C D
1476+
0 4.0 6.0
1477+
1 3.0 8.0
1478+
2 4.0 6.0
1479+
3 3.0 8.0
1480+
4 4.0 6.0
1481+
5 3.0 8.0
1482+
1483+
>>> grouped.transform("mean")
1484+
C D
1485+
0 3.666667 4.0
1486+
1 4.000000 5.0
1487+
2 3.666667 4.0
1488+
3 4.000000 5.0
1489+
4 3.666667 4.0
1490+
5 4.000000 5.0
1491+
1492+
.. versionchanged:: 1.3.0
1493+
1494+
The resulting dtype will reflect the return value of the passed ``func``,
1495+
for example:
1496+
1497+
>>> grouped.transform(lambda x: x.astype(int).max())
1498+
C D
1499+
0 5 8
1500+
1 5 9
1501+
2 5 8
1502+
3 5 9
1503+
4 5 8
1504+
5 5 9
1505+
"""
1506+
)
1507+
1508+
@Substitution(klass="DataFrame", example=__examples_dataframe_doc)
14111509
@Appender(_transform_template)
14121510
def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
14131511
return self._transform(

pandas/core/groupby/groupby.py

+10-44
Original file line numberDiff line numberDiff line change
@@ -402,15 +402,22 @@ class providing the base-class of operations.
402402
f : function, str
403403
Function to apply to each group. See the Notes section below for requirements.
404404
405-
Can also accept a Numba JIT function with
406-
``engine='numba'`` specified.
405+
Accepted inputs are:
406+
407+
- String
408+
- Python function
409+
- Numba JIT function with ``engine='numba'`` specified.
407410
411+
Only passing a single function is supported with this engine.
408412
If the ``'numba'`` engine is chosen, the function must be
409413
a user defined function with ``values`` and ``index`` as the
410414
first and second arguments respectively in the function signature.
411415
Each group's index will be passed to the user defined function
412416
and optionally available for use.
413417
418+
If a string is chosen, then it needs to be the name
419+
of the groupby method you want to use.
420+
414421
.. versionchanged:: 1.1.0
415422
*args
416423
Positional arguments to pass to func.
@@ -480,48 +487,7 @@ class providing the base-class of operations.
480487
481488
Examples
482489
--------
483-
484-
>>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
485-
... 'foo', 'bar'],
486-
... 'B' : ['one', 'one', 'two', 'three',
487-
... 'two', 'two'],
488-
... 'C' : [1, 5, 5, 2, 5, 5],
489-
... 'D' : [2.0, 5., 8., 1., 2., 9.]})
490-
>>> grouped = df.groupby('A')[['C', 'D']]
491-
>>> grouped.transform(lambda x: (x - x.mean()) / x.std())
492-
C D
493-
0 -1.154701 -0.577350
494-
1 0.577350 0.000000
495-
2 0.577350 1.154701
496-
3 -1.154701 -1.000000
497-
4 0.577350 -0.577350
498-
5 0.577350 1.000000
499-
500-
Broadcast result of the transformation
501-
502-
>>> grouped.transform(lambda x: x.max() - x.min())
503-
C D
504-
0 4.0 6.0
505-
1 3.0 8.0
506-
2 4.0 6.0
507-
3 3.0 8.0
508-
4 4.0 6.0
509-
5 3.0 8.0
510-
511-
.. versionchanged:: 1.3.0
512-
513-
The resulting dtype will reflect the return value of the passed ``func``,
514-
for example:
515-
516-
>>> grouped.transform(lambda x: x.astype(int).max())
517-
C D
518-
0 5 8
519-
1 5 9
520-
2 5 8
521-
3 5 9
522-
4 5 8
523-
5 5 9
524-
"""
490+
%(example)s"""
525491

526492
_agg_template = """
527493
Aggregate using one or more operations over the specified axis.

0 commit comments

Comments
 (0)