Skip to content

Commit 8f144bb

Browse files
topper-123victor
authored and
victor
committed
DOC: improve doc string for .aggregate and .transform (pandas-dev#22641)
1 parent 9cf7b60 commit 8f144bb

File tree

4 files changed

+76
-49
lines changed

4 files changed

+76
-49
lines changed

ci/doctests.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ if [ "$DOCTEST" ]; then
2121

2222
# DataFrame / Series docstrings
2323
pytest --doctest-modules -v pandas/core/frame.py \
24-
-k"-assign -axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata -transform"
24+
-k"-assign -axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata"
2525

2626
if [ $? -ne "0" ]; then
2727
RET=1
@@ -35,7 +35,7 @@ if [ "$DOCTEST" ]; then
3535
fi
3636

3737
pytest --doctest-modules -v pandas/core/generic.py \
38-
-k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -sample -to_json -to_xarray -transform -transpose -values -xs"
38+
-k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -sample -to_json -to_xarray -transpose -values -xs"
3939

4040
if [ $? -ne "0" ]; then
4141
RET=1

pandas/core/frame.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -109,10 +109,9 @@
109109
_shared_doc_kwargs = dict(
110110
axes='index, columns', klass='DataFrame',
111111
axes_single_arg="{0 or 'index', 1 or 'columns'}",
112-
axis="""
113-
axis : {0 or 'index', 1 or 'columns'}, default 0
114-
- 0 or 'index': apply function to each column.
115-
- 1 or 'columns': apply function to each row.""",
112+
axis="""axis : {0 or 'index', 1 or 'columns'}, default 0
113+
If 0 or 'index': apply function to each column.
114+
If 1 or 'columns': apply function to each row.""",
116115
optional_by="""
117116
by : str or list of str
118117
Name or list of names to sort by.

pandas/core/generic.py

+63-39
Original file line numberDiff line numberDiff line change
@@ -4545,17 +4545,16 @@ def pipe(self, func, *args, **kwargs):
45454545
45464546
Parameters
45474547
----------
4548-
func : function, string, dictionary, or list of string/functions
4548+
func : function, str, list or dict
45494549
Function to use for aggregating the data. If a function, must either
4550-
work when passed a %(klass)s or when passed to %(klass)s.apply. For
4551-
a DataFrame, can pass a dict, if the keys are DataFrame column names.
4550+
work when passed a %(klass)s or when passed to %(klass)s.apply.
45524551
45534552
Accepted combinations are:
45544553
4555-
- string function name.
4556-
- function.
4557-
- list of functions.
4558-
- dict of column names -> functions (or list of functions).
4554+
- function
4555+
- string function name
4556+
- list of functions and/or function names, e.g. ``[np.sum, 'mean']``
4557+
- dict of axis labels -> functions, function names or list of such.
45594558
%(axis)s
45604559
*args
45614560
Positional arguments to pass to `func`.
@@ -4564,7 +4563,11 @@ def pipe(self, func, *args, **kwargs):
45644563
45654564
Returns
45664565
-------
4567-
aggregated : %(klass)s
4566+
DataFrame, Series or scalar
4567+
if DataFrame.agg is called with a single function, returns a Series
4568+
if DataFrame.agg is called with several functions, returns a DataFrame
4569+
if Series.agg is called with a single function, returns a scalar
4570+
if Series.agg is called with several functions, returns a Series
45684571
45694572
Notes
45704573
-----
@@ -4574,50 +4577,71 @@ def pipe(self, func, *args, **kwargs):
45744577
""")
45754578

45764579
_shared_docs['transform'] = ("""
4577-
Call function producing a like-indexed %(klass)s
4578-
and return a %(klass)s with the transformed values
4580+
Call ``func`` on self producing a %(klass)s with transformed values
4581+
and that has the same axis length as self.
45794582
45804583
.. versionadded:: 0.20.0
45814584
45824585
Parameters
45834586
----------
4584-
func : callable, string, dictionary, or list of string/callables
4585-
To apply to column
4587+
func : function, str, list or dict
4588+
Function to use for transforming the data. If a function, must either
4589+
work when passed a %(klass)s or when passed to %(klass)s.apply.
45864590
4587-
Accepted Combinations are:
4591+
Accepted combinations are:
45884592
4589-
- string function name
45904593
- function
4591-
- list of functions
4592-
- dict of column names -> functions (or list of functions)
4594+
- string function name
4595+
- list of functions and/or function names, e.g. ``[np.exp, 'sqrt']``
4596+
- dict of axis labels -> functions, function names or list of such.
4597+
%(axis)s
4598+
*args
4599+
Positional arguments to pass to `func`.
4600+
**kwargs
4601+
Keyword arguments to pass to `func`.
45934602
45944603
Returns
45954604
-------
4596-
transformed : %(klass)s
4605+
%(klass)s
4606+
A %(klass)s that must have the same length as self.
45974607
4598-
Examples
4608+
Raises
4609+
------
4610+
ValueError : If the returned %(klass)s has a different length than self.
4611+
4612+
See Also
45994613
--------
4600-
>>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'],
4601-
... index=pd.date_range('1/1/2000', periods=10))
4602-
df.iloc[3:7] = np.nan
4603-
4604-
>>> df.transform(lambda x: (x - x.mean()) / x.std())
4605-
A B C
4606-
2000-01-01 0.579457 1.236184 0.123424
4607-
2000-01-02 0.370357 -0.605875 -1.231325
4608-
2000-01-03 1.455756 -0.277446 0.288967
4609-
2000-01-04 NaN NaN NaN
4610-
2000-01-05 NaN NaN NaN
4611-
2000-01-06 NaN NaN NaN
4612-
2000-01-07 NaN NaN NaN
4613-
2000-01-08 -0.498658 1.274522 1.642524
4614-
2000-01-09 -0.540524 -1.012676 -0.828968
4615-
2000-01-10 -1.366388 -0.614710 0.005378
4616-
4617-
See also
4614+
%(klass)s.agg : Only perform aggregating type operations.
4615+
%(klass)s.apply : Invoke function on a %(klass)s.
4616+
4617+
Examples
46184618
--------
4619-
pandas.%(klass)s.aggregate
4620-
pandas.%(klass)s.apply
4619+
>>> df = pd.DataFrame({'A': range(3), 'B': range(1, 4)})
4620+
>>> df
4621+
A B
4622+
0 0 1
4623+
1 1 2
4624+
2 2 3
4625+
>>> df.transform(lambda x: x + 1)
4626+
A B
4627+
0 1 2
4628+
1 2 3
4629+
2 3 4
4630+
4631+
Even though the resulting %(klass)s must have the same length as the
4632+
input %(klass)s, it is possible to provide several input functions:
4633+
4634+
>>> s = pd.Series(range(3))
4635+
>>> s
4636+
0 0
4637+
1 1
4638+
2 2
4639+
dtype: int64
4640+
>>> s.transform([np.sqrt, np.exp])
4641+
sqrt exp
4642+
0 0.000000 1.000000
4643+
1 1.000000 2.718282
4644+
2 1.414214 7.389056
46214645
""")
46224646

46234647
# ----------------------------------------------------------------------
@@ -9401,7 +9425,7 @@ def ewm(self, com=None, span=None, halflife=None, alpha=None,
94019425

94029426
cls.ewm = ewm
94039427

9404-
@Appender(_shared_docs['transform'] % _shared_doc_kwargs)
9428+
@Appender(_shared_docs['transform'] % dict(axis="", **_shared_doc_kwargs))
94059429
def transform(self, func, *args, **kwargs):
94069430
result = self.agg(func, *args, **kwargs)
94079431
if is_scalar(result) or len(result) != len(self):

pandas/core/series.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,8 @@
8989

9090
_shared_doc_kwargs = dict(
9191
axes='index', klass='Series', axes_single_arg="{0 or 'index'}",
92-
axis="""
93-
axis : {0 or 'index'}
94-
Parameter needed for compatibility with DataFrame.
95-
""",
92+
axis="""axis : {0 or 'index'}
93+
Parameter needed for compatibility with DataFrame.""",
9694
inplace="""inplace : boolean, default False
9795
If True, performs operation inplace and returns None.""",
9896
unique='np.ndarray', duplicated='Series',
@@ -3097,6 +3095,12 @@ def aggregate(self, func, axis=0, *args, **kwargs):
30973095

30983096
agg = aggregate
30993097

3098+
@Appender(generic._shared_docs['transform'] % _shared_doc_kwargs)
3099+
def transform(self, func, axis=0, *args, **kwargs):
3100+
# Validate the axis parameter
3101+
self._get_axis_number(axis)
3102+
return super(Series, self).transform(func, *args, **kwargs)
3103+
31003104
def apply(self, func, convert_dtype=True, args=(), **kwds):
31013105
"""
31023106
Invoke function on values of Series. Can be ufunc (a NumPy function

0 commit comments

Comments
 (0)