diff --git a/ci/doctests.sh b/ci/doctests.sh index 2af5dbd26aeb1..654bd57107904 100755 --- a/ci/doctests.sh +++ b/ci/doctests.sh @@ -21,7 +21,7 @@ if [ "$DOCTEST" ]; then # DataFrame / Series docstrings pytest --doctest-modules -v pandas/core/frame.py \ - -k"-assign -axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata -transform" + -k"-assign -axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata" if [ $? -ne "0" ]; then RET=1 @@ -35,7 +35,7 @@ if [ "$DOCTEST" ]; then fi pytest --doctest-modules -v pandas/core/generic.py \ - -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -sample -to_json -to_xarray -transform -transpose -values -xs" + -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -sample -to_json -to_xarray -transpose -values -xs" if [ $? -ne "0" ]; then RET=1 diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 251bc6587872d..bb08d4fa5582b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -109,10 +109,9 @@ _shared_doc_kwargs = dict( axes='index, columns', klass='DataFrame', axes_single_arg="{0 or 'index', 1 or 'columns'}", - axis=""" - axis : {0 or 'index', 1 or 'columns'}, default 0 - - 0 or 'index': apply function to each column. - - 1 or 'columns': apply function to each row.""", + axis="""axis : {0 or 'index', 1 or 'columns'}, default 0 + If 0 or 'index': apply function to each column. + If 1 or 'columns': apply function to each row.""", optional_by=""" by : str or list of str Name or list of names to sort by. 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2e5da21f573b0..243784ea84d43 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4545,17 +4545,16 @@ def pipe(self, func, *args, **kwargs): Parameters ---------- - func : function, string, dictionary, or list of string/functions + func : function, str, list or dict Function to use for aggregating the data. If a function, must either - work when passed a %(klass)s or when passed to %(klass)s.apply. For - a DataFrame, can pass a dict, if the keys are DataFrame column names. + work when passed a %(klass)s or when passed to %(klass)s.apply. Accepted combinations are: - - string function name. - - function. - - list of functions. - - dict of column names -> functions (or list of functions). + - function + - string function name + - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` + - dict of axis labels -> functions, function names or list of such. %(axis)s *args Positional arguments to pass to `func`. @@ -4564,7 +4563,11 @@ def pipe(self, func, *args, **kwargs): Returns ------- - aggregated : %(klass)s + DataFrame, Series or scalar + if DataFrame.agg is called with a single function, returns a Series + if DataFrame.agg is called with several functions, returns a DataFrame + if Series.agg is called with a single function, returns a scalar + if Series.agg is called with several functions, returns a Series Notes ----- @@ -4574,50 +4577,71 @@ def pipe(self, func, *args, **kwargs): """) _shared_docs['transform'] = (""" - Call function producing a like-indexed %(klass)s - and return a %(klass)s with the transformed values + Call ``func`` on self producing a %(klass)s with transformed values + and that has the same axis length as self. .. versionadded:: 0.20.0 Parameters ---------- - func : callable, string, dictionary, or list of string/callables - To apply to column + func : function, str, list or dict + Function to use for transforming the data. 
If a function, must either + work when passed a %(klass)s or when passed to %(klass)s.apply. - Accepted Combinations are: + Accepted combinations are: - - string function name - function - - list of functions - - dict of column names -> functions (or list of functions) + - string function name + - list of functions and/or function names, e.g. ``[np.exp, 'sqrt']`` + - dict of axis labels -> functions, function names or list of such. + %(axis)s + *args + Positional arguments to pass to `func`. + **kwargs + Keyword arguments to pass to `func`. Returns ------- - transformed : %(klass)s + %(klass)s + A %(klass)s that must have the same length as self. - Examples + Raises + ------ + ValueError : If the returned %(klass)s has a different length than self. + + See Also -------- - >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'], - ... index=pd.date_range('1/1/2000', periods=10)) - df.iloc[3:7] = np.nan - - >>> df.transform(lambda x: (x - x.mean()) / x.std()) - A B C - 2000-01-01 0.579457 1.236184 0.123424 - 2000-01-02 0.370357 -0.605875 -1.231325 - 2000-01-03 1.455756 -0.277446 0.288967 - 2000-01-04 NaN NaN NaN - 2000-01-05 NaN NaN NaN - 2000-01-06 NaN NaN NaN - 2000-01-07 NaN NaN NaN - 2000-01-08 -0.498658 1.274522 1.642524 - 2000-01-09 -0.540524 -1.012676 -0.828968 - 2000-01-10 -1.366388 -0.614710 0.005378 - - See also + %(klass)s.agg : Only perform aggregating type operations. + %(klass)s.apply : Invoke function on a %(klass)s. 
+ + Examples -------- - pandas.%(klass)s.aggregate - pandas.%(klass)s.apply + >>> df = pd.DataFrame({'A': range(3), 'B': range(1, 4)}) + >>> df + A B + 0 0 1 + 1 1 2 + 2 2 3 + >>> df.transform(lambda x: x + 1) + A B + 0 1 2 + 1 2 3 + 2 3 4 + + Even though the resulting %(klass)s must have the same length as the + input %(klass)s, it is possible to provide several input functions: + + >>> s = pd.Series(range(3)) + >>> s + 0 0 + 1 1 + 2 2 + dtype: int64 + >>> s.transform([np.sqrt, np.exp]) + sqrt exp + 0 0.000000 1.000000 + 1 1.000000 2.718282 + 2 1.414214 7.389056 """) # ---------------------------------------------------------------------- @@ -9401,7 +9425,7 @@ def ewm(self, com=None, span=None, halflife=None, alpha=None, cls.ewm = ewm - @Appender(_shared_docs['transform'] % _shared_doc_kwargs) + @Appender(_shared_docs['transform'] % dict(axis="", **_shared_doc_kwargs)) def transform(self, func, *args, **kwargs): result = self.agg(func, *args, **kwargs) if is_scalar(result) or len(result) != len(self): diff --git a/pandas/core/series.py b/pandas/core/series.py index a4d403e4bcd94..654ba01bc7897 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -89,10 +89,8 @@ _shared_doc_kwargs = dict( axes='index', klass='Series', axes_single_arg="{0 or 'index'}", - axis=""" - axis : {0 or 'index'} - Parameter needed for compatibility with DataFrame. 
- """, + axis="""axis : {0 or 'index'} + Parameter needed for compatibility with DataFrame.""", inplace="""inplace : boolean, default False If True, performs operation inplace and returns None.""", unique='np.ndarray', duplicated='Series', @@ -3098,6 +3096,12 @@ def aggregate(self, func, axis=0, *args, **kwargs): agg = aggregate + @Appender(generic._shared_docs['transform'] % _shared_doc_kwargs) + def transform(self, func, axis=0, *args, **kwargs): + # Validate the axis parameter + self._get_axis_number(axis) + return super(Series, self).transform(func, *args, **kwargs) + def apply(self, func, convert_dtype=True, args=(), **kwds): """ Invoke function on values of Series. Can be ufunc (a NumPy function