From 23609c3494793033b044122c6c00ae2d4b6035c5 Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 8 Sep 2018 10:00:09 +0100 Subject: [PATCH 1/4] improve doc string for df.aggregate and df.transform --- pandas/core/frame.py | 27 +++++++++++++++++++ pandas/core/generic.py | 60 +++++++++++++++++------------------------- pandas/core/series.py | 33 +++++++++++++++++++++++ 3 files changed, 84 insertions(+), 36 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 251bc6587872d..6de808d9a73fa 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5995,6 +5995,33 @@ def _aggregate(self, arg, axis=0, *args, **kwargs): agg = aggregate + _transform_doc = dedent(""" + Examples + -------- + >>> df = pd.DataFrame(np.random.randn(10, 2), columns=['A', 'B'], + ... index=pd.date_range('1/1/2000', periods=10)) + >>> df.iloc[3:7] = np.nan + + >>> df.transform(lambda x: (x - x.mean()) / x.std()) + A B + 2000-01-01 0.579457 1.236184 + 2000-01-02 0.370357 -0.605875 + 2000-01-03 1.455756 -0.277446 + 2000-01-04 NaN NaN + 2000-01-05 NaN NaN + 2000-01-06 NaN NaN + 2000-01-07 NaN NaN + 2000-01-08 -0.498658 1.274522 + 2000-01-09 -0.540524 -1.012676 + 2000-01-10 -1.366388 -0.614710 + + See also + -------- + pandas.DataFrame.aggregate + pandas.DataFrame.apply + """) + + @Appender(_transform_doc) @Appender(_shared_docs['transform'] % _shared_doc_kwargs) def transform(self, func, axis=0, *args, **kwargs): axis = self._get_axis_number(axis) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2e5da21f573b0..23f14aaa02c75 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4545,17 +4545,16 @@ def pipe(self, func, *args, **kwargs): Parameters ---------- - func : function, string, dictionary, or list of string/functions + func : function, string, list of string/functions or dictionary Function to use for aggregating the data. If a function, must either - work when passed a %(klass)s or when passed to %(klass)s.apply. 
For - a DataFrame, can pass a dict, if the keys are DataFrame column names. + work when passed a %(klass)s or when passed to %(klass)s.apply. Accepted combinations are: - - string function name. - - function. - - list of functions. - - dict of column names -> functions (or list of functions). + - string function name + - function + - list of functions and/or function names + - dict of axis labels -> functions, function names or list of such %(axis)s *args Positional arguments to pass to `func`. @@ -4581,43 +4580,32 @@ def pipe(self, func, *args, **kwargs): Parameters ---------- - func : callable, string, dictionary, or list of string/callables - To apply to column + func : function, string, list of string/functions or dictionary + Function to use for transforming the data. If a function, must either + work when passed a %(klass)s or when passed to %(klass)s.apply. + The function (or each function in a list/dict) must return an + object with the same length for the provided axis as the + calling %(klass)s. - Accepted Combinations are: + Accepted combinations are: - string function name - function - - list of functions - - dict of column names -> functions (or list of functions) + - list of functions and/or function names + - dict of axis labels -> functions, function names or list of such + %(axis)s + *args + Positional arguments to pass to `func`. + **kwargs + Keyword arguments to pass to `func`. Returns ------- transformed : %(klass)s - Examples - -------- - >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'], - ... 
index=pd.date_range('1/1/2000', periods=10)) - df.iloc[3:7] = np.nan - - >>> df.transform(lambda x: (x - x.mean()) / x.std()) - A B C - 2000-01-01 0.579457 1.236184 0.123424 - 2000-01-02 0.370357 -0.605875 -1.231325 - 2000-01-03 1.455756 -0.277446 0.288967 - 2000-01-04 NaN NaN NaN - 2000-01-05 NaN NaN NaN - 2000-01-06 NaN NaN NaN - 2000-01-07 NaN NaN NaN - 2000-01-08 -0.498658 1.274522 1.642524 - 2000-01-09 -0.540524 -1.012676 -0.828968 - 2000-01-10 -1.366388 -0.614710 0.005378 - - See also - -------- - pandas.%(klass)s.aggregate - pandas.%(klass)s.apply + Raises + ------ + ValueError: if the returned %(klass)s has a different length than self. """) # ---------------------------------------------------------------------- @@ -9401,7 +9389,7 @@ def ewm(self, com=None, span=None, halflife=None, alpha=None, cls.ewm = ewm - @Appender(_shared_docs['transform'] % _shared_doc_kwargs) + @Appender(_shared_docs['transform'] % dict(axis="", **_shared_doc_kwargs)) def transform(self, func, *args, **kwargs): result = self.agg(func, *args, **kwargs) if is_scalar(result) or len(result) != len(self): diff --git a/pandas/core/series.py b/pandas/core/series.py index a4d403e4bcd94..b8920619cf96b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3098,6 +3098,39 @@ def aggregate(self, func, axis=0, *args, **kwargs): agg = aggregate + _transform_doc = dedent(""" + Examples + -------- + >>> s = pd.Series(range(5)) + >>> s.transform(lambda x: (x - x.mean()) / x.std()) + 0 -1.264911 + 1 -0.632456 + 2 0.000000 + 3 0.632456 + 4 1.264911 + dtype: float64 + + >>> s.transform([np.sqrt, np.exp]) + sqrt exp + 0 0.000000 1.000000 + 1 1.000000 2.718282 + 2 1.414214 7.389056 + 3 1.732051 20.085537 + 4 2.000000 54.598150 + + See also + -------- + pandas.Series.aggregate + pandas.Series.apply + """) + + @Appender(_transform_doc) + @Appender(generic._shared_docs['transform'] % _shared_doc_kwargs) + def transform(self, func, axis=0, *args, **kwargs): + # Validate the axis parameter + 
self._get_axis_number(axis) + return super(Series, self).transform(func, *args, **kwargs) + def apply(self, func, convert_dtype=True, args=(), **kwds): """ Invoke function on values of Series. Can be ufunc (a NumPy function From 650f6391d57aeb08f693f862d2908877f2021176 Mon Sep 17 00:00:00 2001 From: tp Date: Thu, 13 Sep 2018 22:14:24 +0100 Subject: [PATCH 2/4] adjusted for comments --- pandas/core/frame.py | 27 --------------------------- pandas/core/generic.py | 40 +++++++++++++++++++++++++++++++++++++--- pandas/core/series.py | 27 --------------------------- 3 files changed, 37 insertions(+), 57 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6de808d9a73fa..251bc6587872d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5995,33 +5995,6 @@ def _aggregate(self, arg, axis=0, *args, **kwargs): agg = aggregate - _transform_doc = dedent(""" - Examples - -------- - >>> df = pd.DataFrame(np.random.randn(10, 2), columns=['A', 'B'], - ... index=pd.date_range('1/1/2000', periods=10)) - >>> df.iloc[3:7] = np.nan - - >>> df.transform(lambda x: (x - x.mean()) / x.std()) - A B - 2000-01-01 0.579457 1.236184 - 2000-01-02 0.370357 -0.605875 - 2000-01-03 1.455756 -0.277446 - 2000-01-04 NaN NaN - 2000-01-05 NaN NaN - 2000-01-06 NaN NaN - 2000-01-07 NaN NaN - 2000-01-08 -0.498658 1.274522 - 2000-01-09 -0.540524 -1.012676 - 2000-01-10 -1.366388 -0.614710 - - See also - -------- - pandas.DataFrame.aggregate - pandas.DataFrame.apply - """) - - @Appender(_transform_doc) @Appender(_shared_docs['transform'] % _shared_doc_kwargs) def transform(self, func, axis=0, *args, **kwargs): axis = self._get_axis_number(axis) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 23f14aaa02c75..10761ca5daf0b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4583,9 +4583,6 @@ def pipe(self, func, *args, **kwargs): func : function, string, list of string/functions or dictionary Function to use for transforming the data. 
If a function, must either work when passed a %(klass)s or when passed to %(klass)s.apply. - The function (or each function in a list/dict) must return an - object with the same length for the provided axis as the - calling %(klass)s. Accepted combinations are: @@ -4606,6 +4603,43 @@ def pipe(self, func, *args, **kwargs): Raises ------ ValueError: if the returned %(klass)s has a different length than self. + + Examples + -------- + >>> df = pd.DataFrame({'A': range(10), 'B': range(10, 0, -1)}, + ... index=pd.date_range('1/1/2000', periods=10)) + >>> df.iloc[3:7] = np.nan + + >>> df.transform(lambda x: (x - x.mean()) / x.std()) + A B + 2000-01-01 -1.143001 1.143001 + 2000-01-02 -0.889001 0.889001 + 2000-01-03 -0.635001 0.635001 + 2000-01-04 NaN NaN + 2000-01-05 NaN NaN + 2000-01-06 NaN NaN + 2000-01-07 NaN NaN + 2000-01-08 0.635001 -0.635001 + 2000-01-09 0.889001 -0.889001 + 2000-01-10 1.143001 -1.143001 + + It is only required for the axis specified in the ``axis`` parameter + to have the same length for output and for self. 
The other axis may have a + different length: + + >>> s = pd.Series(range(5)) + >>> s.transform([np.sqrt, np.exp]) + sqrt exp + 0 0.000000 1.000000 + 1 1.000000 2.718282 + 2 1.414214 7.389056 + 3 1.732051 20.085537 + 4 2.000000 54.598150 + + See also + -------- + pandas.%(klass)s.aggregate + pandas.%(klass)s.apply """) # ---------------------------------------------------------------------- diff --git a/pandas/core/series.py b/pandas/core/series.py index b8920619cf96b..cc22be9db95da 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3098,33 +3098,6 @@ def aggregate(self, func, axis=0, *args, **kwargs): agg = aggregate - _transform_doc = dedent(""" - Examples - -------- - >>> s = pd.Series(range(5)) - >>> s.transform(lambda x: (x - x.mean()) / x.std()) - 0 -1.264911 - 1 -0.632456 - 2 0.000000 - 3 0.632456 - 4 1.264911 - dtype: float64 - - >>> s.transform([np.sqrt, np.exp]) - sqrt exp - 0 0.000000 1.000000 - 1 1.000000 2.718282 - 2 1.414214 7.389056 - 3 1.732051 20.085537 - 4 2.000000 54.598150 - - See also - -------- - pandas.Series.aggregate - pandas.Series.apply - """) - - @Appender(_transform_doc) @Appender(generic._shared_docs['transform'] % _shared_doc_kwargs) def transform(self, func, axis=0, *args, **kwargs): # Validate the axis parameter From 4bd8490811ca12da38c3cba2e615c8dc77dcbbc1 Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 16 Sep 2018 23:15:43 +0100 Subject: [PATCH 3/4] adjust for comments --- pandas/core/generic.py | 62 +++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 37 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 10761ca5daf0b..69337dbc8be90 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4545,7 +4545,7 @@ def pipe(self, func, *args, **kwargs): Parameters ---------- - func : function, string, list of string/functions or dictionary + func : function, string, list of functions and/or strings or dict Function to use for aggregating the data. 
If a function, must either work when passed a %(klass)s or when passed to %(klass)s.apply. @@ -4563,7 +4563,7 @@ def pipe(self, func, *args, **kwargs): Returns ------- - aggregated : %(klass)s + pandas.%(klass)s Notes ----- @@ -4573,14 +4573,14 @@ def pipe(self, func, *args, **kwargs): """) _shared_docs['transform'] = (""" - Call function producing a like-indexed %(klass)s - and return a %(klass)s with the transformed values + Call ``func`` on self producing a %(klass)s with transformed values + and that has the same axis length as self. .. versionadded:: 0.20.0 Parameters ---------- - func : function, string, list of string/functions or dictionary + func : function, string, list of functions and/or strings or dict Function to use for transforming the data. If a function, must either work when passed a %(klass)s or when passed to %(klass)s.apply. @@ -4589,7 +4589,7 @@ def pipe(self, func, *args, **kwargs): - string function name - function - list of functions and/or function names - - dict of axis labels -> functions, function names or list of such + - dict of axis labels -> functions, function names or list of such. %(axis)s *args Positional arguments to pass to `func`. @@ -4598,48 +4598,36 @@ def pipe(self, func, *args, **kwargs): Returns ------- - transformed : %(klass)s + pandas.%(klass)s + A %(klass)s that must have the same length as self. Raises ------ - ValueError: if the returned %(klass)s has a different length than self. + ValueError : if the returned %(klass)s has a different length than self. + + See Also + -------- + pandas.%(klass)s.agg : only perform aggregating type operations + pandas.%(klass)s.apply : Invoke function on a Series Examples -------- - >>> df = pd.DataFrame({'A': range(10), 'B': range(10, 0, -1)}, - ... 
index=pd.date_range('1/1/2000', periods=10)) - >>> df.iloc[3:7] = np.nan - - >>> df.transform(lambda x: (x - x.mean()) / x.std()) - A B - 2000-01-01 -1.143001 1.143001 - 2000-01-02 -0.889001 0.889001 - 2000-01-03 -0.635001 0.635001 - 2000-01-04 NaN NaN - 2000-01-05 NaN NaN - 2000-01-06 NaN NaN - 2000-01-07 NaN NaN - 2000-01-08 0.635001 -0.635001 - 2000-01-09 0.889001 -0.889001 - 2000-01-10 1.143001 -1.143001 - - It is only required for the axis specified in the ``axis`` parameter - to have the same length for output and for self. The other axis may have a - different length: - - >>> s = pd.Series(range(5)) + >>> df = pd.DataFrame({'A': range(3), 'B': range(1, 4)}) + >>> df.transform(lambda x: x + 1) + A B + 0 1 2 + 1 2 3 + 2 3 4 + + Even though the resulting %(klass)s must have the length as the input + %(klass)s, it is possible to provide several input functions: + + >>> s = pd.Series(range(3)) >>> s.transform([np.sqrt, np.exp]) sqrt exp 0 0.000000 1.000000 1 1.000000 2.718282 2 1.414214 7.389056 - 3 1.732051 20.085537 - 4 2.000000 54.598150 - - See also - -------- - pandas.%(klass)s.aggregate - pandas.%(klass)s.apply """) # ---------------------------------------------------------------------- From fbe270c5b29da5f7ba392d213795b4320466a54f Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 17 Sep 2018 12:55:58 +0100 Subject: [PATCH 4/4] adjust for more comments --- ci/doctests.sh | 4 ++-- pandas/core/frame.py | 7 +++---- pandas/core/generic.py | 42 ++++++++++++++++++++++++++++-------------- pandas/core/series.py | 6 ++---- 4 files changed, 35 insertions(+), 24 deletions(-) diff --git a/ci/doctests.sh b/ci/doctests.sh index 2af5dbd26aeb1..654bd57107904 100755 --- a/ci/doctests.sh +++ b/ci/doctests.sh @@ -21,7 +21,7 @@ if [ "$DOCTEST" ]; then # DataFrame / Series docstrings pytest --doctest-modules -v pandas/core/frame.py \ - -k"-assign -axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round 
-set_index -stack -to_dict -to_stata -transform" + -k"-assign -axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata" if [ $? -ne "0" ]; then RET=1 @@ -35,7 +35,7 @@ if [ "$DOCTEST" ]; then fi pytest --doctest-modules -v pandas/core/generic.py \ - -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -sample -to_json -to_xarray -transform -transpose -values -xs" + -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -sample -to_json -to_xarray -transpose -values -xs" if [ $? -ne "0" ]; then RET=1 diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 251bc6587872d..bb08d4fa5582b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -109,10 +109,9 @@ _shared_doc_kwargs = dict( axes='index, columns', klass='DataFrame', axes_single_arg="{0 or 'index', 1 or 'columns'}", - axis=""" - axis : {0 or 'index', 1 or 'columns'}, default 0 - - 0 or 'index': apply function to each column. - - 1 or 'columns': apply function to each row.""", + axis="""axis : {0 or 'index', 1 or 'columns'}, default 0 + If 0 or 'index': apply function to each column. + If 1 or 'columns': apply function to each row.""", optional_by=""" by : str or list of str Name or list of names to sort by. diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 69337dbc8be90..243784ea84d43 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4545,16 +4545,16 @@ def pipe(self, func, *args, **kwargs): Parameters ---------- - func : function, string, list of functions and/or strings or dict + func : function, str, list or dict Function to use for aggregating the data. If a function, must either work when passed a %(klass)s or when passed to %(klass)s.apply. 
Accepted combinations are: - - string function name - function - - list of functions and/or function names - - dict of axis labels -> functions, function names or list of such + - string function name + - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` + - dict of axis labels -> functions, function names or list of such. %(axis)s *args Positional arguments to pass to `func`. @@ -4563,7 +4563,11 @@ def pipe(self, func, *args, **kwargs): Returns ------- - pandas.%(klass)s + DataFrame, Series or scalar + if DataFrame.agg is called with a single function, returns a Series + if DataFrame.agg is called with several functions, returns a DataFrame + if Series.agg is called with a single function, returns a scalar + if Series.agg is called with several functions, returns a Series Notes ----- @@ -4580,15 +4584,15 @@ def pipe(self, func, *args, **kwargs): Parameters ---------- - func : function, string, list of functions and/or strings or dict + func : function, str, list or dict Function to use for transforming the data. If a function, must either work when passed a %(klass)s or when passed to %(klass)s.apply. Accepted combinations are: - - string function name - function - - list of functions and/or function names + - string function name + - list of functions and/or function names, e.g. ``[np.exp, 'sqrt']`` - dict of axis labels -> functions, function names or list of such. %(axis)s *args @@ -4598,31 +4602,41 @@ def pipe(self, func, *args, **kwargs): Returns ------- - pandas.%(klass)s + %(klass)s A %(klass)s that must have the same length as self. Raises ------ - ValueError : if the returned %(klass)s has a different length than self. + ValueError : If the returned %(klass)s has a different length than self. See Also -------- - pandas.%(klass)s.agg : only perform aggregating type operations - pandas.%(klass)s.apply : Invoke function on a Series + %(klass)s.agg : Only perform aggregating type operations. + %(klass)s.apply : Invoke function on a %(klass)s.
Examples -------- >>> df = pd.DataFrame({'A': range(3), 'B': range(1, 4)}) + >>> df + A B + 0 0 1 + 1 1 2 + 2 2 3 >>> df.transform(lambda x: x + 1) A B 0 1 2 1 2 3 2 3 4 - Even though the resulting %(klass)s must have the length as the input - %(klass)s, it is possible to provide several input functions: + Even though the resulting %(klass)s must have the same length as the + input %(klass)s, it is possible to provide several input functions: >>> s = pd.Series(range(3)) + >>> s + 0 0 + 1 1 + 2 2 + dtype: int64 >>> s.transform([np.sqrt, np.exp]) sqrt exp 0 0.000000 1.000000 diff --git a/pandas/core/series.py b/pandas/core/series.py index cc22be9db95da..654ba01bc7897 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -89,10 +89,8 @@ _shared_doc_kwargs = dict( axes='index', klass='Series', axes_single_arg="{0 or 'index'}", - axis=""" - axis : {0 or 'index'} - Parameter needed for compatibility with DataFrame. - """, + axis="""axis : {0 or 'index'} + Parameter needed for compatibility with DataFrame.""", inplace="""inplace : boolean, default False If True, performs operation inplace and returns None.""", unique='np.ndarray', duplicated='Series',