From b39289f605f12e34a206f471e2ce015980f08dd4 Mon Sep 17 00:00:00 2001 From: Gjelt Date: Sun, 11 Mar 2018 01:02:33 +0100 Subject: [PATCH 01/12] DOC: update pd.Series/DataFrame.interpolate --- pandas/core/generic.py | 136 +++++++++++++++++++++++++++++++---------- 1 file changed, 103 insertions(+), 33 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 397726181d2fb..b77f4340d2227 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5257,32 +5257,32 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, ---------- method : {'linear', 'time', 'index', 'values', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh', - 'polynomial', 'spline', 'piecewise_polynomial', - 'from_derivatives', 'pchip', 'akima'} - - * 'linear': ignore the index and treat the values as equally + 'polynomial', 'spline', 'piecewise_polynomial', 'pad', + 'from_derivatives', 'pchip', 'akima'}, default 'linear' + Interpolation technique to use. + * 'linear': Ignore the index and treat the values as equally spaced. This is the only method supported on MultiIndexes. - default - * 'time': interpolation works on daily and higher resolution - data to interpolate given length of interval - * 'index', 'values': use the actual numerical values of the index + Default. + * 'time': Interpolation works on daily and higher resolution + data to interpolate given length of interval. + * 'index', 'values': use the actual numerical values of the index. * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'barycentric', 'polynomial' is passed to + 'barycentric', 'polynomial': Passed to ``scipy.interpolate.interp1d``. Both 'polynomial' and 'spline' require that you also specify an `order` (int), e.g. df.interpolate(method='polynomial', order=4). These use the actual numerical values of the index. 
- * 'krogh', 'piecewise_polynomial', 'spline', 'pchip' and 'akima' - are all wrappers around the scipy interpolation methods of + * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima': + Wrappers around the scipy interpolation methods of similar names. These use the actual numerical values of the index. For more information on their behavior, see the `scipy documentation `__ and `tutorial documentation - `__ - * 'from_derivatives' refers to BPoly.from_derivatives which + `__. + * 'from_derivatives': Refers to BPoly.from_derivatives which replaces 'piecewise_polynomial' interpolation method in - scipy 0.18 + scipy 0.18. .. versionadded:: 0.18.1 @@ -5292,24 +5292,29 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, scipy < 0.18 axis : {0, 1}, default 0 - * 0: fill column-by-column - * 1: fill row-by-row - limit : int, default None. + Axis to interpolate along. + * 0: Fill column-by-column. + * 1: Fill row-by-row. + limit : int, default None Maximum number of consecutive NaNs to fill. Must be greater than 0. + inplace : bool, default False + Update the data in place if possible. limit_direction : {'forward', 'backward', 'both'}, default 'forward' + If limit is specified, consecutive NaNs will be filled in this + direction. limit_area : {'inside', 'outside'}, default None - * None: (default) no fill restriction - * 'inside' Only fill NaNs surrounded by valid values (interpolate). - * 'outside' Only fill NaNs outside valid values (extrapolate). + If limit is specified, consecutive NaNs will be filled with this + restriction. + * None: No fill restriction (default). + * 'inside': Only fill NaNs surrounded by valid values (interpolate). + * 'outside': Only fill NaNs outside valid values (extrapolate). + .. versionadded:: 0.21.0 - If limit is specified, consecutive NaNs will be filled in this - direction. - inplace : bool, default False - Update the NDFrame in place if possible. 
downcast : optional, 'infer' or None, defaults to None Downcast dtypes if possible. - kwargs : keyword arguments to pass on to the interpolating function. + kwargs + Keyword arguments to pass on to the interpolating function. Returns ------- @@ -5317,21 +5322,86 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, See Also -------- - reindex, replace, fillna + replace : replace a value + fillna : fill missing values Examples -------- - Filling in NaNs + Filling in NaNs in a Series via linear interpolation. - >>> s = pd.Series([0, 1, np.nan, 3]) - >>> s.interpolate() - 0 0 - 1 1 - 2 2 - 3 3 + >>> ser = pd.Series([0, 1, np.nan, 3]) + >>> ser.interpolate() + 0 0.0 + 1 1.0 + 2 2.0 + 3 3.0 dtype: float64 + Filling in NaNs in a Series by padding, but filling at most two + consecutive NaN at a time. + + >>> ser = pd.Series([np.nan, "single_one", np.nan, + ... "fill_two_more", np.nan, np.nan, np.nan, + ... 4.71, np.nan]) + >>> ser + 0 NaN + 1 single_one + 2 NaN + 3 fill_two_more + 4 NaN + 5 NaN + 6 NaN + 7 4.71 + 8 NaN + dtype: object + >>> ser.interpolate(method='pad', limit=2) + 0 NaN + 1 single_one + 2 single_one + 3 fill_two_more + 4 fill_two_more + 5 fill_two_more + 6 NaN + 7 4.71 + 8 4.71 + dtype: object + + Create a DataFrame with missing values. + + >>> df = pd.DataFrame([[0,1,2,0,4],[1,2,3,-1,8], + ... [2,3,4,-2,12],[3,4,5,-3,16]], + ... columns=['a', 'b', 'c', 'd', 'e']) + >>> df + a b c d e + 0 0 1 2 0 4 + 1 1 2 3 -1 8 + 2 2 3 4 -2 12 + 3 3 4 5 -3 16 + >>> df.loc[3,'a'] = np.nan + >>> df.loc[0,'b'] = np.nan + >>> df.loc[1,'d'] = np.nan + >>> df.loc[2,'d'] = np.nan + >>> df.loc[1,'e'] = np.nan + >>> df + a b c d e + 0 0.0 NaN 2 0.0 4.0 + 1 1.0 2.0 3 NaN NaN + 2 2.0 3.0 4 NaN 12.0 + 3 NaN 4.0 5 -3.0 16.0 + + Fill the DataFrame forward (that is, going down) along each column. + Note how the last entry in column `a` is interpolated differently + (because there is no entry after it to use for interpolation). 
+ Note how the first entry in column `b` remains NA (because there + is no entry befofe it to use for interpolation). + + >>> df.interpolate(method='linear', limit_direction='forward', axis=0) + a b c d e + 0 0.0 NaN 2 0.0 4.0 + 1 1.0 2.0 3 -1.0 8.0 + 2 2.0 3.0 4 -2.0 12.0 + 3 2.0 4.0 5 -3.0 16.0 """ @Appender(_shared_docs['interpolate'] % _shared_doc_kwargs) From 0f5f66624642caf7f8d6c4fb18664affb71015b7 Mon Sep 17 00:00:00 2001 From: Gjelt Date: Sun, 11 Mar 2018 01:18:36 +0100 Subject: [PATCH 02/12] DOC: update pd.Series/DataFrame.interpolate, removed whitespace --- pandas/core/generic.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b77f4340d2227..ec29a0edbf08b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5306,9 +5306,10 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, If limit is specified, consecutive NaNs will be filled with this restriction. * None: No fill restriction (default). - * 'inside': Only fill NaNs surrounded by valid values (interpolate). + * 'inside': Only fill NaNs surrounded by valid values + (interpolate). * 'outside': Only fill NaNs outside valid values (extrapolate). - + .. versionadded:: 0.21.0 downcast : optional, 'infer' or None, defaults to None @@ -5340,9 +5341,9 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, Filling in NaNs in a Series by padding, but filling at most two consecutive NaN at a time. - - >>> ser = pd.Series([np.nan, "single_one", np.nan, - ... "fill_two_more", np.nan, np.nan, np.nan, + + >>> ser = pd.Series([np.nan, "single_one", np.nan, + ... "fill_two_more", np.nan, np.nan, np.nan, ... 4.71, np.nan]) >>> ser 0 NaN @@ -5366,11 +5367,11 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, 7 4.71 8 4.71 dtype: object - + Create a DataFrame with missing values. - + >>> df = pd.DataFrame([[0,1,2,0,4],[1,2,3,-1,8], - ... 
[2,3,4,-2,12],[3,4,5,-3,16]], + ... [2,3,4,-2,12],[3,4,5,-3,16]], ... columns=['a', 'b', 'c', 'd', 'e']) >>> df a b c d e @@ -5389,13 +5390,13 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, 1 1.0 2.0 3 NaN NaN 2 2.0 3.0 4 NaN 12.0 3 NaN 4.0 5 -3.0 16.0 - + Fill the DataFrame forward (that is, going down) along each column. - Note how the last entry in column `a` is interpolated differently + Note how the last entry in column `a` is interpolated differently (because there is no entry after it to use for interpolation). Note how the first entry in column `b` remains NA (because there is no entry befofe it to use for interpolation). - + >>> df.interpolate(method='linear', limit_direction='forward', axis=0) a b c d e 0 0.0 NaN 2 0.0 4.0 From 5358af9b1993648bc4087fe24499f6cc72c296a2 Mon Sep 17 00:00:00 2001 From: Gjelt Date: Sun, 11 Mar 2018 02:14:34 +0100 Subject: [PATCH 03/12] DOC: update pd.Series/DataFrame.interpolate, new allowed value for input param --- pandas/core/generic.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ec29a0edbf08b..bc103fc378e7c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5260,12 +5260,14 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, 'polynomial', 'spline', 'piecewise_polynomial', 'pad', 'from_derivatives', 'pchip', 'akima'}, default 'linear' Interpolation technique to use. + * 'linear': Ignore the index and treat the values as equally spaced. This is the only method supported on MultiIndexes. Default. * 'time': Interpolation works on daily and higher resolution data to interpolate given length of interval. * 'index', 'values': use the actual numerical values of the index. + * 'pad': Fill in NaNs using existing values. * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', 'polynomial': Passed to ``scipy.interpolate.interp1d``. 
Both 'polynomial' and 'spline' @@ -5280,7 +5282,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, `__ and `tutorial documentation `__. - * 'from_derivatives': Refers to BPoly.from_derivatives which + * 'from_derivatives': Refers to + ``scipy.intrepolate.BPoly.from_derivatives`` which replaces 'piecewise_polynomial' interpolation method in scipy 0.18. @@ -5293,6 +5296,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, axis : {0, 1}, default 0 Axis to interpolate along. + * 0: Fill column-by-column. * 1: Fill row-by-row. limit : int, default None @@ -5305,6 +5309,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, limit_area : {'inside', 'outside'}, default None If limit is specified, consecutive NaNs will be filled with this restriction. + * None: No fill restriction (default). * 'inside': Only fill NaNs surrounded by valid values (interpolate). @@ -5319,7 +5324,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, Returns ------- - Series or DataFrame of same shape interpolated at the NaNs + Series or DataFrame + Same-shape object interpolated at the NaN values See Also -------- From 6cee3185ca15a2a7a41d7807cea60edc22dca3ae Mon Sep 17 00:00:00 2001 From: Gjelt Date: Mon, 12 Mar 2018 23:45:53 +0100 Subject: [PATCH 04/12] DOC: pandas.DataFrame.interpolate incorporated recommended channges --- pandas/core/generic.py | 84 +++++++++++++++++++++++++----------------- 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bc103fc378e7c..b98fd8ed17085 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5258,16 +5258,15 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, method : {'linear', 'time', 'index', 'values', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh', 'polynomial', 'spline', 'piecewise_polynomial', 'pad', - 
'from_derivatives', 'pchip', 'akima'}, default 'linear' + 'from_derivatives', 'pchip', 'akima'} Interpolation technique to use. * 'linear': Ignore the index and treat the values as equally spaced. This is the only method supported on MultiIndexes. - Default. - * 'time': Interpolation works on daily and higher resolution + * 'time': Works on daily and higher resolution data to interpolate given length of interval. * 'index', 'values': use the actual numerical values of the index. - * 'pad': Fill in NaNs using existing values. + * 'pad': Fill in `NaN`s using existing values. * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', 'polynomial': Passed to ``scipy.interpolate.interp1d``. Both 'polynomial' and 'spline' @@ -5275,15 +5274,10 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, e.g. df.interpolate(method='polynomial', order=4). These use the actual numerical values of the index. * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima': - Wrappers around the scipy interpolation methods of - similar names. These use the actual numerical values of the - index. For more information on their behavior, see the - `scipy documentation - `__ - and `tutorial documentation - `__. + Wrappers around the scipy interpolation methods of similar + names. See `Notes`. * 'from_derivatives': Refers to - ``scipy.intrepolate.BPoly.from_derivatives`` which + ``scipy.interpolate.BPoly.from_derivatives`` which replaces 'piecewise_polynomial' interpolation method in scipy 0.18. @@ -5294,32 +5288,30 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, 'piecewise_polynomial' in scipy 0.18; backwards-compatible with scipy < 0.18 - axis : {0, 1}, default 0 + axis : {0 or 'index', 1 or 'columns', None}, default None Axis to interpolate along. - - * 0: Fill column-by-column. - * 1: Fill row-by-row. - limit : int, default None - Maximum number of consecutive NaNs to fill. Must be greater than 0. 
+ limit : int, optional + Maximum number of consecutive `NaN`s to fill. Must be greater than + 0. inplace : bool, default False Update the data in place if possible. limit_direction : {'forward', 'backward', 'both'}, default 'forward' - If limit is specified, consecutive NaNs will be filled in this + If limit is specified, consecutive `NaN`s will be filled in this direction. - limit_area : {'inside', 'outside'}, default None - If limit is specified, consecutive NaNs will be filled with this + limit_area : {`None`, 'inside', 'outside'} + If limit is specified, consecutive `NaN`s will be filled with this restriction. * None: No fill restriction (default). - * 'inside': Only fill NaNs surrounded by valid values + * 'inside': Only fill `NaN`s surrounded by valid values (interpolate). - * 'outside': Only fill NaNs outside valid values (extrapolate). + * 'outside': Only fill `NaN`s outside valid values (extrapolate). .. versionadded:: 0.21.0 downcast : optional, 'infer' or None, defaults to None Downcast dtypes if possible. - kwargs + **kwargs Keyword arguments to pass on to the interpolating function. Returns @@ -5331,27 +5323,50 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, -------- replace : replace a value fillna : fill missing values + scipy.interpolate.Akima1DInterpolator : piecewise cubic polynomials + (Akima interpolator) + scipy.interpolate.BPoly.from_derivatives : piecewise polynomial in the + Bernstein basis + scipy.interpolate.interp1d : interpolate a 1-D function + scipy.interpolate.KroghInterpolator : interpolate polynomial (Krogh + interpolator) + scipy.interpolate.PchipInterpolator : PCHIP 1-d monotonic cubic + interpolation + scipy.interpolate.CubicSpline : cubic spline data interpolator + + Notes + ----- + If the selected `method` is one of 'krogh', 'piecewise_polynomial', + 'spline', 'pchip', 'akima': + They are wrappers around the scipy interpolation methods of similar + names. 
These use the actual numerical values of the index. + For more information on their behavior, see the + `scipy documentation + `__ + and `tutorial documentation + `__. Examples -------- - Filling in NaNs in a Series via linear interpolation. + Filling in `NaNs` in a :class:`~pandas.Series` via linear + interpolation. - >>> ser = pd.Series([0, 1, np.nan, 3]) - >>> ser.interpolate() + >>> s = pd.Series([0, 1, np.nan, 3]) + >>> s.interpolate() 0 0.0 1 1.0 2 2.0 3 3.0 dtype: float64 - Filling in NaNs in a Series by padding, but filling at most two - consecutive NaN at a time. + Filling in `NaN`s in a Series by padding, but filling at most two + consecutive `NaN` at a time. - >>> ser = pd.Series([np.nan, "single_one", np.nan, + >>> s = pd.Series([np.nan, "single_one", np.nan, ... "fill_two_more", np.nan, np.nan, np.nan, ... 4.71, np.nan]) - >>> ser + >>> s 0 NaN 1 single_one 2 NaN @@ -5362,7 +5377,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, 7 4.71 8 NaN dtype: object - >>> ser.interpolate(method='pad', limit=2) + >>> s.interpolate(method='pad', limit=2) 0 NaN 1 single_one 2 single_one @@ -5374,7 +5389,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, 8 4.71 dtype: object - Create a DataFrame with missing values. + Create a :class:`~pandas.DataFrame` with missing values to fill it + with diffferent methods. >>> df = pd.DataFrame([[0,1,2,0,4],[1,2,3,-1,8], ... [2,3,4,-2,12],[3,4,5,-3,16]], @@ -5400,7 +5416,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, Fill the DataFrame forward (that is, going down) along each column. Note how the last entry in column `a` is interpolated differently (because there is no entry after it to use for interpolation). - Note how the first entry in column `b` remains NA (because there + Note how the first entry in column `b` remains `NaN` (because there is no entry befofe it to use for interpolation). 
>>> df.interpolate(method='linear', limit_direction='forward', axis=0) From ba8cfd2e1ceca150ad327b3be5073763472643f6 Mon Sep 17 00:00:00 2001 From: Gjelt Date: Tue, 13 Mar 2018 00:15:57 +0100 Subject: [PATCH 05/12] DOC: pandas.DataFrame.interpolnew examples --- pandas/core/generic.py | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b98fd8ed17085..08303c59014ea 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5266,8 +5266,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, * 'time': Works on daily and higher resolution data to interpolate given length of interval. * 'index', 'values': use the actual numerical values of the index. - * 'pad': Fill in `NaN`s using existing values. - * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', + * 'pad': Fill in NaNs using existing values. + * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'spline', 'barycentric', 'polynomial': Passed to ``scipy.interpolate.interp1d``. Both 'polynomial' and 'spline' require that you also specify an `order` (int), @@ -5291,21 +5291,21 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, axis : {0 or 'index', 1 or 'columns', None}, default None Axis to interpolate along. limit : int, optional - Maximum number of consecutive `NaN`s to fill. Must be greater than + Maximum number of consecutive NaNs to fill. Must be greater than 0. inplace : bool, default False Update the data in place if possible. limit_direction : {'forward', 'backward', 'both'}, default 'forward' - If limit is specified, consecutive `NaN`s will be filled in this + If limit is specified, consecutive NaNs will be filled in this direction. limit_area : {`None`, 'inside', 'outside'} - If limit is specified, consecutive `NaN`s will be filled with this + If limit is specified, consecutive NaNs will be filled with this restriction. 
* None: No fill restriction (default). - * 'inside': Only fill `NaN`s surrounded by valid values + * 'inside': Only fill NaNs surrounded by valid values (interpolate). - * 'outside': Only fill `NaN`s outside valid values (extrapolate). + * 'outside': Only fill NaNs outside valid values (extrapolate). .. versionadded:: 0.21.0 @@ -5349,7 +5349,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, Examples -------- - Filling in `NaNs` in a :class:`~pandas.Series` via linear + Filling in `NaN` in a :class:`~pandas.Series` via linear interpolation. >>> s = pd.Series([0, 1, np.nan, 3]) @@ -5360,7 +5360,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, 3 3.0 dtype: float64 - Filling in `NaN`s in a Series by padding, but filling at most two + Filling in `NaN` in a Series by padding, but filling at most two consecutive `NaN` at a time. >>> s = pd.Series([np.nan, "single_one", np.nan, @@ -5389,6 +5389,24 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, 8 4.71 dtype: object + Filling in `NaN` in a Series via polynomial interpolation or splines: + Both `polynomial` and `spline` methods require that you also specify + an `order` (int). + + >>> s = pd.Series([0, 2, np.nan, 8]) + >>> s.interpolate(method='polynomial', order=1) + 0 0.0 + 1 2.0 + 2 5.0 + 3 8.0 + dtype: float64 + >>> s.interpolate(method='polynomial', order=2) + 0 0.000000 + 1 2.000000 + 2 4.666667 + 3 8.000000 + dtype: float64 + Create a :class:`~pandas.DataFrame` with missing values to fill it with diffferent methods. 
From 272d5e22dbb5350d21e7e57c7611f951acd0ead7 Mon Sep 17 00:00:00 2001 From: Gjelt Date: Tue, 13 Mar 2018 00:22:36 +0100 Subject: [PATCH 06/12] DOC: pandas.DataFrame.interpol example update --- pandas/core/generic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 08303c59014ea..cf06f90fc3a1d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5419,6 +5419,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, 1 1 2 3 -1 8 2 2 3 4 -2 12 3 3 4 5 -3 16 + >>> df.loc[1,'a'] = np.nan >>> df.loc[3,'a'] = np.nan >>> df.loc[0,'b'] = np.nan >>> df.loc[1,'d'] = np.nan @@ -5427,7 +5428,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, >>> df a b c d e 0 0.0 NaN 2 0.0 4.0 - 1 1.0 2.0 3 NaN NaN + 1 NaN 2.0 3 NaN NaN 2 2.0 3.0 4 NaN 12.0 3 NaN 4.0 5 -3.0 16.0 From d64f5f38aa940e484c441f7fadaadfa14ba0838f Mon Sep 17 00:00:00 2001 From: Gjelt Date: Thu, 15 Mar 2018 00:10:26 +0100 Subject: [PATCH 07/12] DOC> pandas.DataFrame.interpolate - implemented feedback --- pandas/core/generic.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cf06f90fc3a1d..79b1cf0d69b7d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5274,7 +5274,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, e.g. df.interpolate(method='polynomial', order=4). These use the actual numerical values of the index. * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima': - Wrappers around the scipy interpolation methods of similar + Wrappers around the SciPy interpolation methods of similar names. See `Notes`. 
* 'from_derivatives': Refers to ``scipy.interpolate.BPoly.from_derivatives`` which @@ -5302,7 +5302,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, If limit is specified, consecutive NaNs will be filled with this restriction. - * None: No fill restriction (default). + * None: No fill restriction. * 'inside': Only fill NaNs surrounded by valid values (interpolate). * 'outside': Only fill NaNs outside valid values (extrapolate). @@ -5336,14 +5336,13 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, Notes ----- - If the selected `method` is one of 'krogh', 'piecewise_polynomial', - 'spline', 'pchip', 'akima': - They are wrappers around the scipy interpolation methods of similar - names. These use the actual numerical values of the index. + The 'krogh', 'piecewise_polynomial', 'spline', 'pchip' and 'akima' + methods are wrappers around the respective SciPy implementations of + similar names. These use the actual numerical values of the index. For more information on their behavior, see the - `scipy documentation + `SciPy documentation `__ - and `tutorial documentation + and `SciPy tutorial `__. Examples @@ -5406,10 +5405,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, 2 4.666667 3 8.000000 dtype: float64 - - Create a :class:`~pandas.DataFrame` with missing values to fill it - with diffferent methods. - + >>> df = pd.DataFrame([[0,1,2,0,4],[1,2,3,-1,8], ... [2,3,4,-2,12],[3,4,5,-3,16]], ... 
columns=['a', 'b', 'c', 'd', 'e']) From 0734c3ba3f95347cf12cd765bad386223ccc1eca Mon Sep 17 00:00:00 2001 From: Gjelt Date: Thu, 15 Mar 2018 00:24:23 +0100 Subject: [PATCH 08/12] DOC: pandas.DataFrame.interpolate - implemented feedback --- pandas/core/generic.py | 80 +++++++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 37 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 79b1cf0d69b7d..7ec1c936ad225 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5272,7 +5272,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, ``scipy.interpolate.interp1d``. Both 'polynomial' and 'spline' require that you also specify an `order` (int), e.g. df.interpolate(method='polynomial', order=4). - These use the actual numerical values of the index. + These use the numerical values of the index. * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima': Wrappers around the SciPy interpolation methods of similar names. See `Notes`. @@ -5285,8 +5285,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, Added support for the 'akima' method Added interpolate method 'from_derivatives' which replaces - 'piecewise_polynomial' in scipy 0.18; backwards-compatible with - scipy < 0.18 + 'piecewise_polynomial' in SciPy 0.18; backwards-compatible with + SciPy < 0.18 axis : {0 or 'index', 1 or 'columns', None}, default None Axis to interpolate along. @@ -5317,7 +5317,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, Returns ------- Series or DataFrame - Same-shape object interpolated at the NaN values + Returns the same object type as the caller, interpolated at + some or all `NaN` values See Also -------- @@ -5363,8 +5364,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, consecutive `NaN` at a time. >>> s = pd.Series([np.nan, "single_one", np.nan, - ... "fill_two_more", np.nan, np.nan, np.nan, - ... 4.71, np.nan]) + ... 
"fill_two_more", np.nan, np.nan, np.nan, + ... 4.71, np.nan]) >>> s 0 NaN 1 single_one @@ -5393,53 +5394,58 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, an `order` (int). >>> s = pd.Series([0, 2, np.nan, 8]) - >>> s.interpolate(method='polynomial', order=1) - 0 0.0 - 1 2.0 - 2 5.0 - 3 8.0 - dtype: float64 >>> s.interpolate(method='polynomial', order=2) 0 0.000000 1 2.000000 2 4.666667 3 8.000000 dtype: float64 - - >>> df = pd.DataFrame([[0,1,2,0,4],[1,2,3,-1,8], - ... [2,3,4,-2,12],[3,4,5,-3,16]], - ... columns=['a', 'b', 'c', 'd', 'e']) + + Filling in `NaN` in a :class:`~pandas.DataFrame` via linear + interpolation. + + >>> df = pd.DataFrame({'a': range(0,4), + ... 'b': range(1,5), + ... 'c': range(-1, -5, -1), + ... 'd': [x**2 for x in range(1,5)]}) >>> df - a b c d e - 0 0 1 2 0 4 - 1 1 2 3 -1 8 - 2 2 3 4 -2 12 - 3 3 4 5 -3 16 + a b c d + 0 0 1 -1 1 + 1 1 2 -2 4 + 2 2 3 -3 9 + 3 3 4 -4 16 >>> df.loc[1,'a'] = np.nan >>> df.loc[3,'a'] = np.nan >>> df.loc[0,'b'] = np.nan + >>> df.loc[1,'c'] = np.nan + >>> df.loc[2,'c'] = np.nan >>> df.loc[1,'d'] = np.nan - >>> df.loc[2,'d'] = np.nan - >>> df.loc[1,'e'] = np.nan >>> df - a b c d e - 0 0.0 NaN 2 0.0 4.0 - 1 NaN 2.0 3 NaN NaN - 2 2.0 3.0 4 NaN 12.0 - 3 NaN 4.0 5 -3.0 16.0 + a b c d + 0 0.0 NaN -1.0 1.0 + 1 NaN 2.0 NaN NaN + 2 2.0 3.0 NaN 9.0 + 3 NaN 4.0 -4.0 16.0 Fill the DataFrame forward (that is, going down) along each column. - Note how the last entry in column `a` is interpolated differently - (because there is no entry after it to use for interpolation). - Note how the first entry in column `b` remains `NaN` (because there - is no entry befofe it to use for interpolation). + Note how the last entry in column `a` is interpolated differently, + because there is no entry after it to use for interpolation. + Note how the first entry in column `b` remains `NaN`, because there + is no entry before it to use for interpolation. 
>>> df.interpolate(method='linear', limit_direction='forward', axis=0) - a b c d e - 0 0.0 NaN 2 0.0 4.0 - 1 1.0 2.0 3 -1.0 8.0 - 2 2.0 3.0 4 -2.0 12.0 - 3 2.0 4.0 5 -3.0 16.0 + a b c d + 0 0.0 NaN -1.0 1.0 + 1 1.0 2.0 -2.0 5.0 + 2 2.0 3.0 -3.0 9.0 + 3 2.0 4.0 -4.0 16.0 + + >>> df['d'].interpolate(method='polynomial', order=2) + 0 1.0 + 1 4.0 + 2 9.0 + 3 16.0 + Name: d, dtype: float64 """ @Appender(_shared_docs['interpolate'] % _shared_doc_kwargs) From 1eab0a881b1b29a7555a426f50ac3511da450f9f Mon Sep 17 00:00:00 2001 From: Gjelt Date: Thu, 15 Mar 2018 00:28:22 +0100 Subject: [PATCH 09/12] DOC: pandas.DataFrame.interpolate - sneaky whitespace --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7ec1c936ad225..f4b31af8644fb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5445,7 +5445,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, 1 4.0 2 9.0 3 16.0 - Name: d, dtype: float64 + Name: d, dtype: float64 """ @Appender(_shared_docs['interpolate'] % _shared_doc_kwargs) From 3ca95ec92494a3f0bebfb56bfa2f0a11c55a0798 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sun, 19 Aug 2018 00:21:08 +0100 Subject: [PATCH 10/12] Minor fixes to interpolate docstring --- pandas/core/generic.py | 60 +++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 33 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f4b31af8644fb..50dd47e8219f7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5251,7 +5251,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, _shared_docs['interpolate'] = """ Please note that only ``method='linear'`` is supported for - DataFrames/Series with a MultiIndex. + DataFrame/Series with a MultiIndex. 
Parameters ---------- @@ -5263,15 +5263,15 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, * 'linear': Ignore the index and treat the values as equally spaced. This is the only method supported on MultiIndexes. - * 'time': Works on daily and higher resolution - data to interpolate given length of interval. + * 'time': Works on daily and higher resolution data to interpolate + given length of interval. * 'index', 'values': use the actual numerical values of the index. * 'pad': Fill in NaNs using existing values. * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'spline', 'barycentric', 'polynomial': Passed to ``scipy.interpolate.interp1d``. Both 'polynomial' and 'spline' require that you also specify an `order` (int), - e.g. df.interpolate(method='polynomial', order=4). + e.g. ``df.interpolate(method='polynomial', order=4)``. These use the numerical values of the index. * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima': Wrappers around the SciPy interpolation methods of similar @@ -5298,7 +5298,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, limit_direction : {'forward', 'backward', 'both'}, default 'forward' If limit is specified, consecutive NaNs will be filled in this direction. - limit_area : {`None`, 'inside', 'outside'} + limit_area : {`None`, 'inside', 'outside'}, default None If limit is specified, consecutive NaNs will be filled with this restriction. 
@@ -5322,18 +5322,18 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, See Also -------- - replace : replace a value - fillna : fill missing values - scipy.interpolate.Akima1DInterpolator : piecewise cubic polynomials - (Akima interpolator) - scipy.interpolate.BPoly.from_derivatives : piecewise polynomial in the - Bernstein basis - scipy.interpolate.interp1d : interpolate a 1-D function - scipy.interpolate.KroghInterpolator : interpolate polynomial (Krogh - interpolator) + Series.fillna : Fill missing values using different methods. + DataFrame.fillna : Fill missing values using different methods. + scipy.interpolate.Akima1DInterpolator : Piecewise cubic polynomials + (Akima interpolator). + scipy.interpolate.BPoly.from_derivatives : Piecewise polynomial in the + Bernstein basis. + scipy.interpolate.interp1d : Interpolate a 1-D function. + scipy.interpolate.KroghInterpolator : Interpolate polynomial (Krogh + interpolator). scipy.interpolate.PchipInterpolator : PCHIP 1-d monotonic cubic - interpolation - scipy.interpolate.CubicSpline : cubic spline data interpolator + interpolation. + scipy.interpolate.CubicSpline : Cubic spline data interpolator. Notes ----- @@ -5348,11 +5348,16 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, Examples -------- - Filling in `NaN` in a :class:`~pandas.Series` via linear interpolation. >>> s = pd.Series([0, 1, np.nan, 3]) + >>> s + 0 0.0 + 1 1.0 + 2 NaN + 3 3.0 + dtype: float64 >>> s.interpolate() 0 0.0 1 1.0 @@ -5404,22 +5409,11 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, Filling in `NaN` in a :class:`~pandas.DataFrame` via linear interpolation. - >>> df = pd.DataFrame({'a': range(0,4), - ... 'b': range(1,5), - ... 'c': range(-1, -5, -1), - ... 
'd': [x**2 for x in range(1,5)]}) - >>> df - a b c d - 0 0 1 -1 1 - 1 1 2 -2 4 - 2 2 3 -3 9 - 3 3 4 -4 16 - >>> df.loc[1,'a'] = np.nan - >>> df.loc[3,'a'] = np.nan - >>> df.loc[0,'b'] = np.nan - >>> df.loc[1,'c'] = np.nan - >>> df.loc[2,'c'] = np.nan - >>> df.loc[1,'d'] = np.nan + >>> df = pd.DataFrame([(0.0, np.nan, -1.0, 1.0), + ... (np.nan, 2.0, np.nan, np.nan), + ... (2.0, 3.0, np.nan, 9.0), + ... (np.nan, 4.0, -4.0, 16.0)], + ... columns=list('abcd')) >>> df a b c d 0 0.0 NaN -1.0 1.0 From 3af4306c8c0d8a7977417131ea6e1b75ecd3b1ba Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sun, 19 Aug 2018 00:46:57 +0100 Subject: [PATCH 11/12] Last minor fixes --- pandas/core/generic.py | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 4a8a69ccd4cd2..0cdb7e949b44d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6085,11 +6085,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, Parameters ---------- - method : {'linear', 'time', 'index', 'values', 'nearest', 'zero', - 'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh', - 'polynomial', 'spline', 'piecewise_polynomial', 'pad', - 'from_derivatives', 'pchip', 'akima'} - Interpolation technique to use. + method : str, default 'linear' + Interpolation technique to use. One of: * 'linear': Ignore the index and treat the values as equally spaced. This is the only method supported on MultiIndexes. @@ -6152,8 +6149,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, See Also -------- - Series.fillna : Fill missing values using different methods. - DataFrame.fillna : Fill missing values using different methods. + fillna : Fill missing values using different methods. scipy.interpolate.Akima1DInterpolator : Piecewise cubic polynomials (Akima interpolator). 
scipy.interpolate.BPoly.from_derivatives : Piecewise polynomial in the @@ -6236,8 +6232,13 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, 3 8.000000 dtype: float64 - Filling in `NaN` in a :class:`~pandas.DataFrame` via linear - interpolation. + Fill the DataFrame forward (that is, going down) along each column + using linear interpolation. + + Note how the last entry in column `a` is interpolated differently, + because there is no entry after it to use for interpolation. + Note how the first entry in column `b` remains `NaN`, because there + is no entry before it to use for interpolation. >>> df = pd.DataFrame([(0.0, np.nan, -1.0, 1.0), ... (np.nan, 2.0, np.nan, np.nan), @@ -6250,13 +6251,6 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, 1 NaN 2.0 NaN NaN 2 2.0 3.0 NaN 9.0 3 NaN 4.0 -4.0 16.0 - - Fill the DataFrame forward (that is, going down) along each column. - Note how the last entry in column `a` is interpolated differently, - because there is no entry after it to use for interpolation. - Note how the first entry in column `b` remains `NaN`, because there - is no entry befofe it to use for interpolation. - >>> df.interpolate(method='linear', limit_direction='forward', axis=0) a b c d 0 0.0 NaN -1.0 1.0 @@ -6264,6 +6258,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, 2 2.0 3.0 -3.0 9.0 3 2.0 4.0 -4.0 16.0 + Using polynomial interpolation. 
+ >>> df['d'].interpolate(method='polynomial', order=2) 0 1.0 1 4.0 From 3f00e93f25b477e6005acdf29c46b0d62bace19b Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sun, 19 Aug 2018 01:25:21 +0100 Subject: [PATCH 12/12] Fixing quotes and backticks --- pandas/core/generic.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0cdb7e949b44d..9bdf34113ccf0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6096,7 +6096,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, * 'pad': Fill in NaNs using existing values. * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'spline', 'barycentric', 'polynomial': Passed to - ``scipy.interpolate.interp1d``. Both 'polynomial' and 'spline' + `scipy.interpolate.interp1d`. Both 'polynomial' and 'spline' require that you also specify an `order` (int), e.g. ``df.interpolate(method='polynomial', order=4)``. These use the numerical values of the index. @@ -6104,7 +6104,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, Wrappers around the SciPy interpolation methods of similar names. See `Notes`. * 'from_derivatives': Refers to - ``scipy.interpolate.BPoly.from_derivatives`` which + `scipy.interpolate.BPoly.from_derivatives` which replaces 'piecewise_polynomial' interpolation method in scipy 0.18. @@ -6129,7 +6129,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, If limit is specified, consecutive NaNs will be filled with this restriction. - * None: No fill restriction. + * ``None``: No fill restriction. * 'inside': Only fill NaNs surrounded by valid values (interpolate). * 'outside': Only fill NaNs outside valid values (extrapolate). 
@@ -6145,7 +6145,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, ------- Series or DataFrame Returns the same object type as the caller, interpolated at - some or all `NaN` values + some or all ``NaN`` values See Also -------- @@ -6174,7 +6174,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, Examples -------- - Filling in `NaN` in a :class:`~pandas.Series` via linear + Filling in ``NaN`` in a :class:`~pandas.Series` via linear interpolation. >>> s = pd.Series([0, 1, np.nan, 3]) @@ -6191,8 +6191,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, 3 3.0 dtype: float64 - Filling in `NaN` in a Series by padding, but filling at most two - consecutive `NaN` at a time. + Filling in ``NaN`` in a Series by padding, but filling at most two + consecutive ``NaN`` at a time. >>> s = pd.Series([np.nan, "single_one", np.nan, ... "fill_two_more", np.nan, np.nan, np.nan, @@ -6220,9 +6220,9 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, 8 4.71 dtype: object - Filling in `NaN` in a Series via polynomial interpolation or splines: - Both `polynomial` and `spline` methods require that you also specify - an `order` (int). + Filling in ``NaN`` in a Series via polynomial interpolation or splines: + Both 'polynomial' and 'spline' methods require that you also specify + an ``order`` (int). >>> s = pd.Series([0, 2, np.nan, 8]) >>> s.interpolate(method='polynomial', order=2) @@ -6235,9 +6235,9 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, Fill the DataFrame forward (that is, going down) along each column using linear interpolation. - Note how the last entry in column `a` is interpolated differently, + Note how the last entry in column 'a' is interpolated differently, because there is no entry after it to use for interpolation. 
+ Note how the first entry in column 'b' remains ``NaN``, because there is no entry before it to use for interpolation. >>> df = pd.DataFrame([(0.0, np.nan, -1.0, 1.0),