From a8b2b667c41c062dcf9b4be3fa184625ce64287d Mon Sep 17 00:00:00 2001 From: anishkarki Date: Sat, 9 Nov 2024 02:11:52 +1000 Subject: [PATCH 01/11] DOC: Improve documentation for df.interpolate() methods #60227 --- pandas/core/generic.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 35014674565ff..00a3c5a24e8a8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7668,8 +7668,12 @@ def interpolate( * 'linear': Ignore the index and treat the values as equally spaced. This is the only method supported on MultiIndexes. * 'time': Works on daily and higher resolution data to interpolate - given length of interval. - * 'index', 'values': use the actual numerical values of the index. + given length of interval. This interpolates values based on + time interval between observations. + * 'index': The interpolation uses the actual numerical values + of the df's index to linearly calculate missing value. + * 'value': Interpolation based on the actual numerical values + in the DataFrame, treating them as equally spaced along the index. * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', 'polynomial': Passed to `scipy.interpolate.interp1d`, whereas 'spline' is passed to @@ -7798,6 +7802,26 @@ def interpolate( 2 2.0 3.0 -3.0 9.0 3 2.0 4.0 -4.0 16.0 + Using linear and index method for linear interpolation. + >>> data = {"val": [1, np.nan, 3]} + >>> df = pd.DataFrame( + ... data, index=[0, 1, 6] + ... ) # a non-sequential index to demonstrate the difference + >>> df + val + 0 1.0 + 1 NaN + 6 3.0 + >>> df.interpolate(method="linear") + val + 0 1.0 + 1 2.0 + 6 3.0 + >>> df.interpolate(method="index") + val + 0 1.000000 + 1 1.333333 + 6 3.000000 Using polynomial interpolation. >>> df["d"].interpolate(method="polynomial", order=2) From 78b1893e43517e1fc557707c44a3a3ee5635bf5a Mon Sep 17 00:00:00 2001 From: anishkarki Date: Sat, 9 Nov 2024 10:17:02 +1000 Subject: [PATCH 02/11] DOC: Fix rounding in DataFrame.interpolate index method example --- pandas/core/generic.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 00a3c5a24e8a8..ee79316181397 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7817,11 +7817,11 @@ def interpolate( 0 1.0 1 2.0 6 3.0 - >>> df.interpolate(method="index") - val - 0 1.000000 - 1 1.333333 - 6 3.000000 + >>> df.interpolate(method="index").round(1) + val + 0 1.0 + 1 1.3 + 6 3.0 Using polynomial interpolation. >>> df["d"].interpolate(method="polynomial", order=2) From 937fc333df974f74b01bb8eba7887e1d13d5efa8 Mon Sep 17 00:00:00 2001 From: anishkarki Date: Sat, 9 Nov 2024 10:51:06 +1000 Subject: [PATCH 03/11] Fix: Use index method for linear interpolation with non-sequential index --- pandas/core/generic.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ee79316181397..d1f9c7db2e39c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7802,7 +7802,8 @@ def interpolate( 2 2.0 3.0 -3.0 9.0 3 2.0 4.0 -4.0 16.0 - Using linear and index method for linear interpolation. + Using index method for linear interpolation + with non-sequential index. >>> data = {"val": [1, np.nan, 3]} >>> df = pd.DataFrame( ... data, index=[0, 1, 6] @@ -7812,16 +7813,12 @@ def interpolate( 0 1.0 1 NaN 6 3.0 - >>> df.interpolate(method="linear") - val - 0 1.0 - 1 2.0 - 6 3.0 - >>> df.interpolate(method="index").round(1) - val - 0 1.0 - 1 1.3 - 6 3.0 + >>> df["val"].interpolate(method="index").round(1) + 0 1.0 + 1 1.3 + 6 3.0 + Name: val, dtype: float64 + Using polynomial interpolation. >>> df["d"].interpolate(method="polynomial", order=2) From c80d9bf13cc63a64b70108d52527eedbed8a4b63 Mon Sep 17 00:00:00 2001 From: anishkarki Date: Sat, 9 Nov 2024 11:06:33 +1000 Subject: [PATCH 04/11] Refactor: Rename DataFrame variable for non-sequential index --- pandas/core/generic.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d1f9c7db2e39c..1e7b79d076639 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7801,32 +7801,31 @@ def interpolate( 1 1.0 2.0 -2.0 5.0 2 2.0 3.0 -3.0 9.0 3 2.0 4.0 -4.0 16.0 + Using polynomial interpolation. + + >>> df["d"].interpolate(method="polynomial", order=2) + 0 1.0 + 1 4.0 + 2 9.0 + 3 16.0 + Name: d, dtype: float64 Using index method for linear interpolation with non-sequential index. - >>> data = {"val": [1, np.nan, 3]} - >>> df = pd.DataFrame( + >>> data_non_seq = {"val": [1, np.nan, 3]} + >>> df_non_seq = pd.DataFrame( ... data, index=[0, 1, 6] ... ) # a non-sequential index to demonstrate the difference - >>> df + >>> df_non_seq val 0 1.0 1 NaN 6 3.0 - >>> df["val"].interpolate(method="index").round(1) + >>> df_non_seq["val"].interpolate(method="index").round(1) 0 1.0 1 1.3 6 3.0 Name: val, dtype: float64 - - Using polynomial interpolation. - - >>> df["d"].interpolate(method="polynomial", order=2) - 0 1.0 - 1 4.0 - 2 9.0 - 3 16.0 - Name: d, dtype: float64 """ inplace = validate_bool_kwarg(inplace, "inplace") From 5da848b2511d81df6981f6b01bdfe359f377bfcf Mon Sep 17 00:00:00 2001 From: anishkarki Date: Sat, 9 Nov 2024 14:57:52 +1000 Subject: [PATCH 05/11] Refactor: Added Example for non-sequential dataframe at the end of docstring --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1e7b79d076639..5155c25789226 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7801,6 +7801,7 @@ def interpolate( 1 1.0 2.0 -2.0 5.0 2 2.0 3.0 -3.0 9.0 3 2.0 4.0 -4.0 16.0 + Using polynomial interpolation. >>> df["d"].interpolate(method="polynomial", order=2) @@ -7809,7 +7810,6 @@ def interpolate( 2 9.0 3 16.0 Name: d, dtype: float64 - Using index method for linear interpolation with non-sequential index. >>> data_non_seq = {"val": [1, np.nan, 3]} From 62a2359481ff143b3025a088239a5439635370f7 Mon Sep 17 00:00:00 2001 From: anishkarki Date: Sat, 9 Nov 2024 15:11:56 +1000 Subject: [PATCH 06/11] CLN: Remove trailing whitespace in docstring and adding comment --- pandas/core/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5155c25789226..9ecc8f5ecc321 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7802,7 +7802,7 @@ def interpolate( 2 2.0 3.0 -3.0 9.0 3 2.0 4.0 -4.0 16.0 - Using polynomial interpolation. + # Using polynomial interpolation. >>> df["d"].interpolate(method="polynomial", order=2) 0 1.0 @@ -7810,8 +7810,8 @@ def interpolate( 2 9.0 3 16.0 Name: d, dtype: float64 - Using index method for linear interpolation - with non-sequential index. + # Using index method for linear interpolation with + # non-sequential index. >>> data_non_seq = {"val": [1, np.nan, 3]} >>> df_non_seq = pd.DataFrame( ... data, index=[0, 1, 6] From 8cd11de2ee20a7374c89ae238fc2272d06f4d1eb Mon Sep 17 00:00:00 2001 From: anishkarki Date: Sat, 9 Nov 2024 15:24:08 +1000 Subject: [PATCH 07/11] DOC: Revert added non-sequential index example in interpolate() docstring --- pandas/core/generic.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9ecc8f5ecc321..a79125ffc2b0d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7802,7 +7802,7 @@ def interpolate( 2 2.0 3.0 -3.0 9.0 3 2.0 4.0 -4.0 16.0 - # Using polynomial interpolation. + Using polynomial interpolation. >>> df["d"].interpolate(method="polynomial", order=2) 0 1.0 @@ -7810,22 +7810,6 @@ def interpolate( 2 9.0 3 16.0 Name: d, dtype: float64 - # Using index method for linear interpolation with - # non-sequential index. - >>> data_non_seq = {"val": [1, np.nan, 3]} - >>> df_non_seq = pd.DataFrame( - ... data, index=[0, 1, 6] - ... ) # a non-sequential index to demonstrate the difference - >>> df_non_seq - val - 0 1.0 - 1 NaN - 6 3.0 - >>> df_non_seq["val"].interpolate(method="index").round(1) - 0 1.0 - 1 1.3 - 6 3.0 - Name: val, dtype: float64 """ inplace = validate_bool_kwarg(inplace, "inplace") From 122cb734afbaea005e7ee42013d89ffb4c5aa03e Mon Sep 17 00:00:00 2001 From: Anish Karki Date: Mon, 11 Nov 2024 00:07:50 +1000 Subject: [PATCH 08/11] Update pandas/core/generic.py Co-authored-by: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ce0857f1c3deb..04c7c814cd384 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7672,7 +7672,7 @@ def interpolate( time interval between observations. * 'index': The interpolation uses the actual numerical values of the df's index to linearly calculate missing value. - * 'value': Interpolation based on the actual numerical values + * 'values': Interpolation based on the numerical values in the DataFrame, treating them as equally spaced along the index. * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', 'polynomial': Passed to From 92ace218f0604d3b74e96529314d3f216a7643ba Mon Sep 17 00:00:00 2001 From: Anish Karki Date: Mon, 11 Nov 2024 00:16:48 +1000 Subject: [PATCH 09/11] Update pandas/core/generic.py Co-authored-by: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> --- pandas/core/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 04c7c814cd384..56031f20faa16 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7670,8 +7670,8 @@ def interpolate( * 'time': Works on daily and higher resolution data to interpolate given length of interval. This interpolates values based on time interval between observations. - * 'index': The interpolation uses the actual numerical values - of the df's index to linearly calculate missing value. + * 'index': The interpolation uses the numerical values + of the DataFrame's index to linearly calculate missing values. * 'values': Interpolation based on the numerical values in the DataFrame, treating them as equally spaced along the index. * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', From 79d8a0323919f093d015ff8a03e4df3265b5354f Mon Sep 17 00:00:00 2001 From: anishkarki Date: Mon, 11 Nov 2024 01:28:13 +1000 Subject: [PATCH 10/11] Trigger CI From e46a947e562cceebe23eca7eff4d718760c6074b Mon Sep 17 00:00:00 2001 From: anishkarki Date: Mon, 11 Nov 2024 01:28:13 +1000 Subject: [PATCH 11/11] Trigger CI