From 82c854eef74e264b38ae8bfbc6b0e591b98a6ecd Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Wed, 26 Mar 2025 12:39:41 -0400 Subject: [PATCH 1/4] REGR: Interpolate with method=index --- pandas/core/missing.py | 11 +------- pandas/tests/resample/test_base.py | 26 ++++++++++--------- pandas/tests/resample/test_time_grouper.py | 2 +- .../tests/series/methods/test_interpolate.py | 2 +- 4 files changed, 17 insertions(+), 24 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index ff2daae002731..01ace80198f5d 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -314,16 +314,7 @@ def get_interp_index(method, index: Index) -> Index: # prior default from pandas import Index - if isinstance(index.dtype, DatetimeTZDtype) or lib.is_np_dtype( - index.dtype, "mM" - ): - # Convert datetime-like indexes to int64 - index = Index(index.view("i8")) - - elif not is_numeric_dtype(index.dtype): - # We keep behavior consistent with prior versions of pandas for - # non-numeric, non-datetime indexes - index = Index(range(len(index))) + index = Index(np.arange(len(index))) else: methods = {"index", "values", "nearest", "time"} is_numeric_or_datetime = ( diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index eb4ba6a3fdf71..fdcd9c8920dab 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -123,20 +123,22 @@ def test_resample_interpolate_regular_sampling_off_grid( ser = Series(np.arange(5.0), index) method = all_1d_no_arg_interpolation_methods - # Resample to 1 hour sampling and interpolate with the given method - ser_resampled = ser.resample("1h").interpolate(method) + result = ser.resample("1h").interpolate(method) - # Check that none of the resampled values are NaN, except the first one - # which lies 1 minute before the first actual data point - assert np.isnan(ser_resampled.iloc[0]) - assert not ser_resampled.iloc[1:].isna().any() - - if method not in ["nearest", "zero"]: - # Check that the resampled values are close to the expected values - # except for methods with known inaccuracies - assert np.all( - np.isclose(ser_resampled.values[1:], np.arange(0.5, 4.5, 0.5), rtol=1.0e-1) + if method == "linear": + values = np.repeat(np.arange(0.0, 4.0), 2) + np.tile( + np.arange(1 / 3, 0.7, 1 / 3), 4 ) + elif method == "nearest": + values = np.repeat(np.arange(0.0, 5.0), 2)[1:-1] + elif method == "zero": + values = np.repeat(np.arange(0.0, 4.0), 2) + else: + values = 0.491667 + np.arange(0.0, 4.0, 0.5) + values = np.insert(values, 0, np.nan) + index = date_range("2000-01-01 00:00:00", periods=9, freq="1h") + expected = Series(values, index=index) + tm.assert_series_equal(result, expected) def test_resample_interpolate_irregular_sampling(all_1d_no_arg_interpolation_methods): diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 3cc95922e7f2f..8eda3742f4ba9 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -433,7 +433,7 @@ def test_groupby_resample_interpolate_with_apply_syntax_off_grid(groupy_test_df) data={ "price": [ 10.0, - 9.21131, + 9.5, 11.0, ] }, diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py index ff7f8d0b7fa72..f8ceb67b34af2 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ b/pandas/tests/series/methods/test_interpolate.py @@ -270,7 +270,7 @@ def test_nan_interpolate(self, kwargs): def test_nan_irregular_index(self): s = Series([1, 2, np.nan, 4], index=[1, 3, 5, 9]) result = s.interpolate() - expected = Series([1.0, 2.0, 2.6666666666666665, 4.0], index=[1, 3, 5, 9]) + expected = Series([1.0, 2.0, 3.0, 4.0], index=[1, 3, 5, 9]) tm.assert_series_equal(result, expected) def test_nan_str_index(self): From c63708fbe4510e8068920d0a609887a25703864a Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Wed, 26 Mar 2025 12:41:50 -0400 Subject: [PATCH 2/4] Magic trailing comma --- pandas/tests/resample/test_time_grouper.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 8eda3742f4ba9..e6cfa12f5f61a 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -430,13 +430,7 @@ def test_groupby_resample_interpolate_with_apply_syntax_off_grid(groupy_test_df) ) expected = DataFrame( - data={ - "price": [ - 10.0, - 9.5, - 11.0, - ] - }, + data={"price": [10.0, 9.5, 11.0]}, index=expected_ind, ) tm.assert_frame_equal(result, expected, check_names=False) From ec08504c35fce0e191026bc50961ac17db38bf44 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Wed, 26 Mar 2025 12:48:29 -0400 Subject: [PATCH 3/4] perf --- pandas/core/missing.py | 4 ++-- pandas/tests/resample/test_base.py | 4 +--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 01ace80198f5d..e2fb3b9a6fc0b 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -312,9 +312,9 @@ def get_interp_index(method, index: Index) -> Index: # create/use the index if method == "linear": # prior default - from pandas import Index + from pandas import RangeIndex - index = Index(np.arange(len(index))) + index = RangeIndex(len(index)) else: methods = {"index", "values", "nearest", "time"} is_numeric_or_datetime = ( diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index fdcd9c8920dab..d9bd89af61aaf 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -126,9 +126,7 @@ def test_resample_interpolate_regular_sampling_off_grid( result = ser.resample("1h").interpolate(method) if method == "linear": - values = np.repeat(np.arange(0.0, 4.0), 2) + np.tile( - np.arange(1 / 3, 0.7, 1 / 3), 4 - ) + values = np.repeat(np.arange(0.0, 4.0), 2) + np.tile([1 / 3, 2 / 3], 4) elif method == "nearest": values = np.repeat(np.arange(0.0, 5.0), 2)[1:-1] elif method == "zero": From fbddfebe069573b3f94d2ad68fc99225f5d029e8 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 29 Mar 2025 08:49:57 -0400 Subject: [PATCH 4/4] Update docstring --- pandas/core/resample.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 1d27687d15af0..753f7fb6cea1a 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -897,17 +897,17 @@ def interpolate( to non-aligned timestamps, as in the following example: >>> series.resample("400ms").interpolate("linear") - 2023-03-01 07:00:00.000 1.0 - 2023-03-01 07:00:00.400 0.2 - 2023-03-01 07:00:00.800 -0.6 - 2023-03-01 07:00:01.200 -0.4 - 2023-03-01 07:00:01.600 0.8 - 2023-03-01 07:00:02.000 2.0 - 2023-03-01 07:00:02.400 1.6 - 2023-03-01 07:00:02.800 1.2 - 2023-03-01 07:00:03.200 1.4 - 2023-03-01 07:00:03.600 2.2 - 2023-03-01 07:00:04.000 3.0 + 2023-03-01 07:00:00.000 1.000000 + 2023-03-01 07:00:00.400 0.333333 + 2023-03-01 07:00:00.800 -0.333333 + 2023-03-01 07:00:01.200 0.000000 + 2023-03-01 07:00:01.600 1.000000 + 2023-03-01 07:00:02.000 2.000000 + 2023-03-01 07:00:02.400 1.666667 + 2023-03-01 07:00:02.800 1.333333 + 2023-03-01 07:00:03.200 1.666667 + 2023-03-01 07:00:03.600 2.333333 + 2023-03-01 07:00:04.000 3.000000 Freq: 400ms, dtype: float64 Note that the series correctly decreases between two anchors