Skip to content

Commit 238205c

Browse files
committed
Resolved merge conflicts
1 parent 86ad444 commit 238205c

File tree

3 files changed

+126
-2
lines changed

3 files changed

+126
-2
lines changed

pandas/core/missing.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,16 @@ def get_interp_index(method, index: Index) -> Index:
304304
# prior default
305305
from pandas import Index
306306

307-
index = Index(np.arange(len(index)))
307+
if isinstance(index.dtype, DatetimeTZDtype) or lib.is_np_dtype(
308+
index.dtype, "mM"
309+
):
310+
# Convert datetime-like indexes to int64
311+
index = Index(index.view("i8"))
312+
313+
elif not is_numeric_dtype(index.dtype):
314+
# We keep behavior consistent with prior versions of pandas for
315+
# non-numeric, non-datetime indexes
316+
index = Index(np.arange(len(index)))
308317
else:
309318
methods = {"index", "values", "nearest", "time"}
310319
is_numeric_or_datetime = (

pandas/core/resample.py

+24-1
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@
8383
TimedeltaIndex,
8484
timedelta_range,
8585
)
86+
from pandas.core.reshape.concat import concat
8687

8788
from pandas.tseries.frequencies import (
8889
is_subperiod,
@@ -1085,7 +1086,23 @@ def interpolate(
10851086
"""
10861087
assert downcast is lib.no_default # just checking coverage
10871088
result = self._upsample("asfreq")
1088-
return result.interpolate(
1089+
1090+
# If the original data has timestamps which are not aligned with the
1091+
# target timestamps, we need to add those points back to the object
1092+
# that is supposed to be interpolated. This does not work with
1093+
# PeriodIndex, so we skip this case.
1094+
obj = self._selected_obj
1095+
is_period_index = isinstance(obj.index, PeriodIndex)
1096+
1097+
if not is_period_index:
1098+
final_index = result.index
1099+
missing_data_points_index = obj.index.difference(final_index)
1100+
if len(missing_data_points_index) > 0:
1101+
result = concat(
1102+
[result, obj.loc[missing_data_points_index]]
1103+
).sort_index()
1104+
1105+
result_interpolated = result.interpolate(
10891106
method=method,
10901107
axis=axis,
10911108
limit=limit,
@@ -1096,6 +1113,12 @@ def interpolate(
10961113
**kwargs,
10971114
)
10981115

1116+
# We make sure that original data points which do not align with the
1117+
# resampled index are removed
1118+
if is_period_index:
1119+
return result_interpolated
1120+
return result_interpolated.loc[final_index]
1121+
10991122
@final
11001123
def asfreq(self, fill_value=None):
11011124
"""

pandas/tests/resample/test_base.py

+92
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,51 @@
2121
from pandas.core.indexes.timedeltas import timedelta_range
2222
from pandas.core.resample import _asfreq_compat
2323

24+
# a fixture value can be overridden by the test parameter value. Note that the
25+
# value of the fixture can be overridden this way even if the test doesn't use
26+
# it directly (doesn't mention it in the function prototype).
27+
# see https://docs.pytest.org/en/latest/fixture.html#override-a-fixture-with-direct-test-parametrization # noqa: E501
28+
# in this module we override the fixture values defined in conftest.py
29+
# tuples of '_index_factory,_series_name,_index_start,_index_end'
30+
DATE_RANGE = (date_range, "dti", datetime(2005, 1, 1), datetime(2005, 1, 10))
31+
PERIOD_RANGE = (period_range, "pi", datetime(2005, 1, 1), datetime(2005, 1, 10))
32+
TIMEDELTA_RANGE = (timedelta_range, "tdi", "1 day", "10 day")
33+
34+
all_ts = pytest.mark.parametrize(
35+
"_index_factory,_series_name,_index_start,_index_end",
36+
[DATE_RANGE, PERIOD_RANGE, TIMEDELTA_RANGE],
37+
)
38+
39+
all_1d_no_arg_interpolation_methods = pytest.mark.parametrize(
40+
"method",
41+
[
42+
"linear",
43+
"time",
44+
"index",
45+
"values",
46+
"nearest",
47+
"zero",
48+
"slinear",
49+
"quadratic",
50+
"cubic",
51+
"barycentric",
52+
"krogh",
53+
"from_derivatives",
54+
"piecewise_polynomial",
55+
"pchip",
56+
"akima",
57+
],
58+
)
59+
60+
61+
@pytest.fixture
62+
def create_index(_index_factory):
63+
def _create_index(*args, **kwargs):
64+
"""Return the index built by calling _index_factory with the given args and kwargs."""
65+
return _index_factory(*args, **kwargs)
66+
67+
return _create_index
68+
2469

2570
@pytest.mark.parametrize("freq", ["2D", "1h"])
2671
@pytest.mark.parametrize(
@@ -89,6 +134,53 @@ def test_resample_interpolate(index):
89134
tm.assert_frame_equal(result, expected)
90135

91136

137+
@all_1d_no_arg_interpolation_methods
138+
def test_resample_interpolate_regular_sampling_off_grid(method):
139+
# GH#21351
140+
index = date_range("2000-01-01 00:01:00", periods=5, freq="2h")
141+
ser = Series(np.arange(5.0), index)
142+
143+
# Resample to 1 hour sampling and interpolate with the given method
144+
ser_resampled = ser.resample("1h").interpolate(method)
145+
146+
# Check that none of the resampled values are NaN, except the first one
147+
# which lies 1 minute before the first actual data point
148+
assert np.isnan(ser_resampled.iloc[0])
149+
assert not ser_resampled.iloc[1:].isna().any()
150+
151+
if method not in ["nearest", "zero"]:
152+
# Check that the resampled values are close to the expected values
153+
# except for methods with known inaccuracies
154+
assert np.all(
155+
np.isclose(ser_resampled.values[1:], np.arange(0.5, 4.5, 0.5), rtol=1.0e-1)
156+
)
157+
158+
159+
@all_1d_no_arg_interpolation_methods
160+
def test_resample_interpolate_irregular_sampling(method):
161+
# GH#21351
162+
ser = Series(
163+
np.linspace(0.0, 1.0, 5),
164+
index=DatetimeIndex(
165+
[
166+
"2000-01-01 00:00:03",
167+
"2000-01-01 00:00:22",
168+
"2000-01-01 00:00:24",
169+
"2000-01-01 00:00:31",
170+
"2000-01-01 00:00:39",
171+
]
172+
),
173+
)
174+
175+
# Resample to 5 second sampling and interpolate with the given method
176+
ser_resampled = ser.resample("5s").interpolate(method)
177+
178+
# Check that none of the resampled values are NaN, except the first one
179+
# which lies 3 seconds before the first actual data point
180+
assert np.isnan(ser_resampled.iloc[0])
181+
assert not ser_resampled.iloc[1:].isna().any()
182+
183+
92184
def test_raises_on_non_datetimelike_index():
93185
# this is a non datetimelike index
94186
xp = DataFrame()

0 commit comments

Comments
 (0)