Skip to content

BUG: Series.interpolate with dt64/td64 raises #51005

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Feb 3, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -984,6 +984,7 @@ Datetimelike
- Bug in :func:`to_datetime` was raising ``ValueError`` when parsing mixed-offset :class:`Timestamp` with ``errors='ignore'`` (:issue:`50585`)
- Bug in :func:`to_datetime` was incorrectly handling floating-point inputs within 1 ``unit`` of the overflow boundaries (:issue:`50183`)
- Bug in :func:`to_datetime` with unit of "Y" or "M" giving incorrect results, not matching pointwise :class:`Timestamp` results (:issue:`50870`)
- Bug in :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` with datetime or timedelta dtypes incorrectly raising ``ValueError`` (:issue:`11312`)
-

Timedelta
Expand Down
55 changes: 44 additions & 11 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1180,6 +1180,7 @@ def fillna(

def interpolate(
self,
*,
method: FillnaOptions = "pad",
axis: AxisInt = 0,
index: Index | None = None,
Expand Down Expand Up @@ -1212,15 +1213,15 @@ def interpolate(
# split improves performance in ndarray.copy()
return self.split_and_operate(
type(self).interpolate,
method,
axis,
index,
inplace,
limit,
limit_direction,
limit_area,
fill_value,
downcast,
method=method,
axis=axis,
index=index,
inplace=inplace,
limit=limit,
limit_direction=limit_direction,
limit_area=limit_area,
fill_value=fill_value,
downcast=downcast,
**kwargs,
)

Expand Down Expand Up @@ -1598,9 +1599,9 @@ def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
def values_for_json(self) -> np.ndarray:
    """Return ``self.values`` coerced to a NumPy array via ``np.asarray``."""
    as_ndarray = np.asarray(self.values)
    return as_ndarray

# error: Signature of "interpolate" incompatible with supertype "Block"
def interpolate( # type: ignore[override]
def interpolate(
self,
*,
method: FillnaOptions = "pad",
axis: int = 0,
inplace: bool = False,
Expand Down Expand Up @@ -1976,6 +1977,38 @@ class DatetimeLikeBlock(NDArrayBackedExtensionBlock):
def values_for_json(self) -> np.ndarray:
    """Return the ``_ndarray`` attribute of ``self.values`` unchanged."""
    backing_array = self.values
    return backing_array._ndarray

def interpolate(
    self,
    *,
    method: FillnaOptions = "pad",
    index: Index | None = None,
    axis: int = 0,
    inplace: bool = False,
    limit: int | None = None,
    fill_value=None,
    **kwargs,
):
    """
    Fill missing entries of this datetime-like block.

    ``method="linear"`` interpolates directly on the ndarray backing
    ``self.values`` through ``missing.interpolate_array_2d``; any other
    method is delegated to the array's own ``fillna``.

    Parameters
    ----------
    method : str, default "pad"
        Fill method; ``"linear"`` selects the interpolation path.
    index : Index or None
        Forwarded to ``missing.interpolate_array_2d`` on the linear path.
    axis : int, default 0
        Axis to operate along. Forwarded on the linear path, and used to
        decide whether to transpose before ``fillna`` otherwise.
    inplace : bool, default False
        Linear path only: operate on the backing ndarray itself instead
        of on a copy.
    limit : int or None
        Forwarded to the interpolation / ``fillna`` call.
    fill_value : optional
        Passed as ``value`` to ``fillna`` on the non-linear path.
    **kwargs
        Accepted for signature compatibility; not used by this method.

    Returns
    -------
    The result of ``self.make_block_same_class`` on the filled values.
    """
    values = self.values

    # error: Non-overlapping equality check (left operand type:
    # "Literal['backfill', 'bfill', 'ffill', 'pad']", right operand type:
    # "Literal['linear']") [comparison-overlap]
    if method == "linear":  # type: ignore[comparison-overlap]
        # TODO: GH#50950 implement for arbitrary EAs
        data_out = values._ndarray if inplace else values._ndarray.copy()
        # axis must be forwarded explicitly; otherwise the helper's default
        # axis is used regardless of what the caller asked for, which breaks
        # the 2D (DataFrame) case.
        missing.interpolate_array_2d(
            data_out, method=method, limit=limit, index=index, axis=axis
        )
        new_values = type(values)._simple_new(data_out, dtype=values.dtype)
        return self.make_block_same_class(new_values)

    elif values.ndim == 2 and axis == 0:
        # NDArrayBackedExtensionArray.fillna assumes axis=1
        new_values = values.T.fillna(value=fill_value, method=method, limit=limit).T
    else:
        new_values = values.fillna(value=fill_value, method=method, limit=limit)
    return self.make_block_same_class(new_values)


class DatetimeTZBlock(DatetimeLikeBlock):
"""implement a datetime64 block with a tz attribute"""
Expand Down
11 changes: 10 additions & 1 deletion pandas/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import numpy as np

from pandas._libs import (
NaT,
algos,
lib,
)
Expand Down Expand Up @@ -457,6 +458,11 @@ def _interpolate_1d(
# sort preserve_nans and convert to list
preserve_nans = sorted(preserve_nans)

is_datetimelike = needs_i8_conversion(yvalues.dtype)

if is_datetimelike:
yvalues = yvalues.view("i8")

if method in NP_METHODS:
# np.interp requires sorted X values, #21037

Expand All @@ -476,7 +482,10 @@ def _interpolate_1d(
**kwargs,
)

yvalues[preserve_nans] = np.nan
if is_datetimelike:
yvalues[preserve_nans] = NaT.value
else:
yvalues[preserve_nans] = np.nan
return


Expand Down
34 changes: 34 additions & 0 deletions pandas/tests/series/methods/test_interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,40 @@ def interp_methods_ind(request):


class TestSeriesInterpolateData:
def test_interpolate_datetimelike_values(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have DataFrame.interpolate tests with NaT?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so. Before long I'll do a pass to parametrize all of these over frame_or_series.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good suggestion; it turns out it was broken with DataFrame. Fixed.

# GH#11312
orig = Series(date_range("2012-01-01", periods=5))
ser = orig.copy()
ser[2] = pd.NaT

res = ser.interpolate()
expected = orig
tm.assert_series_equal(res, expected)

# datetime64tz cast
ser_tz = ser.dt.tz_localize("US/Pacific")
res_tz = ser_tz.interpolate()
expected_tz = orig.dt.tz_localize("US/Pacific")
tm.assert_series_equal(res_tz, expected_tz)

# timedelta64 cast
ser_td = ser - ser[0]
res_td = ser_td.interpolate()
expected_td = orig - orig[0]
tm.assert_series_equal(res_td, expected_td)

@pytest.mark.xfail(reason="EA.fillna does not handle 'linear' method")
def test_interpolate_period_values(self):
    """
    Interpolating a Period-dtype Series with one missing entry should
    reproduce the complete daily range (marked xfail; see the reason above).
    """
    complete = Series(date_range("2012-01-01", periods=5))
    with_gap = complete.copy()
    with_gap[2] = pd.NaT

    # period cast
    expected_per = complete.dt.to_period("D")
    res_per = with_gap.dt.to_period("D").interpolate()
    tm.assert_series_equal(res_per, expected_per)

def test_interpolate(self, datetime_series):
ts = Series(np.arange(len(datetime_series), dtype=float), datetime_series.index)

Expand Down