From 49f165fe4c73bd2fea3c32829895ff42910f0ed9 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 26 Jan 2023 16:14:29 -0800 Subject: [PATCH 1/3] BUG: Series.interpolate with dt64/td64 raises --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/internals/blocks.py | 50 +++++++++++++++---- pandas/core/missing.py | 11 +++- .../tests/series/methods/test_interpolate.py | 34 +++++++++++++ 4 files changed, 86 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 76aa426cc1cee..a38786c28a128 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -983,6 +983,7 @@ Datetimelike - Bug in :func:`to_datetime` was raising ``ValueError`` when parsing mixed-offset :class:`Timestamp` with ``errors='ignore'`` (:issue:`50585`) - Bug in :func:`to_datetime` was incorrectly handling floating-point inputs within 1 ``unit`` of the overflow boundaries (:issue:`50183`) - Bug in :func:`to_datetime` with unit of "Y" or "M" giving incorrect results, not matching pointwise :class:`Timestamp` results (:issue:`50870`) +- Bug in :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` with datetime or timedelta dtypes incorrectly raising ``ValueError`` (:issue:`11312`) - Timedelta diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 8fb6a18ca137a..2a447cc52c0ad 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1180,6 +1180,7 @@ def fillna( def interpolate( self, + *, method: FillnaOptions = "pad", axis: AxisInt = 0, index: Index | None = None, @@ -1212,15 +1213,15 @@ def interpolate( # split improves performance in ndarray.copy() return self.split_and_operate( type(self).interpolate, - method, - axis, - index, - inplace, - limit, - limit_direction, - limit_area, - fill_value, - downcast, + method=method, + axis=axis, + index=index, + inplace=inplace, + limit=limit, + limit_direction=limit_direction, + limit_area=limit_area, + fill_value=fill_value, + downcast=downcast, **kwargs, ) @@ -1601,6 +1602,7 @@ def values_for_json(self) -> np.ndarray: # error: Signature of "interpolate" incompatible with supertype "Block" def interpolate( # type: ignore[override] self, + *, method: FillnaOptions = "pad", axis: int = 0, inplace: bool = False, @@ -1976,6 +1978,36 @@ class DatetimeLikeBlock(NDArrayBackedExtensionBlock): def values_for_json(self) -> np.ndarray: return self.values._ndarray + # error: Signature of "interpolate" incompatible with supertype "Block" + def interpolate( # type: ignore[override] + self, + *, + method: FillnaOptions = "pad", + index: Index | None = None, + axis: int = 0, + inplace: bool = False, + limit: int | None = None, + fill_value=None, + **kwargs, + ): + values = self.values + + if method == "linear": + # TODO: GH#50950 implement for arbitrary EAs + data_out = values._ndarray if inplace else values._ndarray.copy() + missing.interpolate_array_2d( + data_out, method=method, limit=limit, index=index + ) + new_values = type(values)._simple_new(data_out, dtype=values.dtype) + return self.make_block_same_class(new_values) + + elif values.ndim == 2 and axis == 0: + # NDArrayBackedExtensionArray.fillna assumes axis=1 + new_values = values.T.fillna(value=fill_value, method=method, limit=limit).T + else: + new_values = values.fillna(value=fill_value, method=method, limit=limit) + return self.make_block_same_class(new_values) + class DatetimeTZBlock(DatetimeLikeBlock): """implement a datetime64 block with a tz attribute""" diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 162186bc4186a..fc3178c8b7132 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -16,6 +16,7 @@ import numpy as np from pandas._libs import ( + NaT, algos, lib, ) @@ -457,6 +458,11 @@ def _interpolate_1d( # sort preserve_nans and convert to list preserve_nans = sorted(preserve_nans) + is_datetimelike = needs_i8_conversion(yvalues.dtype) + + if is_datetimelike: + yvalues = yvalues.view("i8") + if method in NP_METHODS: # np.interp requires sorted X values, #21037 @@ -476,7 +482,10 @@ def _interpolate_1d( **kwargs, ) - yvalues[preserve_nans] = np.nan + if is_datetimelike: + yvalues[preserve_nans] = NaT.value + else: + yvalues[preserve_nans] = np.nan return diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py index 506abd0ca3de2..f1eae7194df94 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ b/pandas/tests/series/methods/test_interpolate.py @@ -78,6 +78,40 @@ def interp_methods_ind(request): class TestSeriesInterpolateData: + def test_interpolate_datetimelike_values(self): + # GH#11312 + orig = Series(date_range("2012-01-01", periods=5)) + ser = orig.copy() + ser[2] = pd.NaT + + res = ser.interpolate() + expected = orig + tm.assert_series_equal(res, expected) + + # datetime64tz cast + ser_tz = ser.dt.tz_localize("US/Pacific") + res_tz = ser_tz.interpolate() + expected_tz = orig.dt.tz_localize("US/Pacific") + tm.assert_series_equal(res_tz, expected_tz) + + # timedelta64 cast + ser_td = ser - ser[0] + res_td = ser_td.interpolate() + expected_td = orig - orig[0] + tm.assert_series_equal(res_td, expected_td) + + @pytest.mark.xfail(reason="EA.fillna does not handle 'linear' method") + def test_interpolate_period_values(self): + orig = Series(date_range("2012-01-01", periods=5)) + ser = orig.copy() + ser[2] = pd.NaT + + # period cast + ser_per = ser.dt.to_period("D") + res_per = ser_per.interpolate() + expected_per = orig.dt.to_period("D") + tm.assert_series_equal(res_per, expected_per) + def test_interpolate(self, datetime_series): ts = Series(np.arange(len(datetime_series), dtype=float), datetime_series.index) From af3d86cdcb0224e457ea1ae917a1884c285432e3 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 27 Jan 2023 07:52:51 -0800 Subject: [PATCH 2/3] mypy fixup --- pandas/core/internals/blocks.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 2a447cc52c0ad..69dab2c86a4b2 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1599,8 +1599,7 @@ def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray: def values_for_json(self) -> np.ndarray: return np.asarray(self.values) - # error: Signature of "interpolate" incompatible with supertype "Block" - def interpolate( # type: ignore[override] + def interpolate( self, *, method: FillnaOptions = "pad", @@ -1978,8 +1977,7 @@ class DatetimeLikeBlock(NDArrayBackedExtensionBlock): def values_for_json(self) -> np.ndarray: return self.values._ndarray - # error: Signature of "interpolate" incompatible with supertype "Block" - def interpolate( # type: ignore[override] + def interpolate( self, *, method: FillnaOptions = "pad", @@ -1992,7 +1990,10 @@ def interpolate( # type: ignore[override] ): values = self.values - if method == "linear": + # error: Non-overlapping equality check (left operand type: + # "Literal['backfill', 'bfill', 'ffill', 'pad']", right operand type: + # "Literal['linear']") [comparison-overlap] + if method == "linear": # type: ignore[comparison-overlap] # TODO: GH#50950 implement for arbitrary EAs data_out = values._ndarray if inplace else values._ndarray.copy() missing.interpolate_array_2d( From f756ab176038135e75ad546e405f8ae9679314a8 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 2 Feb 2023 11:32:47 -0800 Subject: [PATCH 3/3] fix+test DataFrame case --- pandas/core/internals/blocks.py | 2 +- .../tests/frame/methods/test_interpolate.py | 23 +++++++++++++++++++ .../tests/series/methods/test_interpolate.py | 22 ------------------ 3 files changed, 24 insertions(+), 23 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index ded913e015bf7..94433fdfbe753 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1995,7 +1995,7 @@ def interpolate( # TODO: GH#50950 implement for arbitrary EAs data_out = values._ndarray if inplace else values._ndarray.copy() missing.interpolate_array_2d( - data_out, method=method, limit=limit, index=index + data_out, method=method, limit=limit, index=index, axis=axis ) new_values = type(values)._simple_new(data_out, dtype=values.dtype) return self.make_block_same_class(new_values) diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index 00fdfe373f1d8..20e4571e2ba05 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -5,6 +5,7 @@ from pandas import ( DataFrame, + NaT, Series, date_range, ) @@ -12,6 +13,28 @@ class TestDataFrameInterpolate: + def test_interpolate_datetimelike_values(self, frame_or_series): + # GH#11312, GH#51005 + orig = Series(date_range("2012-01-01", periods=5)) + ser = orig.copy() + ser[2] = NaT + + res = frame_or_series(ser).interpolate() + expected = frame_or_series(orig) + tm.assert_equal(res, expected) + + # datetime64tz cast + ser_tz = ser.dt.tz_localize("US/Pacific") + res_tz = frame_or_series(ser_tz).interpolate() + expected_tz = frame_or_series(orig.dt.tz_localize("US/Pacific")) + tm.assert_equal(res_tz, expected_tz) + + # timedelta64 cast + ser_td = ser - ser[0] + res_td = frame_or_series(ser_td).interpolate() + expected_td = frame_or_series(orig - orig[0]) + tm.assert_equal(res_td, expected_td) + def test_interpolate_inplace(self, frame_or_series, using_array_manager, request): # GH#44749 if using_array_manager and frame_or_series is DataFrame: diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py index f1eae7194df94..0f19bfc48e05b 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ b/pandas/tests/series/methods/test_interpolate.py @@ -78,28 +78,6 @@ def interp_methods_ind(request): class TestSeriesInterpolateData: - def test_interpolate_datetimelike_values(self): - # GH#11312 - orig = Series(date_range("2012-01-01", periods=5)) - ser = orig.copy() - ser[2] = pd.NaT - - res = ser.interpolate() - expected = orig - tm.assert_series_equal(res, expected) - - # datetime64tz cast - ser_tz = ser.dt.tz_localize("US/Pacific") - res_tz = ser_tz.interpolate() - expected_tz = orig.dt.tz_localize("US/Pacific") - tm.assert_series_equal(res_tz, expected_tz) - - # timedelta64 cast - ser_td = ser - ser[0] - res_td = ser_td.interpolate() - expected_td = orig - orig[0] - tm.assert_series_equal(res_td, expected_td) - @pytest.mark.xfail(reason="EA.fillna does not handle 'linear' method") def test_interpolate_period_values(self): orig = Series(date_range("2012-01-01", periods=5))