From 862d62d584d326049c02b6a89e24ee052a033493 Mon Sep 17 00:00:00 2001 From: Atul Aggarwal Date: Wed, 11 Jul 2018 06:52:27 +0530 Subject: [PATCH 1/2] ENH: support NaT values into datetime series for interpolation (#11701) --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/generic.py | 13 +++++++++++++ pandas/tests/series/test_missing.py | 20 ++++++++++++++++++++ 3 files changed, 34 insertions(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 034a56b2ac0cb..780ed1a99ee14 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -84,6 +84,7 @@ Other Enhancements - :meth:`Series.nlargest`, :meth:`Series.nsmallest`, :meth:`DataFrame.nlargest`, and :meth:`DataFrame.nsmallest` now accept the value ``"all"`` for the ``keep`` argument. This keeps all ties for the nth largest/smallest value (:issue:`16818`) - :class:`IntervalIndex` has gained the :meth:`~IntervalIndex.set_closed` method to change the existing ``closed`` value (:issue:`21670`) - :func:`~DataFrame.to_csv` and :func:`~DataFrame.to_json` now support ``compression='infer'`` to infer compression based on filename (:issue:`15008`) +- Implement interpolating ``NaT`` values in ``datetime`` series (:issue:`11701`) - .. _whatsnew_0240.api_breaking: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8da678e0adec0..7591e6e4386e0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6112,6 +6112,14 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, raise NotImplementedError("Interpolation with NaNs in the index " "has not been implemented. 
Try filling " "those NaNs before interpolating.") + is_datetime = False + datetime_timezone = None + if is_datetime64_any_dtype(_maybe_transposed_self): + _datetime_nat_values = _maybe_transposed_self.isnull() + datetime_timezone = _maybe_transposed_self.dt.tz + _maybe_transposed_self = _maybe_transposed_self.astype('int') + _maybe_transposed_self[_datetime_nat_values] = np.nan + is_datetime = True data = _maybe_transposed_self._data new_data = data.interpolate(method=method, axis=ax, index=index, values=_maybe_transposed_self, limit=limit, @@ -6120,6 +6128,11 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, inplace=inplace, downcast=downcast, **kwargs) + if is_datetime: + new_data = self._constructor(new_data) + new_data = pd.to_datetime(new_data, utc=True).dt.tz_convert( + datetime_timezone) + if inplace: if axis == 1: new_data = self._constructor(new_data).T._data diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 2bc44cb1c683f..fe43de74b3586 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -1317,3 +1317,23 @@ def test_series_interpolate_intraday(self): result = ts.reindex(new_index).interpolate(method='time') tm.assert_numpy_array_equal(result.values, exp.values) + + def test_series_interpolate_nat(self): + # GH 11701 + for tz in [None, 'UTC', 'Europe/Paris']: + expected = pd.Series(pd.date_range('2015-01-01', + '2015-01-30', tz=tz)) + result = expected.copy() + result[[3, 4, 5, 13, 14, 15]] = pd.NaT + result = result.interpolate() + tm.assert_series_equal(result, expected) + + def test_series_interpolate_nat_inplace(self): + # GH 11701 + for tz in [None, 'UTC', 'Europe/Paris']: + expected = pd.Series(pd.date_range('2015-01-01', + '2015-01-30', tz=tz)) + result = expected.copy() + result[[3, 4, 5, 13, 14, 15]] = pd.NaT + result.interpolate(inplace=True) + tm.assert_series_equal(result, expected) From 5d8d819e3eafa567a8a95775a6cfafb28ac482ee Mon Sep 
17 00:00:00 2001 From: Atul Aggarwal Date: Thu, 12 Jul 2018 07:03:34 +0530 Subject: [PATCH 2/2] TST: parametrize test for interpolating NaT values in datetime series (#11701) --- pandas/tests/series/test_missing.py | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index fe43de74b3586..033f9877d7768 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -1318,22 +1318,15 @@ def test_series_interpolate_intraday(self): tm.assert_numpy_array_equal(result.values, exp.values) - def test_series_interpolate_nat(self): + @pytest.mark.parametrize("inplace", [True, False]) + def test_series_interpolate_nat(self, tz_naive_fixture, inplace): # GH 11701 - for tz in [None, 'UTC', 'Europe/Paris']: - expected = pd.Series(pd.date_range('2015-01-01', - '2015-01-30', tz=tz)) - result = expected.copy() - result[[3, 4, 5, 13, 14, 15]] = pd.NaT + expected = pd.Series(pd.date_range('2015-01-01', + '2015-01-30', tz=tz_naive_fixture)) + result = expected.copy() + result[[3, 4, 5, 13, 14, 15]] = pd.NaT + if inplace: + result.interpolate(inplace=inplace) + else: result = result.interpolate() - tm.assert_series_equal(result, expected) - - def test_series_interpolate_nat_inplace(self): - # GH 11701 - for tz in [None, 'UTC', 'Europe/Paris']: - expected = pd.Series(pd.date_range('2015-01-01', - '2015-01-30', tz=tz)) - result = expected.copy() - result[[3, 4, 5, 13, 14, 15]] = pd.NaT - result.interpolate(inplace=True) - tm.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected)