From 7c1b44c500d6f99419c41e7ae55b0b529e7b6be5 Mon Sep 17 00:00:00 2001 From: scls19fr Date: Thu, 28 Sep 2017 21:09:19 +0200 Subject: [PATCH 1/4] BUG: Implement interpolating NaT values in datetime series Closes #11701 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/series.py | 15 +++++++++++++-- pandas/tests/series/test_missing.py | 10 ++++++++++ 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index dae93feb48b02..37d67e93b86a7 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -163,6 +163,7 @@ Other Enhancements - :func:`Categorical.rename_categories` now accepts a dict-like argument as `new_categories` and only updates the categories found in that dict. (:issue:`17336`) - :func:`read_excel` raises ``ImportError`` with a better message if ``xlrd`` is not installed. (:issue:`17613`) - :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names +- Implement interpolating ``NaT`` values in ``datetime`` series (:issue:`11701`) .. _whatsnew_0210.api_breaking: diff --git a/pandas/core/series.py b/pandas/core/series.py index a05324142b223..3430ddff1b041 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -19,6 +19,7 @@ is_integer, is_integer_dtype, is_float_dtype, is_extension_type, is_datetimetz, + is_datetime64_dtype, is_datetime64tz_dtype, is_timedelta64_dtype, is_list_like, @@ -34,8 +35,10 @@ maybe_upcast, infer_dtype_from_scalar, maybe_convert_platform, maybe_cast_to_datetime, maybe_castable) -from pandas.core.dtypes.missing import isna, notna, remove_na_arraylike - +from pandas.core.dtypes.missing import (isna, notna, + remove_na_arraylike, + isnull) +from pandas.core.tools.datetimes import to_datetime from pandas.core.common import (is_bool_indexer, _default_index, _asarray_tuplesafe, @@ -2734,6 +2737,14 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None, return result + def interpolate(self, *args, **kwargs): + if is_datetime64_dtype(self) and self.isnull().any(): + s2 = self.astype('i8').astype('f8') + s2[self.isnull()] = np.nan + return to_datetime(s2.interpolate(*args, **kwargs)) + else: + return super(Series, self).interpolate(*args, **kwargs) + def to_csv(self, path=None, index=True, sep=",", na_rep='', float_format=None, header=False, index_label=None, mode='w', encoding=None, date_format=None, decimal='.'): diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 01bf7274fd384..c71510a45c118 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -1218,3 +1218,13 @@ def test_series_interpolate_intraday(self): result = ts.reindex(new_index).interpolate(method='time') tm.assert_numpy_array_equal(result.values, exp.values) + + def test_series_interpolate_nat(self): + # GH 11701 + expected = pd.Series(pd.date_range('2015-01-01', '2015-01-30')) + result = expected.copy() + result[[3, 4, 5, 13, 14, 15]] = pd.NaT + result = result.interpolate() + print(result) + print(expected) + tm.assert_series_equal(result, expected) From 113d7654ad89a3e1cd0ccbf746065598a0ee3a89 Mon Sep 17 00:00:00 2001 From: scls19fr Date: Thu, 28 Sep 2017 21:14:46 +0200 Subject: [PATCH 2/4] PEP8 --- pandas/core/series.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 3430ddff1b041..897192f4d630b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2741,9 +2741,9 @@ def interpolate(self, *args, **kwargs): if is_datetime64_dtype(self) and self.isnull().any(): s2 = self.astype('i8').astype('f8') s2[self.isnull()] = np.nan - return to_datetime(s2.interpolate(*args, **kwargs)) + return to_datetime(s2.interpolate(*args, **kwargs)) else: - return super(Series, self).interpolate(*args, **kwargs) + return super(Series, self).interpolate(*args, **kwargs) def to_csv(self, path=None, index=True, sep=",", na_rep='', float_format=None, header=False, index_label=None, From 717443bd01da7c1be86ba8a9ccb2379246b8c188 Mon Sep 17 00:00:00 2001 From: scls19fr Date: Thu, 28 Sep 2017 21:27:34 +0200 Subject: [PATCH 3/4] Test with TZ aware datetime --- pandas/tests/series/test_missing.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index c71510a45c118..1b526ebd26c38 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -1221,10 +1221,9 @@ def test_series_interpolate_intraday(self): def test_series_interpolate_nat(self): # GH 11701 - expected = pd.Series(pd.date_range('2015-01-01', '2015-01-30')) - result = expected.copy() - result[[3, 4, 5, 13, 14, 15]] = pd.NaT - result = result.interpolate() - print(result) - print(expected) - tm.assert_series_equal(result, expected) + for tz in [None, "UTC"]: + expected = pd.Series(pd.date_range('2015-01-01', '2015-01-30', tz=tz)) + result = expected.copy() + result[[3, 4, 5, 13, 14, 15]] = pd.NaT + result = result.interpolate() + tm.assert_series_equal(result, expected) From 3a90ea70cf6f1708c69c6f9c731a5a1815455f25 Mon Sep 17 00:00:00 2001 From: scls19fr Date: Thu, 28 Sep 2017 21:40:44 +0200 Subject: [PATCH 4/4] WIP --- pandas/core/series.py | 8 ++++---- pandas/tests/series/test_missing.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 897192f4d630b..31825f2aa3e45 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -35,9 +35,8 @@ maybe_upcast, infer_dtype_from_scalar, maybe_convert_platform, maybe_cast_to_datetime, maybe_castable) -from pandas.core.dtypes.missing import (isna, notna, - remove_na_arraylike, - isnull) +from pandas.core.dtypes.missing import isna, notna, remove_na_arraylike + from pandas.core.tools.datetimes import to_datetime from pandas.core.common import (is_bool_indexer, _default_index, @@ -2738,7 +2737,8 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None, return result def interpolate(self, *args, **kwargs): - if is_datetime64_dtype(self) and self.isnull().any(): + if (is_datetime64_dtype(self) or + is_datetime64tz_dtype(self)) and self.isnull().any(): s2 = self.astype('i8').astype('f8') s2[self.isnull()] = np.nan return to_datetime(s2.interpolate(*args, **kwargs)) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 1b526ebd26c38..3c74c0a8993c8 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -1221,7 +1221,7 @@ def test_series_interpolate_intraday(self): def test_series_interpolate_nat(self): # GH 11701 - for tz in [None, "UTC"]: + for tz in [None, 'UTC', 'Europe/Paris']: expected = pd.Series(pd.date_range('2015-01-01', '2015-01-30', tz=tz)) result = expected.copy() result[[3, 4, 5, 13, 14, 15]] = pd.NaT