From 1fef1f149aa76772c6e13d821180e0a918335338 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 11 Jan 2018 18:50:50 -0800 Subject: [PATCH 1/3] fix interpolate for datetime64 values --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/core/generic.py | 7 ++-- pandas/core/internals.py | 52 +++++++++++++++++++++++++++++ pandas/tests/frame/test_missing.py | 13 ++++++++ pandas/tests/series/test_missing.py | 13 ++++++++ 5 files changed, 84 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index dc305f36f32ec..ed4fd65b1bf91 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -381,7 +381,7 @@ Conversion - Fixed bug where comparing :class:`DatetimeIndex` failed to raise ``TypeError`` when attempting to compare timezone-aware and timezone-naive datetimelike objects (:issue:`18162`) - Bug in :class:`DatetimeIndex` where the repr was not showing high-precision time values at the end of a day (e.g., 23:59:59.999999999) (:issue:`19030`) - Bug where dividing a scalar timedelta-like object with :class:`TimedeltaIndex` performed the reciprocal operation (:issue:`19125`) -- +- Bug in :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` where ``dtype='datetime64[ns]'`` series and columns were ignored. 
(:issue:`19199`) Indexing ^^^^^^^^ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cef1e551f948e..5ccc58a737130 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5143,8 +5143,11 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, raise ValueError("Only `method=linear` interpolation is supported " "on MultiIndexes.") - if _maybe_transposed_self._data.get_dtype_counts().get( - 'object') == len(_maybe_transposed_self.T): + dtype_counts = _maybe_transposed_self._data.get_dtype_counts() + if ('object' in dtype_counts and + dtype_counts.get('object') == len(_maybe_transposed_self.T)): + # Checking for 'object' lets us avoid sometimes-fragile transpose + # call GH#19198 raise TypeError("Cannot interpolate with all NaNs.") # create/use the index diff --git a/pandas/core/internals.py b/pandas/core/internals.py index ba90503e3bf40..7ac47034ac915 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -2588,6 +2588,58 @@ def set(self, locs, values, check=False): self.values[locs] = values + def _interpolate(self, method=None, index=None, values=None, + fill_value=None, axis=0, limit=None, + limit_direction='forward', inplace=False, downcast=None, + mgr=None, **kwargs): + """ interpolate using scipy wrappers, adapted to datetime64 values""" + + inplace = validate_bool_kwarg(inplace, 'inplace') + data = self.values if inplace else self.values.copy() + + # only deal with floats + mask = isna(self.values) + if self.is_datetimetz: + # Convert to UTC for interpolation + data = data.tz_convert('UTC').values + if self.ndim > 1: + # DataFrame + data = np.atleast_2d(data) + mask = np.atleast_2d(mask) + data = data.astype(np.float64) + data[mask] = np.nan + + if fill_value is None: + fill_value = self.fill_value + + if method in ('krogh', 'piecewise_polynomial', 'pchip'): + if not index.is_monotonic: + raise ValueError("{0} interpolation requires that the " + "index be monotonic.".format(method)) + # process 
1-d slices in the axis direction + + def func(x): + # process a 1-d slice, returning it + # should the axis argument be handled below in apply_along_axis? + # i.e. not an arg to missing.interpolate_1d + return missing.interpolate_1d(index, x, method=method, limit=limit, + limit_direction=limit_direction, + fill_value=fill_value, + bounds_error=False, **kwargs) + + # interp each column independently + interp_values = np.apply_along_axis(func, axis, data) + if self.is_datetimetz: + interp_values = interp_values.squeeze() + utc_values = self._holder(interp_values, tz='UTC') + interp_values = utc_values.tz_convert(self.values.tz) + else: + interp_values = interp_values.astype(self.dtype) + + blocks = [self.make_block(interp_values, klass=self.__class__, + fastpath=True)] + return self._maybe_downcast(blocks, downcast) + class DatetimeTZBlock(NonConsolidatableMixIn, DatetimeBlock): """ implement a datetime64 block with a tz attribute """ diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 2e4e8b9582cf6..f9db07ba2c67b 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -816,3 +816,16 @@ def test_interp_ignore_all_good(self): # all good result = df[['B', 'D']].interpolate(downcast=None) assert_frame_equal(result, df[['B', 'D']]) + + @pytest.mark.parametrize('tz', [None, 'US/Central']) + def test_interpolate_dt64_values(self, tz): + index = pd.Index([23, 26, 30]) + dti = pd.DatetimeIndex(['2015-09-23', '2015-09-26', '2015-09-30'], + tz=tz) + df = DataFrame(dti, index=index).reindex(range(23, 31)) + + dti_ex = pd.date_range('2015-09-23', '2015-09-30', tz=tz) + expected = DataFrame(dti_ex, index=df.index) + + result = df.interpolate() + assert_frame_equal(expected, result) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 0dc5e23184af7..a4bdcaa549b8b 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -1278,3 
+1278,16 @@ def test_series_interpolate_intraday(self): result = ts.reindex(new_index).interpolate(method='time') tm.assert_numpy_array_equal(result.values, exp.values) + + @pytest.mark.parametrize('tz', [None, 'US/Central']) + def test_interpolate_dt64_values(self, tz): + index = pd.Index([23, 26, 30]) + dti = pd.DatetimeIndex(['2015-09-23', '2015-09-26', '2015-09-30'], + tz=tz) + ser = pd.Series(dti, index=index).reindex(range(23, 31)) + + dti_ex = pd.date_range('2015-09-23', '2015-09-30', tz=tz) + expected = pd.Series(dti_ex, index=ser.index) + + result = ser.interpolate() + tm.assert_series_equal(expected, result) From e3b6f39215db273017352e6d9d33fd893892b47d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 17 Jan 2018 17:52:14 -0800 Subject: [PATCH 2/3] refactor out repeated code; fix flake8 complaints about except: --- pandas/core/internals.py | 68 ++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 37 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 8d5ac24010192..0bf6f91892dde 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1106,7 +1106,7 @@ def check_int_bool(self, inplace): # a fill na type method try: m = missing.clean_fill_method(method) - except: + except ValueError: m = None if m is not None: @@ -1121,7 +1121,7 @@ def check_int_bool(self, inplace): # try an interp method try: m = missing.clean_interp_method(method, **kwargs) - except: + except ValueError: m = None if m is not None: @@ -1180,24 +1180,9 @@ def _interpolate(self, method=None, index=None, values=None, if fill_value is None: fill_value = self.fill_value - if method in ('krogh', 'piecewise_polynomial', 'pchip'): - if not index.is_monotonic: - raise ValueError("{0} interpolation requires that the " - "index be monotonic.".format(method)) - # process 1-d slices in the axis direction - - def func(x): - - # process a 1-d slice, returning it - # should the axis argument be handled below in apply_along_axis? 
- # i.e. not an arg to missing.interpolate_1d - return missing.interpolate_1d(index, x, method=method, limit=limit, - limit_direction=limit_direction, - fill_value=fill_value, - bounds_error=False, **kwargs) - - # interp each column independently - interp_values = np.apply_along_axis(func, axis, data) + interp_values = _interpolate_values(method, data, index, axis, + limit, limit_direction, + fill_value, **kwargs) blocks = [self.make_block(interp_values, klass=self.__class__, fastpath=True)] @@ -2616,23 +2601,9 @@ def _interpolate(self, method=None, index=None, values=None, if fill_value is None: fill_value = self.fill_value - if method in ('krogh', 'piecewise_polynomial', 'pchip'): - if not index.is_monotonic: - raise ValueError("{0} interpolation requires that the " - "index be monotonic.".format(method)) - # process 1-d slices in the axis direction - - def func(x): - # process a 1-d slice, returning it - # should the axis argument be handled below in apply_along_axis? - # i.e. not an arg to missing.interpolate_1d - return missing.interpolate_1d(index, x, method=method, limit=limit, - limit_direction=limit_direction, - fill_value=fill_value, - bounds_error=False, **kwargs) - - # interp each column independently - interp_values = np.apply_along_axis(func, axis, data) + interp_values = _interpolate_values(method, data, index, axis, + limit, limit_direction, + fill_value, **kwargs) if self.is_datetimetz: interp_values = interp_values.squeeze() utc_values = self._holder(interp_values, tz='UTC') @@ -5727,3 +5698,26 @@ def _preprocess_slice_or_indexer(slice_or_indexer, length, allow_fill): if not allow_fill: indexer = maybe_convert_indices(indexer, length) return 'fancy', indexer, len(indexer) + + +def _interpolate_values(method, data, index, axis, limit, limit_direction, + fill_value, **kwargs): + """interpolate using scipy wrappers""" + if method in ('krogh', 'piecewise_polynomial', 'pchip'): + if not index.is_monotonic: + raise ValueError("{0} interpolation requires 
that the " + "index be monotonic.".format(method)) + # process 1-d slices in the axis direction + + def func(x): + # process a 1-d slice, returning it + # should the axis argument be handled below in apply_along_axis? + # i.e. not an arg to missing.interpolate_1d + return missing.interpolate_1d(index, x, method=method, limit=limit, + limit_direction=limit_direction, + fill_value=fill_value, + bounds_error=False, **kwargs) + + # interp each column independently + interp_values = np.apply_along_axis(func, axis, data) + return interp_values From 0115ef10c92f3e0d33c57e31dc6f85b21d590f12 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 19 Jan 2018 11:35:51 -0800 Subject: [PATCH 3/3] separate method for datetimeTZ as requested --- pandas/core/internals.py | 51 ++++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 4a3f9d3e63aaa..4392ce31bdf3c 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -2590,13 +2590,6 @@ def _interpolate(self, method=None, index=None, values=None, # only deal with floats mask = isna(self.values) - if self.is_datetimetz: - # Convert to UTC for interpolation - data = data.tz_convert('UTC').values - if self.ndim > 1: - # DataFrame - data = np.atleast_2d(data) - mask = np.atleast_2d(mask) data = data.astype(np.float64) data[mask] = np.nan @@ -2606,12 +2599,7 @@ def _interpolate(self, method=None, index=None, values=None, interp_values = _interpolate_values(method, data, index, axis, limit, limit_direction, fill_value, **kwargs) - if self.is_datetimetz: - interp_values = interp_values.squeeze() - utc_values = self._holder(interp_values, tz='UTC') - interp_values = utc_values.tz_convert(self.values.tz) - else: - interp_values = interp_values.astype(self.dtype) + interp_values = interp_values.astype(self.dtype) blocks = [self.make_block(interp_values, klass=self.__class__, fastpath=True)] @@ -2773,6 +2761,43 @@ def 
concat_same_type(self, to_concat, placement=None): return make_block( values, placement=placement or slice(0, len(values), 1)) + def _interpolate(self, method=None, index=None, values=None, + fill_value=None, axis=0, limit=None, + limit_direction='forward', inplace=False, downcast=None, + mgr=None, **kwargs): + """ interpolate using scipy wrappers, adapted to datetime64 values""" + + inplace = validate_bool_kwarg(inplace, 'inplace') + data = self.values if inplace else self.values.copy() + + # only deal with floats + mask = isna(self.values) + + # Convert to UTC for interpolation + data = data.tz_convert('UTC').values + + # data is 1D because it comes from a DatetimeIndex, but we need ndim + # to match self.ndim + data = data.reshape(self.shape) + mask = mask.reshape(self.shape) + data = data.astype(np.float64) + data[mask] = np.nan + + if fill_value is None: + fill_value = self.fill_value + + interp_values = _interpolate_values(method, data, index, axis, + limit, limit_direction, + fill_value, **kwargs) + + interp_values = interp_values.squeeze() + utc_values = self._holder(interp_values, tz='UTC') + interp_values = utc_values.tz_convert(self.values.tz) + + blocks = [self.make_block(interp_values, klass=self.__class__, + fastpath=True)] + return self._maybe_downcast(blocks, downcast) + class SparseBlock(NonConsolidatableMixIn, Block): """ implement as a list of sparse arrays of the same dtype """