diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 86fc47dee09fc..80768945544f4 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -430,6 +430,7 @@ Conversion - +- Bug in :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` where ``dtype='datetime64[ns]'`` series and columns were ignored. (:issue:`19199`) - Bug in ``.astype()`` to non-ns timedelta units would hold the incorrect dtype (:issue:`19176`, :issue:`19223`, :issue:`12425`) - Bug in subtracting :class:`Series` from ``NaT`` incorrectly returning ``NaT`` (:issue:`19158`) - Bug in comparison of timezone-aware :class:`DatetimeIndex` against ``NaT`` incorrectly raising ``TypeError`` (:issue:`19276`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7ffef9c8a86d7..98fcde81e9e03 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5151,8 +5151,11 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, raise ValueError("Only `method=linear` interpolation is supported " "on MultiIndexes.") - if _maybe_transposed_self._data.get_dtype_counts().get( - 'object') == len(_maybe_transposed_self.T): + dtype_counts = _maybe_transposed_self._data.get_dtype_counts() + if ('object' in dtype_counts and + dtype_counts.get('object') == len(_maybe_transposed_self.T)): + # Checking for 'object' lets us avoid sometimes-fragile transpose + # call GH#19198 raise TypeError("Cannot interpolate with all NaNs.") # create/use the index diff --git a/pandas/core/internals.py b/pandas/core/internals.py index d95062c54b4c6..4392ce31bdf3c 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1108,7 +1108,7 @@ def check_int_bool(self, inplace): # a fill na type method try: m = missing.clean_fill_method(method) - except: + except ValueError: m = None if m is not None: @@ -1123,7 +1123,7 @@ def check_int_bool(self, inplace): # try an interp method try: m = missing.clean_interp_method(method, **kwargs) 
- except: + except ValueError: m = None if m is not None: @@ -1182,24 +1182,9 @@ def _interpolate(self, method=None, index=None, values=None, if fill_value is None: fill_value = self.fill_value - if method in ('krogh', 'piecewise_polynomial', 'pchip'): - if not index.is_monotonic: - raise ValueError("{0} interpolation requires that the " - "index be monotonic.".format(method)) - # process 1-d slices in the axis direction - - def func(x): - - # process a 1-d slice, returning it - # should the axis argument be handled below in apply_along_axis? - # i.e. not an arg to missing.interpolate_1d - return missing.interpolate_1d(index, x, method=method, limit=limit, - limit_direction=limit_direction, - fill_value=fill_value, - bounds_error=False, **kwargs) - - # interp each column independently - interp_values = np.apply_along_axis(func, axis, data) + interp_values = _interpolate_values(method, data, index, axis, + limit, limit_direction, + fill_value, **kwargs) blocks = [self.make_block(interp_values, klass=self.__class__, fastpath=True)] @@ -2594,6 +2579,32 @@ def set(self, locs, values, check=False): self.values[locs] = values + def _interpolate(self, method=None, index=None, values=None, + fill_value=None, axis=0, limit=None, + limit_direction='forward', inplace=False, downcast=None, + mgr=None, **kwargs): + """ interpolate using scipy wrappers, adapted to datetime64 values""" + + inplace = validate_bool_kwarg(inplace, 'inplace') + data = self.values if inplace else self.values.copy() + + # only deal with floats + mask = isna(self.values) + data = data.astype(np.float64) + data[mask] = np.nan + + if fill_value is None: + fill_value = self.fill_value + + interp_values = _interpolate_values(method, data, index, axis, + limit, limit_direction, + fill_value, **kwargs) + interp_values = interp_values.astype(self.dtype) + + blocks = [self.make_block(interp_values, klass=self.__class__, + fastpath=True)] + return self._maybe_downcast(blocks, downcast) + class 
DatetimeTZBlock(NonConsolidatableMixIn, DatetimeBlock): """ implement a datetime64 block with a tz attribute """ @@ -2750,6 +2761,43 @@ def concat_same_type(self, to_concat, placement=None): return make_block( values, placement=placement or slice(0, len(values), 1)) + def _interpolate(self, method=None, index=None, values=None, + fill_value=None, axis=0, limit=None, + limit_direction='forward', inplace=False, downcast=None, + mgr=None, **kwargs): + """ interpolate using scipy wrappers, adapted to datetime64 values""" + + inplace = validate_bool_kwarg(inplace, 'inplace') + data = self.values if inplace else self.values.copy() + + # only deal with floats + mask = isna(self.values) + + # Convert to UTC for interpolation + data = data.tz_convert('UTC').values + + # data is 1D because it comes from a DatetimeIndex, but we need ndim + # to match self.ndim + data = data.reshape(self.shape) + mask = mask.reshape(self.shape) + data = data.astype(np.float64) + data[mask] = np.nan + + if fill_value is None: + fill_value = self.fill_value + + interp_values = _interpolate_values(method, data, index, axis, + limit, limit_direction, + fill_value, **kwargs) + + interp_values = interp_values.squeeze() + utc_values = self._holder(interp_values, tz='UTC') + interp_values = utc_values.tz_convert(self.values.tz) + + blocks = [self.make_block(interp_values, klass=self.__class__, + fastpath=True)] + return self._maybe_downcast(blocks, downcast) + class SparseBlock(NonConsolidatableMixIn, Block): """ implement as a list of sparse arrays of the same dtype """ @@ -5671,3 +5719,26 @@ def _preprocess_slice_or_indexer(slice_or_indexer, length, allow_fill): if not allow_fill: indexer = maybe_convert_indices(indexer, length) return 'fancy', indexer, len(indexer) + + +def _interpolate_values(method, data, index, axis, limit, limit_direction, + fill_value, **kwargs): + """interpolate using scipy wrappers""" + if method in ('krogh', 'piecewise_polynomial', 'pchip'): + if not index.is_monotonic: + 
raise ValueError("{0} interpolation requires that the " + "index be monotonic.".format(method)) + # process 1-d slices in the axis direction + + def func(x): + # process a 1-d slice, returning it + # should the axis argument be handled below in apply_along_axis? + # i.e. not an arg to missing.interpolate_1d + return missing.interpolate_1d(index, x, method=method, limit=limit, + limit_direction=limit_direction, + fill_value=fill_value, + bounds_error=False, **kwargs) + + # interp each column independently + interp_values = np.apply_along_axis(func, axis, data) + return interp_values diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 2e4e8b9582cf6..f9db07ba2c67b 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -816,3 +816,16 @@ def test_interp_ignore_all_good(self): # all good result = df[['B', 'D']].interpolate(downcast=None) assert_frame_equal(result, df[['B', 'D']]) + + @pytest.mark.parametrize('tz', [None, 'US/Central']) + def test_interpolate_dt64_values(self, tz): + index = pd.Index([23, 26, 30]) + dti = pd.DatetimeIndex(['2015-09-23', '2015-09-26', '2015-09-30'], + tz=tz) + df = DataFrame(dti, index=index).reindex(range(23, 31)) + + dti_ex = pd.date_range('2015-09-23', '2015-09-30', tz=tz) + expected = DataFrame(dti_ex, index=df.index) + + result = df.interpolate() + assert_frame_equal(expected, result) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 0dc5e23184af7..a4bdcaa549b8b 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -1278,3 +1278,16 @@ def test_series_interpolate_intraday(self): result = ts.reindex(new_index).interpolate(method='time') tm.assert_numpy_array_equal(result.values, exp.values) + + @pytest.mark.parametrize('tz', [None, 'US/Central']) + def test_interpolate_dt64_values(self, tz): + index = pd.Index([23, 26, 30]) + dti = pd.DatetimeIndex(['2015-09-23', '2015-09-26', 
'2015-09-30'], + tz=tz) + ser = pd.Series(dti, index=index).reindex(range(23, 31)) + + dti_ex = pd.date_range('2015-09-23', '2015-09-30', tz=tz) + expected = pd.Series(dti_ex, index=ser.index) + + result = ser.interpolate() + tm.assert_series_equal(expected, result)