Skip to content

Fix (Series|DataFrame).interpolate for datetime dtypes #19291

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,7 @@ Conversion


-
- Bug in :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` where ``dtype='datetime64[ns]'`` series and columns were ignored. (:issue:`19199`)
- Bug in ``.astype()`` to non-ns timedelta units would hold the incorrect dtype (:issue:`19176`, :issue:`19223`, :issue:`12425`)
- Bug in subtracting :class:`Series` from ``NaT`` incorrectly returning ``NaT`` (:issue:`19158`)
- Bug in comparison of timezone-aware :class:`DatetimeIndex` against ``NaT`` incorrectly raising ``TypeError`` (:issue:`19276`)
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5151,8 +5151,11 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
raise ValueError("Only `method=linear` interpolation is supported "
"on MultiIndexes.")

if _maybe_transposed_self._data.get_dtype_counts().get(
'object') == len(_maybe_transposed_self.T):
dtype_counts = _maybe_transposed_self._data.get_dtype_counts()
if ('object' in dtype_counts and
dtype_counts.get('object') == len(_maybe_transposed_self.T)):
# Checking for 'object' lets us avoid sometimes-fragile transpose
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

huh? where are you testing this

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a workaround until #19198 is fixed:

dti = pd.date_range('2016-01-01', periods=3, tz='US/Pacific').insert(1, pd.NaT)
ser = pd.Series(dti)
df = ser.to_frame()

>>> df.interpolate()
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "pandas/core/generic.py", line 5155, in interpolate
    'object') == len(_maybe_transposed_self.T):
  File "pandas/core/frame.py", line 1941, in transpose
    return super(DataFrame, self).transpose(1, 0, **kwargs)
  File "pandas/core/generic.py", line 616, in transpose
    new_values = self.values.transpose(axes_numbers)
  File "pandas/core/base.py", line 701, in transpose
    nv.validate_transpose(args, kwargs)
  File "pandas/compat/numpy/function.py", line 54, in __call__
    self.defaults)
  File "pandas/util/_validators.py", line 218, in validate_args_and_kwargs
    validate_kwargs(fname, kwargs, compat_args)
  File "pandas/util/_validators.py", line 157, in validate_kwargs
    _check_for_default_values(fname, kwds, compat_args)
  File "pandas/util/_validators.py", line 69, in _check_for_default_values
    format(fname=fname, arg=key)))
ValueError: the 'axes' parameter is not supported in the pandas implementation of transpose()

(ser.interpolate doesn't raise, just forgets to interpolate)

# call GH#19198
raise TypeError("Cannot interpolate with all NaNs.")

# create/use the index
Expand Down
86 changes: 66 additions & 20 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1106,7 +1106,7 @@ def check_int_bool(self, inplace):
# a fill na type method
try:
m = missing.clean_fill_method(method)
except:
except ValueError:
m = None

if m is not None:
Expand All @@ -1121,7 +1121,7 @@ def check_int_bool(self, inplace):
# try an interp method
try:
m = missing.clean_interp_method(method, **kwargs)
except:
except ValueError:
m = None

if m is not None:
Expand Down Expand Up @@ -1180,24 +1180,9 @@ def _interpolate(self, method=None, index=None, values=None,
if fill_value is None:
fill_value = self.fill_value

if method in ('krogh', 'piecewise_polynomial', 'pchip'):
if not index.is_monotonic:
raise ValueError("{0} interpolation requires that the "
"index be monotonic.".format(method))
# process 1-d slices in the axis direction

def func(x):

# process a 1-d slice, returning it
# should the axis argument be handled below in apply_along_axis?
# i.e. not an arg to missing.interpolate_1d
return missing.interpolate_1d(index, x, method=method, limit=limit,
limit_direction=limit_direction,
fill_value=fill_value,
bounds_error=False, **kwargs)

# interp each column independently
interp_values = np.apply_along_axis(func, axis, data)
interp_values = _interpolate_values(method, data, index, axis,
limit, limit_direction,
fill_value, **kwargs)

blocks = [self.make_block(interp_values, klass=self.__class__,
fastpath=True)]
Expand Down Expand Up @@ -2592,6 +2577,44 @@ def set(self, locs, values, check=False):

self.values[locs] = values

def _interpolate(self, method=None, index=None, values=None,
fill_value=None, axis=0, limit=None,
limit_direction='forward', inplace=False, downcast=None,
mgr=None, **kwargs):
""" interpolate using scipy wrappers, adapted to datetime64 values"""

inplace = validate_bool_kwarg(inplace, 'inplace')
data = self.values if inplace else self.values.copy()

# only deal with floats
mask = isna(self.values)
if self.is_datetimetz:
# Convert to UTC for interpolation
data = data.tz_convert('UTC').values
if self.ndim > 1:
# DataFrame
data = np.atleast_2d(data)
mask = np.atleast_2d(mask)
data = data.astype(np.float64)
data[mask] = np.nan

Copy link
Contributor

@jreback jreback Jan 18, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a hodgepodge. Use the class hierarchy to make this code readable; in other words, put the datetime stuff in the DatetimeBlock.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is in the DatetimeBlock. I'm confused.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

then why would you need is_datetime.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs to be way less if/then stuff.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

then why would you need is_datetime.

It's specifically is_datetimetz; the tzaware case gets slightly different handling. If you're suggesting a separate implementation for DatetimeTZBlock vs DatetimeBlock I guess that's OK, but pretty sure there'll be complaints about code duplication that way.

if fill_value is None:
fill_value = self.fill_value

interp_values = _interpolate_values(method, data, index, axis,
limit, limit_direction,
fill_value, **kwargs)
if self.is_datetimetz:
interp_values = interp_values.squeeze()
utc_values = self._holder(interp_values, tz='UTC')
interp_values = utc_values.tz_convert(self.values.tz)
else:
interp_values = interp_values.astype(self.dtype)

blocks = [self.make_block(interp_values, klass=self.__class__,
fastpath=True)]
return self._maybe_downcast(blocks, downcast)


class DatetimeTZBlock(NonConsolidatableMixIn, DatetimeBlock):
""" implement a datetime64 block with a tz attribute """
Expand Down Expand Up @@ -5675,3 +5698,26 @@ def _preprocess_slice_or_indexer(slice_or_indexer, length, allow_fill):
if not allow_fill:
indexer = maybe_convert_indices(indexer, length)
return 'fancy', indexer, len(indexer)


def _interpolate_values(method, data, index, axis, limit, limit_direction,
fill_value, **kwargs):
"""interpolate using scipy wrappers"""
if method in ('krogh', 'piecewise_polynomial', 'pchip'):
if not index.is_monotonic:
raise ValueError("{0} interpolation requires that the "
"index be monotonic.".format(method))
# process 1-d slices in the axis direction

def func(x):
# process a 1-d slice, returning it
# should the axis argument be handled below in apply_along_axis?
# i.e. not an arg to missing.interpolate_1d
return missing.interpolate_1d(index, x, method=method, limit=limit,
limit_direction=limit_direction,
fill_value=fill_value,
bounds_error=False, **kwargs)

# interp each column independently
interp_values = np.apply_along_axis(func, axis, data)
return interp_values
13 changes: 13 additions & 0 deletions pandas/tests/frame/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -816,3 +816,16 @@ def test_interp_ignore_all_good(self):
# all good
result = df[['B', 'D']].interpolate(downcast=None)
assert_frame_equal(result, df[['B', 'D']])

@pytest.mark.parametrize('tz', [None, 'US/Central'])
def test_interpolate_dt64_values(self, tz):
    # GH#19199: datetime64 columns (naive and tz-aware) should be
    # interpolated by DataFrame.interpolate instead of being ignored
    known_stamps = pd.DatetimeIndex(
        ['2015-09-23', '2015-09-26', '2015-09-30'], tz=tz)
    sparse = DataFrame(known_stamps, index=pd.Index([23, 26, 30]))
    # reindexing onto every integer label 23..30 introduces the gaps
    df = sparse.reindex(range(23, 31))

    daily = pd.date_range('2015-09-23', '2015-09-30', tz=tz)
    expected = DataFrame(daily, index=df.index)

    result = df.interpolate()
    assert_frame_equal(expected, result)
13 changes: 13 additions & 0 deletions pandas/tests/series/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1278,3 +1278,16 @@ def test_series_interpolate_intraday(self):
result = ts.reindex(new_index).interpolate(method='time')

tm.assert_numpy_array_equal(result.values, exp.values)

@pytest.mark.parametrize('tz', [None, 'US/Central'])
def test_interpolate_dt64_values(self, tz):
    # GH#19199: datetime64 values (naive and tz-aware) should be
    # interpolated by Series.interpolate instead of being ignored
    known_stamps = pd.DatetimeIndex(
        ['2015-09-23', '2015-09-26', '2015-09-30'], tz=tz)
    sparse = pd.Series(known_stamps, index=pd.Index([23, 26, 30]))
    # reindexing onto every integer label 23..30 introduces the gaps
    ser = sparse.reindex(range(23, 31))

    daily = pd.date_range('2015-09-23', '2015-09-30', tz=tz)
    expected = pd.Series(daily, index=ser.index)

    result = ser.interpolate()
    tm.assert_series_equal(expected, result)