Skip to content

BUG: GH11349 where Series.apply and Series.map did not box timedelta64 #11564

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 31, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions doc/source/whatsnew/v0.18.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,64 @@ Backwards incompatible API changes
- The parameter ``out`` has been removed from the ``Series.round()`` method. (:issue:`11763`)
- ``DataFrame.round()`` leaves non-numeric columns unchanged in its return, rather than raises. (:issue:`11885`)

NaT and Timedelta operations
^^^^^^^^^^^^^^^^^^^^^^^^^^^^

``NaT`` and ``Timedelta`` have expanded arithmetic operations, which are extended to ``Series``
arithmetic where applicable. Operations defined for ``datetime64[ns]`` or ``timedelta64[ns]``
are now also defined for ``NaT`` (:issue:`11564`).

``NaT`` now supports arithmetic operations with integers and floats.

.. ipython:: python

pd.NaT * 1
pd.NaT * 1.5
pd.NaT / 2
pd.NaT * np.nan

``NaT`` defines more arithmetic operations with ``datetime64[ns]`` and ``timedelta64[ns]``.

.. ipython:: python

pd.NaT / pd.NaT
pd.Timedelta('1s') / pd.NaT

``NaT`` may represent either a ``datetime64[ns]`` null or a ``timedelta64[ns]`` null.
Given the ambiguity, it is treated as a ``timedelta64[ns]``, which allows more operations
to succeed.

.. ipython:: python
:okexcept:

pd.NaT + pd.NaT
# same as
pd.Timedelta('1s') + pd.Timedelta('1s')
# as opposed to
pd.Timestamp('19900315') + pd.Timestamp('19900315')

However, when wrapped in a ``Series`` whose ``dtype`` is ``datetime64[ns]`` or ``timedelta64[ns]``,
the ``dtype`` information is respected.

.. ipython:: python

pd.Series([pd.NaT], dtype='<M8[ns]') + pd.Series([pd.NaT], dtype='<M8[ns]')
pd.Series([pd.NaT], dtype='<m8[ns]') + pd.Series([pd.NaT], dtype='<m8[ns]')

``Timedelta`` division by a ``float`` now works.

.. ipython:: python

pd.Timedelta('1s') / 2.0

Subtraction of a ``Series`` of ``Timedelta`` values from a ``Timestamp`` now works (:issue:`11925`).

.. ipython:: python

ser = pd.Series(pd.timedelta_range('1 day', periods=3))
ser
pd.Timestamp('2012-01-01') - ser

Bug in QuarterBegin with n=0
^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down Expand Up @@ -312,6 +370,7 @@ Bug Fixes
- Bug in ``DataFrame.info`` when duplicated column names exist (:issue:`11761`)
- Bug in ``.copy`` of datetime tz-aware objects (:issue:`11794`)

- Bug in ``Series.apply`` and ``Series.map`` where ``timedelta64`` was not boxed (:issue:`11349`)



Expand Down
94 changes: 60 additions & 34 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ def __init__(self, left, right, name, na_op):
self.is_datetime64tz_lhs = is_datetime64tz_dtype(lvalues)
self.is_datetime_lhs = self.is_datetime64_lhs or self.is_datetime64tz_lhs
self.is_integer_lhs = left.dtype.kind in ['i', 'u']
self.is_floating_lhs = left.dtype.kind == 'f'

# right
self.right = right
Expand All @@ -300,32 +301,25 @@ def __init__(self, left, right, name, na_op):
self.is_datetime_rhs = self.is_datetime64_rhs or self.is_datetime64tz_rhs
self.is_timedelta_rhs = is_timedelta64_dtype(rvalues)
self.is_integer_rhs = rvalues.dtype.kind in ('i', 'u')
self.is_floating_rhs = rvalues.dtype.kind == 'f'

self._validate(lvalues, rvalues, name)
self.lvalues, self.rvalues = self._convert_for_datetime(lvalues, rvalues)

def _validate(self, lvalues, rvalues, name):
# timedelta and integer mul/div

if (self.is_timedelta_lhs and self.is_integer_rhs) or (
self.is_integer_lhs and self.is_timedelta_rhs):
if (self.is_timedelta_lhs and
(self.is_integer_rhs or self.is_floating_rhs)) or (
self.is_timedelta_rhs and
(self.is_integer_lhs or self.is_floating_lhs)):

if name not in ('__div__', '__truediv__', '__mul__'):
if name not in ('__div__', '__truediv__', '__mul__', '__rmul__'):
raise TypeError("can only operate on a timedelta and an "
"integer for division, but the operator [%s]"
"was passed" % name)
"integer or a float for division and "
"multiplication, but the operator [%s] was"
"passed" % name)

# 2 datetimes
elif self.is_datetime_lhs and self.is_datetime_rhs:

if name not in ('__sub__','__rsub__'):
raise TypeError("can only operate on a datetimes for"
" subtraction, but the operator [%s] was"
" passed" % name)

# if tz's must be equal (same or None)
if getattr(lvalues,'tz',None) != getattr(rvalues,'tz',None):
raise ValueError("Incompatbile tz's on datetime subtraction ops")

# 2 timedeltas
elif ((self.is_timedelta_lhs and
Expand All @@ -339,6 +333,7 @@ def _validate(self, lvalues, rvalues, name):
"addition, subtraction, and division, but the"
" operator [%s] was passed" % name)


# datetime and timedelta/DateOffset
elif (self.is_datetime_lhs and
(self.is_timedelta_rhs or self.is_offset_rhs)):
Expand All @@ -349,6 +344,28 @@ def _validate(self, lvalues, rvalues, name):
" but the operator [%s] was passed" %
name)

elif (self.is_datetime_rhs and
(self.is_timedelta_lhs or self.is_offset_lhs)):
if name not in ('__add__', '__radd__', '__rsub__'):
raise TypeError("can only operate on a timedelta/DateOffset with a rhs of"
" a datetime for addition,"
" but the operator [%s] was passed" %
name)


# 2 datetimes
elif self.is_datetime_lhs and self.is_datetime_rhs:

if name not in ('__sub__','__rsub__'):
raise TypeError("can only operate on a datetimes for"
" subtraction, but the operator [%s] was"
" passed" % name)

# if tz's must be equal (same or None)
if getattr(lvalues,'tz',None) != getattr(rvalues,'tz',None):
raise ValueError("Incompatbile tz's on datetime subtraction ops")


elif ((self.is_timedelta_lhs or self.is_offset_lhs)
and self.is_datetime_rhs):

Expand All @@ -357,7 +374,7 @@ def _validate(self, lvalues, rvalues, name):
" a datetime for addition, but the operator"
" [%s] was passed" % name)
else:
raise TypeError('cannot operate on a series with out a rhs '
raise TypeError('cannot operate on a series without a rhs '
'of a series/ndarray of type datetime64[ns] '
'or a timedelta')

Expand All @@ -366,17 +383,25 @@ def _convert_to_array(self, values, name=None, other=None):
from pandas.tseries.timedeltas import to_timedelta

ovalues = values
supplied_dtype = None
if not is_list_like(values):
values = np.array([values])

inferred_type = lib.infer_dtype(values)

if inferred_type in ('datetime64', 'datetime', 'date', 'time'):
# if this is a Series that contains relevant dtype info, then use this
# instead of the inferred type; this avoids coercing Series([NaT],
# dtype='datetime64[ns]') to Series([NaT], dtype='timedelta64[ns]')
elif isinstance(values, pd.Series) and (
is_timedelta64_dtype(values) or is_datetime64_dtype(values)):
supplied_dtype = values.dtype
inferred_type = supplied_dtype or lib.infer_dtype(values)
if (inferred_type in ('datetime64', 'datetime', 'date', 'time')
or com.is_datetimetz(inferred_type)):
# if we have a other of timedelta, but use pd.NaT here we
# we are in the wrong path
if (other is not None and other.dtype == 'timedelta64[ns]' and
all(isnull(v) for v in values)):
values = np.empty(values.shape, dtype=other.dtype)
if (supplied_dtype is None
and other is not None
and (other.dtype in ('timedelta64[ns]', 'datetime64[ns]'))
and isnull(values).all()):
values = np.empty(values.shape, dtype='timedelta64[ns]')
values[:] = iNaT

# a datelike
Expand All @@ -401,18 +426,15 @@ def _convert_to_array(self, values, name=None, other=None):
values = values.astype('timedelta64[ns]')
elif isinstance(values, pd.PeriodIndex):
values = values.to_timestamp().to_series()
elif name not in ('__truediv__', '__div__', '__mul__'):
elif name not in ('__truediv__', '__div__', '__mul__', '__rmul__'):
raise TypeError("incompatible type for a datetime/timedelta "
"operation [{0}]".format(name))
elif inferred_type == 'floating':
# all nan, so ok, use the other dtype (e.g. timedelta or datetime)
if isnull(values).all():
if isnull(values).all() and name in ('__add__', '__radd__',
'__sub__', '__rsub__'):
values = np.empty(values.shape, dtype=other.dtype)
values[:] = iNaT
else:
raise TypeError(
'incompatible type [{0}] for a datetime/timedelta '
'operation'.format(np.array(values).dtype))
return values
elif self._is_offset(values):
return values
else:
Expand All @@ -431,7 +453,10 @@ def _convert_for_datetime(self, lvalues, rvalues):

# datetime subtraction means timedelta
if self.is_datetime_lhs and self.is_datetime_rhs:
self.dtype = 'timedelta64[ns]'
if self.name in ('__sub__', '__rsub__'):
self.dtype = 'timedelta64[ns]'
else:
self.dtype = 'datetime64[ns]'
elif self.is_datetime64tz_lhs:
self.dtype = lvalues.dtype
elif self.is_datetime64tz_rhs:
Expand Down Expand Up @@ -482,7 +507,8 @@ def _offset(lvalues, rvalues):
rvalues = to_timedelta(rvalues)

lvalues = lvalues.astype(np.int64)
rvalues = rvalues.astype(np.int64)
if not self.is_floating_rhs:
rvalues = rvalues.astype(np.int64)

# time delta division -> unit less
# integer gets converted to timedelta in np < 1.6
Expand Down Expand Up @@ -580,7 +606,7 @@ def wrapper(left, right, name=name, na_op=na_op):
lvalues, rvalues = left, right
dtype = None
wrap_results = lambda x: x
elif time_converted == NotImplemented:
elif time_converted is NotImplemented:
return NotImplemented
else:
left, right = time_converted.left, time_converted.right
Expand Down
10 changes: 6 additions & 4 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2078,8 +2078,9 @@ def map(self, arg, na_action=None):
same index as caller
"""
values = self._values
if com.is_datetime64_dtype(values.dtype):
values = lib.map_infer(values, lib.Timestamp)
if needs_i8_conversion(values.dtype):
boxer = i8_boxer(values)
values = lib.map_infer(values, boxer)

if na_action == 'ignore':
mask = isnull(values)
Expand Down Expand Up @@ -2210,8 +2211,9 @@ def apply(self, func, convert_dtype=True, args=(), **kwds):
return f(self)

values = _values_from_object(self)
if com.is_datetime64_dtype(values.dtype):
values = lib.map_infer(values, lib.Timestamp)
if needs_i8_conversion(values.dtype):
boxer = i8_boxer(values)
values = lib.map_infer(values, boxer)

mapped = lib.map_infer(values, f, convert=convert_dtype)
if len(mapped) and isinstance(mapped[0], Series):
Expand Down
Loading