Skip to content

Commit a89f489

Browse files
committed
Merge pull request #11564 from kawochen/BUG-FIX-11349
BUG: GH11349 where Series.apply and Series.map did not box timedelta64
2 parents 030099a + 513c5c8 commit a89f489

File tree

8 files changed

+412
-73
lines changed

8 files changed

+412
-73
lines changed

doc/source/whatsnew/v0.18.0.txt

+59
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,64 @@ Backwards incompatible API changes
167167
- The parameter ``out`` has been removed from the ``Series.round()`` method. (:issue:`11763`)
168168
- ``DataFrame.round()`` leaves non-numeric columns unchanged in its return, rather than raises. (:issue:`11885`)
169169

170+
NaT and Timedelta operations
171+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
172+
173+
``NaT`` and ``Timedelta`` have expanded arithmetic operations, which are extended to ``Series``
174+
arithmetic where applicable. Operations defined for ``datetime64[ns]`` or ``timedelta64[ns]``
175+
are now also defined for ``NaT`` (:issue:`11564`).
176+
177+
``NaT`` now supports arithmetic operations with integers and floats.
178+
179+
.. ipython:: python
180+
181+
pd.NaT * 1
182+
pd.NaT * 1.5
183+
pd.NaT / 2
184+
pd.NaT * np.nan
185+
186+
``NaT`` defines more arithmetic operations with ``datetime64[ns]`` and ``timedelta64[ns]``.
187+
188+
.. ipython:: python
189+
190+
pd.NaT / pd.NaT
191+
pd.Timedelta('1s') / pd.NaT
192+
193+
``NaT`` may represent either a ``datetime64[ns]`` null or a ``timedelta64[ns]`` null.
194+
Given the ambiguity, it is treated as a ``timedelta64[ns]``, which allows more operations
195+
to succeed.
196+
197+
.. ipython:: python
198+
:okexcept:
199+
200+
pd.NaT + pd.NaT
201+
# same as
202+
pd.Timedelta('1s') + pd.Timedelta('1s')
203+
# as opposed to
204+
pd.Timestamp('19900315') + pd.Timestamp('19900315')
205+
206+
However, when wrapped in a ``Series`` whose ``dtype`` is ``datetime64[ns]`` or ``timedelta64[ns]``,
207+
the ``dtype`` information is respected.
208+
209+
.. ipython:: python
210+
211+
pd.Series([pd.NaT], dtype='<M8[ns]') + pd.Series([pd.NaT], dtype='<M8[ns]')
212+
pd.Series([pd.NaT], dtype='<m8[ns]') + pd.Series([pd.NaT], dtype='<m8[ns]')
213+
214+
``Timedelta`` division by a ``float`` now works.
215+
216+
.. ipython:: python
217+
218+
pd.Timedelta('1s') / 2.0
219+
220+
Subtraction of a ``Series`` of ``Timedelta`` values from a ``Timestamp`` now works (:issue:`11925`)
221+
222+
.. ipython:: python
223+
224+
ser = pd.Series(pd.timedelta_range('1 day', periods=3))
225+
ser
226+
pd.Timestamp('2012-01-01') - ser
227+
170228
Bug in QuarterBegin with n=0
171229
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
172230

@@ -312,6 +370,7 @@ Bug Fixes
312370
- Bug in ``DataFrame.info`` when duplicated column names exist (:issue:`11761`)
313371
- Bug in ``.copy`` of datetime tz-aware objects (:issue:`11794`)
314372

373+
- Bug in ``Series.apply`` and ``Series.map`` where ``timedelta64`` was not boxed (:issue:`11349`)
315374

316375

317376

pandas/core/ops.py

+60-34
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,7 @@ def __init__(self, left, right, name, na_op):
291291
self.is_datetime64tz_lhs = is_datetime64tz_dtype(lvalues)
292292
self.is_datetime_lhs = self.is_datetime64_lhs or self.is_datetime64tz_lhs
293293
self.is_integer_lhs = left.dtype.kind in ['i', 'u']
294+
self.is_floating_lhs = left.dtype.kind == 'f'
294295

295296
# right
296297
self.right = right
@@ -300,32 +301,25 @@ def __init__(self, left, right, name, na_op):
300301
self.is_datetime_rhs = self.is_datetime64_rhs or self.is_datetime64tz_rhs
301302
self.is_timedelta_rhs = is_timedelta64_dtype(rvalues)
302303
self.is_integer_rhs = rvalues.dtype.kind in ('i', 'u')
304+
self.is_floating_rhs = rvalues.dtype.kind == 'f'
303305

304306
self._validate(lvalues, rvalues, name)
305307
self.lvalues, self.rvalues = self._convert_for_datetime(lvalues, rvalues)
306308

307309
def _validate(self, lvalues, rvalues, name):
308310
# timedelta and integer mul/div
309311

310-
if (self.is_timedelta_lhs and self.is_integer_rhs) or (
311-
self.is_integer_lhs and self.is_timedelta_rhs):
312+
if (self.is_timedelta_lhs and
313+
(self.is_integer_rhs or self.is_floating_rhs)) or (
314+
self.is_timedelta_rhs and
315+
(self.is_integer_lhs or self.is_floating_lhs)):
312316

313-
if name not in ('__div__', '__truediv__', '__mul__'):
317+
if name not in ('__div__', '__truediv__', '__mul__', '__rmul__'):
314318
raise TypeError("can only operate on a timedelta and an "
315-
"integer for division, but the operator [%s]"
316-
"was passed" % name)
319+
"integer or a float for division and "
320+
"multiplication, but the operator [%s] was"
321+
"passed" % name)
317322

318-
# 2 datetimes
319-
elif self.is_datetime_lhs and self.is_datetime_rhs:
320-
321-
if name not in ('__sub__','__rsub__'):
322-
raise TypeError("can only operate on a datetimes for"
323-
" subtraction, but the operator [%s] was"
324-
" passed" % name)
325-
326-
# if tz's must be equal (same or None)
327-
if getattr(lvalues,'tz',None) != getattr(rvalues,'tz',None):
328-
raise ValueError("Incompatbile tz's on datetime subtraction ops")
329323

330324
# 2 timedeltas
331325
elif ((self.is_timedelta_lhs and
@@ -339,6 +333,7 @@ def _validate(self, lvalues, rvalues, name):
339333
"addition, subtraction, and division, but the"
340334
" operator [%s] was passed" % name)
341335

336+
342337
# datetime and timedelta/DateOffset
343338
elif (self.is_datetime_lhs and
344339
(self.is_timedelta_rhs or self.is_offset_rhs)):
@@ -349,6 +344,28 @@ def _validate(self, lvalues, rvalues, name):
349344
" but the operator [%s] was passed" %
350345
name)
351346

347+
elif (self.is_datetime_rhs and
348+
(self.is_timedelta_lhs or self.is_offset_lhs)):
349+
if name not in ('__add__', '__radd__', '__rsub__'):
350+
raise TypeError("can only operate on a timedelta/DateOffset with a rhs of"
351+
" a datetime for addition,"
352+
" but the operator [%s] was passed" %
353+
name)
354+
355+
356+
# 2 datetimes
357+
elif self.is_datetime_lhs and self.is_datetime_rhs:
358+
359+
if name not in ('__sub__','__rsub__'):
360+
raise TypeError("can only operate on a datetimes for"
361+
" subtraction, but the operator [%s] was"
362+
" passed" % name)
363+
364+
# if tz's must be equal (same or None)
365+
if getattr(lvalues,'tz',None) != getattr(rvalues,'tz',None):
366+
raise ValueError("Incompatbile tz's on datetime subtraction ops")
367+
368+
352369
elif ((self.is_timedelta_lhs or self.is_offset_lhs)
353370
and self.is_datetime_rhs):
354371

@@ -357,7 +374,7 @@ def _validate(self, lvalues, rvalues, name):
357374
" a datetime for addition, but the operator"
358375
" [%s] was passed" % name)
359376
else:
360-
raise TypeError('cannot operate on a series with out a rhs '
377+
raise TypeError('cannot operate on a series without a rhs '
361378
'of a series/ndarray of type datetime64[ns] '
362379
'or a timedelta')
363380

@@ -366,17 +383,25 @@ def _convert_to_array(self, values, name=None, other=None):
366383
from pandas.tseries.timedeltas import to_timedelta
367384

368385
ovalues = values
386+
supplied_dtype = None
369387
if not is_list_like(values):
370388
values = np.array([values])
371-
372-
inferred_type = lib.infer_dtype(values)
373-
374-
if inferred_type in ('datetime64', 'datetime', 'date', 'time'):
389+
# if this is a Series that contains relevant dtype info, then use this
390+
# instead of the inferred type; this avoids coercing Series([NaT],
391+
# dtype='datetime64[ns]') to Series([NaT], dtype='timedelta64[ns]')
392+
elif isinstance(values, pd.Series) and (
393+
is_timedelta64_dtype(values) or is_datetime64_dtype(values)):
394+
supplied_dtype = values.dtype
395+
inferred_type = supplied_dtype or lib.infer_dtype(values)
396+
if (inferred_type in ('datetime64', 'datetime', 'date', 'time')
397+
or com.is_datetimetz(inferred_type)):
375398
# if we have a other of timedelta, but use pd.NaT here we
376399
# we are in the wrong path
377-
if (other is not None and other.dtype == 'timedelta64[ns]' and
378-
all(isnull(v) for v in values)):
379-
values = np.empty(values.shape, dtype=other.dtype)
400+
if (supplied_dtype is None
401+
and other is not None
402+
and (other.dtype in ('timedelta64[ns]', 'datetime64[ns]'))
403+
and isnull(values).all()):
404+
values = np.empty(values.shape, dtype='timedelta64[ns]')
380405
values[:] = iNaT
381406

382407
# a datelike
@@ -401,18 +426,15 @@ def _convert_to_array(self, values, name=None, other=None):
401426
values = values.astype('timedelta64[ns]')
402427
elif isinstance(values, pd.PeriodIndex):
403428
values = values.to_timestamp().to_series()
404-
elif name not in ('__truediv__', '__div__', '__mul__'):
429+
elif name not in ('__truediv__', '__div__', '__mul__', '__rmul__'):
405430
raise TypeError("incompatible type for a datetime/timedelta "
406431
"operation [{0}]".format(name))
407432
elif inferred_type == 'floating':
408-
# all nan, so ok, use the other dtype (e.g. timedelta or datetime)
409-
if isnull(values).all():
433+
if isnull(values).all() and name in ('__add__', '__radd__',
434+
'__sub__', '__rsub__'):
410435
values = np.empty(values.shape, dtype=other.dtype)
411436
values[:] = iNaT
412-
else:
413-
raise TypeError(
414-
'incompatible type [{0}] for a datetime/timedelta '
415-
'operation'.format(np.array(values).dtype))
437+
return values
416438
elif self._is_offset(values):
417439
return values
418440
else:
@@ -431,7 +453,10 @@ def _convert_for_datetime(self, lvalues, rvalues):
431453

432454
# datetime subtraction means timedelta
433455
if self.is_datetime_lhs and self.is_datetime_rhs:
434-
self.dtype = 'timedelta64[ns]'
456+
if self.name in ('__sub__', '__rsub__'):
457+
self.dtype = 'timedelta64[ns]'
458+
else:
459+
self.dtype = 'datetime64[ns]'
435460
elif self.is_datetime64tz_lhs:
436461
self.dtype = lvalues.dtype
437462
elif self.is_datetime64tz_rhs:
@@ -482,7 +507,8 @@ def _offset(lvalues, rvalues):
482507
rvalues = to_timedelta(rvalues)
483508

484509
lvalues = lvalues.astype(np.int64)
485-
rvalues = rvalues.astype(np.int64)
510+
if not self.is_floating_rhs:
511+
rvalues = rvalues.astype(np.int64)
486512

487513
# time delta division -> unit less
488514
# integer gets converted to timedelta in np < 1.6
@@ -580,7 +606,7 @@ def wrapper(left, right, name=name, na_op=na_op):
580606
lvalues, rvalues = left, right
581607
dtype = None
582608
wrap_results = lambda x: x
583-
elif time_converted == NotImplemented:
609+
elif time_converted is NotImplemented:
584610
return NotImplemented
585611
else:
586612
left, right = time_converted.left, time_converted.right

pandas/core/series.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -2078,8 +2078,9 @@ def map(self, arg, na_action=None):
20782078
same index as caller
20792079
"""
20802080
values = self._values
2081-
if com.is_datetime64_dtype(values.dtype):
2082-
values = lib.map_infer(values, lib.Timestamp)
2081+
if needs_i8_conversion(values.dtype):
2082+
boxer = i8_boxer(values)
2083+
values = lib.map_infer(values, boxer)
20832084

20842085
if na_action == 'ignore':
20852086
mask = isnull(values)
@@ -2210,8 +2211,9 @@ def apply(self, func, convert_dtype=True, args=(), **kwds):
22102211
return f(self)
22112212

22122213
values = _values_from_object(self)
2213-
if com.is_datetime64_dtype(values.dtype):
2214-
values = lib.map_infer(values, lib.Timestamp)
2214+
if needs_i8_conversion(values.dtype):
2215+
boxer = i8_boxer(values)
2216+
values = lib.map_infer(values, boxer)
22152217

22162218
mapped = lib.map_infer(values, f, convert=convert_dtype)
22172219
if len(mapped) and isinstance(mapped[0], Series):

0 commit comments

Comments
 (0)