Skip to content

Commit 3198b9d

Browse files
jbrockmendel authored and jreback committed
dispatch Series[datetime64] ops to DatetimeIndex (#19024)
1 parent 61ed3e5 commit 3198b9d

File tree

4 files changed

+58
-124
lines changed

4 files changed

+58
-124
lines changed

doc/source/whatsnew/v0.23.0.txt

+3
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,9 @@ Other API Changes
208208
- In :func:`read_excel`, the ``comment`` argument is now exposed as a named parameter (:issue:`18735`)
209209
- Rearranged the order of keyword arguments in :func:`read_excel()` to align with :func:`read_csv()` (:issue:`16672`)
210210
- The options ``html.border`` and ``mode.use_inf_as_null`` were deprecated in prior versions, these will now show ``FutureWarning`` rather than a ``DeprecationWarning`` (:issue:`19003`)
211+
- Subtracting ``NaT`` from a :class:`Series` with ``dtype='datetime64[ns]'`` returns a ``Series`` with ``dtype='timedelta64[ns]'`` instead of ``dtype='datetime64[ns]'`` (:issue:`18808`)
212+
- Operations between a :class:`Series` with ``dtype='datetime64[ns]'`` and a :class:`PeriodIndex` will correctly raise ``TypeError`` (:issue:`18850`)
213+
- Subtraction of :class:`Series` objects with timezone-aware ``dtype='datetime64[ns]'`` and mismatched timezones will raise ``TypeError`` instead of ``ValueError`` (:issue:`18817`)
211214

212215
.. _whatsnew_0230.deprecations:
213216

pandas/core/ops.py

+29-117
Original file line numberDiff line numberDiff line change
@@ -341,10 +341,8 @@ def get_op(cls, left, right, name, na_op):
341341
normal numpy path.
342342
"""
343343
is_timedelta_lhs = is_timedelta64_dtype(left)
344-
is_datetime_lhs = (is_datetime64_dtype(left) or
345-
is_datetime64tz_dtype(left))
346344

347-
if not (is_datetime_lhs or is_timedelta_lhs):
345+
if not is_timedelta_lhs:
348346
return _Op(left, right, name, na_op)
349347
else:
350348
return _TimeOp(left, right, name, na_op)
@@ -364,14 +362,8 @@ def __init__(self, left, right, name, na_op):
364362
rvalues = self._convert_to_array(right, name=name, other=lvalues)
365363

366364
# left
367-
self.is_offset_lhs = is_offsetlike(left)
368365
self.is_timedelta_lhs = is_timedelta64_dtype(lvalues)
369-
self.is_datetime64_lhs = is_datetime64_dtype(lvalues)
370-
self.is_datetime64tz_lhs = is_datetime64tz_dtype(lvalues)
371-
self.is_datetime_lhs = (self.is_datetime64_lhs or
372-
self.is_datetime64tz_lhs)
373-
self.is_integer_lhs = left.dtype.kind in ['i', 'u']
374-
self.is_floating_lhs = left.dtype.kind == 'f'
366+
assert self.is_timedelta_lhs
375367

376368
# right
377369
self.is_offset_rhs = is_offsetlike(right)
@@ -387,34 +379,6 @@ def __init__(self, left, right, name, na_op):
387379
self.lvalues, self.rvalues = self._convert_for_datetime(lvalues,
388380
rvalues)
389381

390-
def _validate_datetime(self, lvalues, rvalues, name):
391-
# assumes self.is_datetime_lhs
392-
393-
if (self.is_timedelta_rhs or self.is_offset_rhs):
394-
# datetime and timedelta/DateOffset
395-
if name not in ('__add__', '__radd__', '__sub__'):
396-
raise TypeError("can only operate on a datetime with a rhs of "
397-
"a timedelta/DateOffset for addition and "
398-
"subtraction, but the operator [{name}] was "
399-
"passed".format(name=name))
400-
401-
elif self.is_datetime_rhs:
402-
# 2 datetimes
403-
if name not in ('__sub__', '__rsub__'):
404-
raise TypeError("can only operate on a datetimes for"
405-
" subtraction, but the operator [{name}] was"
406-
" passed".format(name=name))
407-
408-
# if tz's must be equal (same or None)
409-
if getattr(lvalues, 'tz', None) != getattr(rvalues, 'tz', None):
410-
raise ValueError("Incompatible tz's on datetime subtraction "
411-
"ops")
412-
413-
else:
414-
raise TypeError('cannot operate on a series without a rhs '
415-
'of a series/ndarray of type datetime64[ns] '
416-
'or a timedelta')
417-
418382
def _validate_timedelta(self, name):
419383
# assumes self.is_timedelta_lhs
420384

@@ -440,44 +404,8 @@ def _validate_timedelta(self, name):
440404
'of a series/ndarray of type datetime64[ns] '
441405
'or a timedelta')
442406

443-
def _validate_offset(self, name):
444-
# assumes self.is_offset_lhs
445-
446-
if self.is_timedelta_rhs:
447-
# 2 timedeltas
448-
if name not in ('__div__', '__rdiv__', '__truediv__',
449-
'__rtruediv__', '__add__', '__radd__', '__sub__',
450-
'__rsub__'):
451-
raise TypeError("can only operate on a timedeltas for addition"
452-
", subtraction, and division, but the operator"
453-
" [{name}] was passed".format(name=name))
454-
455-
elif self.is_datetime_rhs:
456-
if name not in ('__add__', '__radd__'):
457-
raise TypeError("can only operate on a timedelta/DateOffset "
458-
"and a datetime for addition, but the operator"
459-
" [{name}] was passed".format(name=name))
460-
461-
else:
462-
raise TypeError('cannot operate on a series without a rhs '
463-
'of a series/ndarray of type datetime64[ns] '
464-
'or a timedelta')
465-
466407
def _validate(self, lvalues, rvalues, name):
467-
if self.is_datetime_lhs:
468-
return self._validate_datetime(lvalues, rvalues, name)
469-
elif self.is_timedelta_lhs:
470-
return self._validate_timedelta(name)
471-
elif self.is_offset_lhs:
472-
return self._validate_offset(name)
473-
474-
if ((self.is_integer_lhs or self.is_floating_lhs) and
475-
self.is_timedelta_rhs):
476-
self._check_timedelta_with_numeric(name)
477-
else:
478-
raise TypeError('cannot operate on a series without a rhs '
479-
'of a series/ndarray of type datetime64[ns] '
480-
'or a timedelta')
408+
return self._validate_timedelta(name)
481409

482410
def _check_timedelta_with_numeric(self, name):
483411
if name not in ('__div__', '__truediv__', '__mul__', '__rmul__'):
@@ -498,7 +426,7 @@ def _convert_to_array(self, values, name=None, other=None):
498426
# if this is a Series that contains relevant dtype info, then use this
499427
# instead of the inferred type; this avoids coercing Series([NaT],
500428
# dtype='datetime64[ns]') to Series([NaT], dtype='timedelta64[ns]')
501-
elif (isinstance(values, pd.Series) and
429+
elif (isinstance(values, (pd.Series, ABCDatetimeIndex)) and
502430
(is_timedelta64_dtype(values) or is_datetime64_dtype(values))):
503431
supplied_dtype = values.dtype
504432

@@ -513,13 +441,11 @@ def _convert_to_array(self, values, name=None, other=None):
513441
values = np.empty(values.shape, dtype='timedelta64[ns]')
514442
values[:] = iNaT
515443

516-
# a datelike
517444
elif isinstance(values, ABCDatetimeIndex):
518-
# TODO: why are we casting to_series in the first place?
519-
values = values.to_series(keep_tz=True)
520-
# datetime with tz
521-
elif (isinstance(ovalues, datetime.datetime) and
522-
hasattr(ovalues, 'tzinfo')):
445+
# a datelike
446+
pass
447+
elif isinstance(ovalues, datetime.datetime):
448+
# datetime scalar
523449
values = pd.DatetimeIndex(values)
524450
# datetime array with tz
525451
elif is_datetimetz(values):
@@ -571,17 +497,10 @@ def _convert_for_datetime(self, lvalues, rvalues):
571497
mask = isna(lvalues) | isna(rvalues)
572498

573499
# datetimes require views
574-
if self.is_datetime_lhs or self.is_datetime_rhs:
500+
if self.is_datetime_rhs:
575501

576502
# datetime subtraction means timedelta
577-
if self.is_datetime_lhs and self.is_datetime_rhs:
578-
if self.name in ('__sub__', '__rsub__'):
579-
self.dtype = 'timedelta64[ns]'
580-
else:
581-
self.dtype = 'datetime64[ns]'
582-
elif self.is_datetime64tz_lhs:
583-
self.dtype = lvalues.dtype
584-
elif self.is_datetime64tz_rhs:
503+
if self.is_datetime64tz_rhs:
585504
self.dtype = rvalues.dtype
586505
else:
587506
self.dtype = 'datetime64[ns]'
@@ -601,15 +520,11 @@ def _offset(lvalues, rvalues):
601520
self.na_op = lambda x, y: getattr(x, self.name)(y)
602521
return lvalues, rvalues
603522

604-
if self.is_offset_lhs:
605-
lvalues, rvalues = _offset(lvalues, rvalues)
606-
elif self.is_offset_rhs:
523+
if self.is_offset_rhs:
607524
rvalues, lvalues = _offset(rvalues, lvalues)
608525
else:
609526

610527
# with tz, convert to UTC
611-
if self.is_datetime64tz_lhs:
612-
lvalues = lvalues.tz_convert('UTC').tz_localize(None)
613528
if self.is_datetime64tz_rhs:
614529
rvalues = rvalues.tz_convert('UTC').tz_localize(None)
615530

@@ -622,8 +537,6 @@ def _offset(lvalues, rvalues):
622537
self.dtype = 'timedelta64[ns]'
623538

624539
# convert Tick DateOffset to underlying delta
625-
if self.is_offset_lhs:
626-
lvalues = to_timedelta(lvalues, box=False)
627540
if self.is_offset_rhs:
628541
rvalues = to_timedelta(rvalues, box=False)
629542

@@ -634,7 +547,7 @@ def _offset(lvalues, rvalues):
634547
# time delta division -> unit less
635548
# integer gets converted to timedelta in np < 1.6
636549
if ((self.is_timedelta_lhs and self.is_timedelta_rhs) and
637-
not self.is_integer_rhs and not self.is_integer_lhs and
550+
not self.is_integer_rhs and
638551
self.name in ('__div__', '__rdiv__',
639552
'__truediv__', '__rtruediv__',
640553
'__floordiv__', '__rfloordiv__')):
@@ -750,10 +663,16 @@ def wrapper(left, right, name=name, na_op=na_op):
750663
return NotImplemented
751664

752665
left, right = _align_method_SERIES(left, right)
666+
if is_datetime64_dtype(left) or is_datetime64tz_dtype(left):
667+
result = op(pd.DatetimeIndex(left), right)
668+
res_name = _get_series_op_result_name(left, right)
669+
result.name = res_name # needs to be overriden if None
670+
return construct_result(left, result,
671+
index=left.index, name=res_name,
672+
dtype=result.dtype)
753673

754674
converted = _Op.get_op(left, right, name, na_op)
755675

756-
left, right = converted.left, converted.right
757676
lvalues, rvalues = converted.lvalues, converted.rvalues
758677
dtype = converted.dtype
759678
wrap_results = converted.wrap_results
@@ -775,6 +694,7 @@ def wrapper(left, right, name=name, na_op=na_op):
775694
res_name = left.name
776695

777696
result = wrap_results(safe_na_op(lvalues, rvalues))
697+
res_name = _get_series_op_result_name(left, right)
778698
return construct_result(
779699
left,
780700
result,
@@ -786,6 +706,15 @@ def wrapper(left, right, name=name, na_op=na_op):
786706
return wrapper
787707

788708

709+
def _get_series_op_result_name(left, right):
710+
# `left` is always a pd.Series
711+
if isinstance(right, (ABCSeries, pd.Index)):
712+
name = _maybe_match_name(left, right)
713+
else:
714+
name = left.name
715+
return name
716+
717+
789718
def _comp_method_OBJECT_ARRAY(op, x, y):
790719
if isinstance(y, list):
791720
y = construct_1d_object_array_from_listlike(y)
@@ -1388,23 +1317,6 @@ def f(self, other):
13881317

13891318
def _arith_method_PANEL(op, name, str_rep=None, fill_zeros=None,
13901319
default_axis=None, **eval_kwargs):
1391-
# copied from Series na_op above, but without unnecessary branch for
1392-
# non-scalar
1393-
def na_op(x, y):
1394-
import pandas.core.computation.expressions as expressions
1395-
1396-
try:
1397-
result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs)
1398-
except TypeError:
1399-
1400-
# TODO: might need to find_common_type here?
1401-
result = np.empty(len(x), dtype=x.dtype)
1402-
mask = notna(x)
1403-
result[mask] = op(x[mask], y)
1404-
result, changed = maybe_upcast_putmask(result, ~mask, np.nan)
1405-
1406-
result = missing.fill_zeros(result, x, y, name, fill_zeros)
1407-
return result
14081320

14091321
# work only for scalars
14101322
def f(self, other):

pandas/tests/series/test_operators.py

+25-6
Original file line numberDiff line numberDiff line change
@@ -960,6 +960,13 @@ def test_timedelta64_ops_nat(self):
960960
assert_series_equal(timedelta_series / nan,
961961
nat_series_dtype_timedelta)
962962

963+
def test_td64_sub_NaT(self):
964+
# GH#18808
965+
ser = Series([NaT, Timedelta('1s')])
966+
res = ser - NaT
967+
expected = Series([NaT, NaT], dtype='timedelta64[ns]')
968+
tm.assert_series_equal(res, expected)
969+
963970
@pytest.mark.parametrize('scalar_td', [timedelta(minutes=5, seconds=4),
964971
Timedelta(minutes=5, seconds=4),
965972
Timedelta('5m4s').to_timedelta64()])
@@ -1076,7 +1083,7 @@ def run_ops(ops, get_ser, test_ser):
10761083
# defined
10771084
for op_str in ops:
10781085
op = getattr(get_ser, op_str, None)
1079-
with tm.assert_raises_regex(TypeError, 'operate'):
1086+
with tm.assert_raises_regex(TypeError, 'operate|cannot'):
10801087
op(test_ser)
10811088

10821089
# ## timedelta64 ###
@@ -1253,20 +1260,31 @@ def test_datetime_series_with_DateOffset(self):
12531260
s + op(5)
12541261
op(5) + s
12551262

1263+
def test_dt64_sub_NaT(self):
1264+
# GH#18808
1265+
dti = pd.DatetimeIndex([pd.NaT, pd.Timestamp('19900315')])
1266+
ser = pd.Series(dti)
1267+
res = ser - pd.NaT
1268+
expected = pd.Series([pd.NaT, pd.NaT], dtype='timedelta64[ns]')
1269+
tm.assert_series_equal(res, expected)
1270+
1271+
dti_tz = dti.tz_localize('Asia/Tokyo')
1272+
ser_tz = pd.Series(dti_tz)
1273+
res = ser_tz - pd.NaT
1274+
expected = pd.Series([pd.NaT, pd.NaT], dtype='timedelta64[ns]')
1275+
tm.assert_series_equal(res, expected)
1276+
12561277
def test_datetime64_ops_nat(self):
12571278
# GH 11349
12581279
datetime_series = Series([NaT, Timestamp('19900315')])
12591280
nat_series_dtype_timestamp = Series([NaT, NaT], dtype='datetime64[ns]')
12601281
single_nat_dtype_datetime = Series([NaT], dtype='datetime64[ns]')
12611282

12621283
# subtraction
1263-
assert_series_equal(datetime_series - NaT, nat_series_dtype_timestamp)
12641284
assert_series_equal(-NaT + datetime_series, nat_series_dtype_timestamp)
12651285
with pytest.raises(TypeError):
12661286
-single_nat_dtype_datetime + datetime_series
12671287

1268-
assert_series_equal(nat_series_dtype_timestamp - NaT,
1269-
nat_series_dtype_timestamp)
12701288
assert_series_equal(-NaT + nat_series_dtype_timestamp,
12711289
nat_series_dtype_timestamp)
12721290
with pytest.raises(TypeError):
@@ -2036,8 +2054,9 @@ def test_datetime64_with_index(self):
20362054
result = s - s.index
20372055
assert_series_equal(result, expected)
20382056

2039-
result = s - s.index.to_period()
2040-
assert_series_equal(result, expected)
2057+
with pytest.raises(TypeError):
2058+
# GH#18850
2059+
result = s - s.index.to_period()
20412060

20422061
df = DataFrame(np.random.randn(5, 2),
20432062
index=date_range('20130101', periods=5))

pandas/tests/series/test_timeseries.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ def test_shift(self):
107107
# incompat tz
108108
s2 = Series(date_range('2000-01-01 09:00:00', periods=5,
109109
tz='CET'), name='foo')
110-
pytest.raises(ValueError, lambda: s - s2)
110+
pytest.raises(TypeError, lambda: s - s2)
111111

112112
def test_shift2(self):
113113
ts = Series(np.random.randn(5),

0 commit comments

Comments
 (0)