diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index bd3bee507baa3..a182f8bd9c9a4 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -208,6 +208,9 @@ Other API Changes - In :func:`read_excel`, the ``comment`` argument is now exposed as a named parameter (:issue:`18735`) - Rearranged the order of keyword arguments in :func:`read_excel()` to align with :func:`read_csv()` (:issue:`16672`) - The options ``html.border`` and ``mode.use_inf_as_null`` were deprecated in prior versions, these will now show ``FutureWarning`` rather than a ``DeprecationWarning`` (:issue:`19003`) +- Subtracting ``NaT`` from a :class:`Series` with ``dtype='datetime64[ns]'`` returns a ``Series`` with ``dtype='timedelta64[ns]'`` instead of ``dtype='datetime64[ns]'`` (:issue:`18808`) +- Operations between a :class:`Series` with ``dtype='datetime64[ns]'`` and a :class:`PeriodIndex` will correctly raise ``TypeError`` (:issue:`18850`) +- Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mis-matched timezones will raise ``TypeError`` instead of ``ValueError`` (:issue:`18817`) .. _whatsnew_0230.deprecations: diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 0229f7c256464..554f0cb3803e9 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -341,10 +341,8 @@ def get_op(cls, left, right, name, na_op): normal numpy path. 
""" is_timedelta_lhs = is_timedelta64_dtype(left) - is_datetime_lhs = (is_datetime64_dtype(left) or - is_datetime64tz_dtype(left)) - if not (is_datetime_lhs or is_timedelta_lhs): + if not is_timedelta_lhs: return _Op(left, right, name, na_op) else: return _TimeOp(left, right, name, na_op) @@ -364,14 +362,8 @@ def __init__(self, left, right, name, na_op): rvalues = self._convert_to_array(right, name=name, other=lvalues) # left - self.is_offset_lhs = is_offsetlike(left) self.is_timedelta_lhs = is_timedelta64_dtype(lvalues) - self.is_datetime64_lhs = is_datetime64_dtype(lvalues) - self.is_datetime64tz_lhs = is_datetime64tz_dtype(lvalues) - self.is_datetime_lhs = (self.is_datetime64_lhs or - self.is_datetime64tz_lhs) - self.is_integer_lhs = left.dtype.kind in ['i', 'u'] - self.is_floating_lhs = left.dtype.kind == 'f' + assert self.is_timedelta_lhs # right self.is_offset_rhs = is_offsetlike(right) @@ -387,34 +379,6 @@ def __init__(self, left, right, name, na_op): self.lvalues, self.rvalues = self._convert_for_datetime(lvalues, rvalues) - def _validate_datetime(self, lvalues, rvalues, name): - # assumes self.is_datetime_lhs - - if (self.is_timedelta_rhs or self.is_offset_rhs): - # datetime and timedelta/DateOffset - if name not in ('__add__', '__radd__', '__sub__'): - raise TypeError("can only operate on a datetime with a rhs of " - "a timedelta/DateOffset for addition and " - "subtraction, but the operator [{name}] was " - "passed".format(name=name)) - - elif self.is_datetime_rhs: - # 2 datetimes - if name not in ('__sub__', '__rsub__'): - raise TypeError("can only operate on a datetimes for" - " subtraction, but the operator [{name}] was" - " passed".format(name=name)) - - # if tz's must be equal (same or None) - if getattr(lvalues, 'tz', None) != getattr(rvalues, 'tz', None): - raise ValueError("Incompatible tz's on datetime subtraction " - "ops") - - else: - raise TypeError('cannot operate on a series without a rhs ' - 'of a series/ndarray of type datetime64[ns] ' - 
'or a timedelta') - def _validate_timedelta(self, name): # assumes self.is_timedelta_lhs @@ -440,44 +404,8 @@ def _validate_timedelta(self, name): 'of a series/ndarray of type datetime64[ns] ' 'or a timedelta') - def _validate_offset(self, name): - # assumes self.is_offset_lhs - - if self.is_timedelta_rhs: - # 2 timedeltas - if name not in ('__div__', '__rdiv__', '__truediv__', - '__rtruediv__', '__add__', '__radd__', '__sub__', - '__rsub__'): - raise TypeError("can only operate on a timedeltas for addition" - ", subtraction, and division, but the operator" - " [{name}] was passed".format(name=name)) - - elif self.is_datetime_rhs: - if name not in ('__add__', '__radd__'): - raise TypeError("can only operate on a timedelta/DateOffset " - "and a datetime for addition, but the operator" - " [{name}] was passed".format(name=name)) - - else: - raise TypeError('cannot operate on a series without a rhs ' - 'of a series/ndarray of type datetime64[ns] ' - 'or a timedelta') - def _validate(self, lvalues, rvalues, name): - if self.is_datetime_lhs: - return self._validate_datetime(lvalues, rvalues, name) - elif self.is_timedelta_lhs: - return self._validate_timedelta(name) - elif self.is_offset_lhs: - return self._validate_offset(name) - - if ((self.is_integer_lhs or self.is_floating_lhs) and - self.is_timedelta_rhs): - self._check_timedelta_with_numeric(name) - else: - raise TypeError('cannot operate on a series without a rhs ' - 'of a series/ndarray of type datetime64[ns] ' - 'or a timedelta') + return self._validate_timedelta(name) def _check_timedelta_with_numeric(self, name): if name not in ('__div__', '__truediv__', '__mul__', '__rmul__'): @@ -498,7 +426,7 @@ def _convert_to_array(self, values, name=None, other=None): # if this is a Series that contains relevant dtype info, then use this # instead of the inferred type; this avoids coercing Series([NaT], # dtype='datetime64[ns]') to Series([NaT], dtype='timedelta64[ns]') - elif (isinstance(values, pd.Series) and + elif 
(isinstance(values, (pd.Series, ABCDatetimeIndex)) and (is_timedelta64_dtype(values) or is_datetime64_dtype(values))): supplied_dtype = values.dtype @@ -513,13 +441,11 @@ def _convert_to_array(self, values, name=None, other=None): values = np.empty(values.shape, dtype='timedelta64[ns]') values[:] = iNaT - # a datelike elif isinstance(values, ABCDatetimeIndex): - # TODO: why are we casting to_series in the first place? - values = values.to_series(keep_tz=True) - # datetime with tz - elif (isinstance(ovalues, datetime.datetime) and - hasattr(ovalues, 'tzinfo')): + # a datelike + pass + elif isinstance(ovalues, datetime.datetime): + # datetime scalar values = pd.DatetimeIndex(values) # datetime array with tz elif is_datetimetz(values): @@ -571,17 +497,10 @@ def _convert_for_datetime(self, lvalues, rvalues): mask = isna(lvalues) | isna(rvalues) # datetimes require views - if self.is_datetime_lhs or self.is_datetime_rhs: + if self.is_datetime_rhs: # datetime subtraction means timedelta - if self.is_datetime_lhs and self.is_datetime_rhs: - if self.name in ('__sub__', '__rsub__'): - self.dtype = 'timedelta64[ns]' - else: - self.dtype = 'datetime64[ns]' - elif self.is_datetime64tz_lhs: - self.dtype = lvalues.dtype - elif self.is_datetime64tz_rhs: + if self.is_datetime64tz_rhs: self.dtype = rvalues.dtype else: self.dtype = 'datetime64[ns]' @@ -601,15 +520,11 @@ def _offset(lvalues, rvalues): self.na_op = lambda x, y: getattr(x, self.name)(y) return lvalues, rvalues - if self.is_offset_lhs: - lvalues, rvalues = _offset(lvalues, rvalues) - elif self.is_offset_rhs: + if self.is_offset_rhs: rvalues, lvalues = _offset(rvalues, lvalues) else: # with tz, convert to UTC - if self.is_datetime64tz_lhs: - lvalues = lvalues.tz_convert('UTC').tz_localize(None) if self.is_datetime64tz_rhs: rvalues = rvalues.tz_convert('UTC').tz_localize(None) @@ -622,8 +537,6 @@ def _offset(lvalues, rvalues): self.dtype = 'timedelta64[ns]' # convert Tick DateOffset to underlying delta - if 
self.is_offset_lhs: - lvalues = to_timedelta(lvalues, box=False) if self.is_offset_rhs: rvalues = to_timedelta(rvalues, box=False) @@ -634,7 +547,7 @@ def _offset(lvalues, rvalues): # time delta division -> unit less # integer gets converted to timedelta in np < 1.6 if ((self.is_timedelta_lhs and self.is_timedelta_rhs) and - not self.is_integer_rhs and not self.is_integer_lhs and + not self.is_integer_rhs and self.name in ('__div__', '__rdiv__', '__truediv__', '__rtruediv__', '__floordiv__', '__rfloordiv__')): @@ -750,10 +663,16 @@ def wrapper(left, right, name=name, na_op=na_op): return NotImplemented left, right = _align_method_SERIES(left, right) + if is_datetime64_dtype(left) or is_datetime64tz_dtype(left): + result = op(pd.DatetimeIndex(left), right) + res_name = _get_series_op_result_name(left, right) + result.name = res_name # needs to be overriden if None + return construct_result(left, result, + index=left.index, name=res_name, + dtype=result.dtype) converted = _Op.get_op(left, right, name, na_op) - left, right = converted.left, converted.right lvalues, rvalues = converted.lvalues, converted.rvalues dtype = converted.dtype wrap_results = converted.wrap_results @@ -775,6 +694,7 @@ def wrapper(left, right, name=name, na_op=na_op): res_name = left.name result = wrap_results(safe_na_op(lvalues, rvalues)) + res_name = _get_series_op_result_name(left, right) return construct_result( left, result, @@ -786,6 +706,15 @@ def wrapper(left, right, name=name, na_op=na_op): return wrapper +def _get_series_op_result_name(left, right): + # `left` is always a pd.Series + if isinstance(right, (ABCSeries, pd.Index)): + name = _maybe_match_name(left, right) + else: + name = left.name + return name + + def _comp_method_OBJECT_ARRAY(op, x, y): if isinstance(y, list): y = construct_1d_object_array_from_listlike(y) @@ -1388,23 +1317,6 @@ def f(self, other): def _arith_method_PANEL(op, name, str_rep=None, fill_zeros=None, default_axis=None, **eval_kwargs): - # copied from Series 
na_op above, but without unnecessary branch for - # non-scalar - def na_op(x, y): - import pandas.core.computation.expressions as expressions - - try: - result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs) - except TypeError: - - # TODO: might need to find_common_type here? - result = np.empty(len(x), dtype=x.dtype) - mask = notna(x) - result[mask] = op(x[mask], y) - result, changed = maybe_upcast_putmask(result, ~mask, np.nan) - - result = missing.fill_zeros(result, x, y, name, fill_zeros) - return result # work only for scalars def f(self, other): diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index a421f2cb15bba..c1e9a62d98fd3 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -960,6 +960,13 @@ def test_timedelta64_ops_nat(self): assert_series_equal(timedelta_series / nan, nat_series_dtype_timedelta) + def test_td64_sub_NaT(self): + # GH#18808 + ser = Series([NaT, Timedelta('1s')]) + res = ser - NaT + expected = Series([NaT, NaT], dtype='timedelta64[ns]') + tm.assert_series_equal(res, expected) + @pytest.mark.parametrize('scalar_td', [timedelta(minutes=5, seconds=4), Timedelta(minutes=5, seconds=4), Timedelta('5m4s').to_timedelta64()]) @@ -1076,7 +1083,7 @@ def run_ops(ops, get_ser, test_ser): # defined for op_str in ops: op = getattr(get_ser, op_str, None) - with tm.assert_raises_regex(TypeError, 'operate'): + with tm.assert_raises_regex(TypeError, 'operate|cannot'): op(test_ser) # ## timedelta64 ### @@ -1253,6 +1260,20 @@ def test_datetime_series_with_DateOffset(self): s + op(5) op(5) + s + def test_dt64_sub_NaT(self): + # GH#18808 + dti = pd.DatetimeIndex([pd.NaT, pd.Timestamp('19900315')]) + ser = pd.Series(dti) + res = ser - pd.NaT + expected = pd.Series([pd.NaT, pd.NaT], dtype='timedelta64[ns]') + tm.assert_series_equal(res, expected) + + dti_tz = dti.tz_localize('Asia/Tokyo') + ser_tz = pd.Series(dti_tz) + res = ser_tz - pd.NaT + expected = 
pd.Series([pd.NaT, pd.NaT], dtype='timedelta64[ns]') + tm.assert_series_equal(res, expected) + def test_datetime64_ops_nat(self): # GH 11349 datetime_series = Series([NaT, Timestamp('19900315')]) @@ -1260,13 +1281,10 @@ def test_datetime64_ops_nat(self): single_nat_dtype_datetime = Series([NaT], dtype='datetime64[ns]') # subtraction - assert_series_equal(datetime_series - NaT, nat_series_dtype_timestamp) assert_series_equal(-NaT + datetime_series, nat_series_dtype_timestamp) with pytest.raises(TypeError): -single_nat_dtype_datetime + datetime_series - assert_series_equal(nat_series_dtype_timestamp - NaT, - nat_series_dtype_timestamp) assert_series_equal(-NaT + nat_series_dtype_timestamp, nat_series_dtype_timestamp) with pytest.raises(TypeError): @@ -2036,8 +2054,9 @@ def test_datetime64_with_index(self): result = s - s.index assert_series_equal(result, expected) - result = s - s.index.to_period() - assert_series_equal(result, expected) + with pytest.raises(TypeError): + # GH#18850 + result = s - s.index.to_period() df = DataFrame(np.random.randn(5, 2), index=date_range('20130101', periods=5)) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 2e3a7a6c28a11..6e711abf4491b 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -107,7 +107,7 @@ def test_shift(self): # incompat tz s2 = Series(date_range('2000-01-01 09:00:00', periods=5, tz='CET'), name='foo') - pytest.raises(ValueError, lambda: s - s2) + pytest.raises(TypeError, lambda: s - s2) def test_shift2(self): ts = Series(np.random.randn(5),