From d4b799aed8ae7829f5c388318b2be70eb500ea3c Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 19 Dec 2017 10:16:54 -0800 Subject: [PATCH 1/5] handle tzs, remove tests for old behavior --- doc/source/whatsnew/v0.22.0.txt | 1 + pandas/core/ops.py | 23 ++++++++++++++++++----- pandas/tests/series/test_operators.py | 24 +++++++++++++++++++++--- 3 files changed, 40 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 0579a80aad28e..81213f6c8aa4b 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -198,6 +198,7 @@ Other API Changes - Rearranged the order of keyword arguments in :func:`read_excel()` to align with :func:`read_csv()` (:issue:`16672`) - :func:`pandas.merge` now raises a ``ValueError`` when trying to merge on incompatible data types (:issue:`9780`) - :func:`wide_to_long` previously kept numeric-like suffixes as ``object`` dtype. Now they are cast to numeric if possible (:issue:`17627`) +- Subtracting ``NaT`` from a :class:`Series` with ``dtype='datetime64[ns]'`` returns a ``Series`` with ``dtype='timedelta64[ns]'`` instead of ``dtype='datetime64[ns]'`` (:issue:`18808`) .. 
_whatsnew_0220.deprecations: diff --git a/pandas/core/ops.py b/pandas/core/ops.py index e23609b23f529..f0a21ad4d6588 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -442,8 +442,12 @@ def _validate(self, lvalues, rvalues, name): # if tz's must be equal (same or None) if getattr(lvalues, 'tz', None) != getattr(rvalues, 'tz', None): - raise ValueError("Incompatible tz's on datetime subtraction " - "ops") + if len(rvalues) == 1 and np.isnat(rvalues[0]): + # NaT gets a pass + pass + else: + raise ValueError("Incompatible tz's on datetime " + "subtraction ops", rvalues) elif ((self.is_timedelta_lhs or self.is_offset_lhs) and self.is_datetime_rhs): @@ -476,11 +480,20 @@ def _convert_to_array(self, values, name=None, other=None): inferred_type = lib.infer_dtype(values) if (inferred_type in ('datetime64', 'datetime', 'date', 'time') or is_datetimetz(inferred_type)): + + if ovalues is pd.NaT and name == '__sub__': + # Note: This can only occur when `values` represents `right` + # i.e. `other`. 
+ if other.dtype == 'timedelta64[ns]': + values = np.array([iNaT], dtype='timedelta64[ns]') + else: + values = np.array([iNaT], dtype='datetime64[ns]') + # if we have a other of timedelta, but use pd.NaT here we # we are in the wrong path - if (supplied_dtype is None and other is not None and - (other.dtype in ('timedelta64[ns]', 'datetime64[ns]')) and - isna(values).all()): + elif (supplied_dtype is None and other is not None and + (other.dtype in ('timedelta64[ns]', 'datetime64[ns]')) and + isna(values).all()): values = np.empty(values.shape, dtype='timedelta64[ns]') values[:] = iNaT diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 89a6311153d15..a435a22c723a4 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -960,6 +960,13 @@ def test_timedelta64_ops_nat(self): assert_series_equal(timedelta_series / nan, nat_series_dtype_timedelta) + def test_td64_sub_NaT(self): + # GH#18808 + ser = pd.Series([pd.NaT, pd.Timedelta('1s')]) + res = ser - pd.NaT + expected = pd.Series([pd.NaT, pd.NaT], dtype='timedelta64[ns]') + tm.assert_series_equal(res, expected) + class TestDatetimeSeriesArithmetic(object): def test_operators_datetimelike(self): @@ -1164,13 +1171,10 @@ def test_datetime64_ops_nat(self): single_nat_dtype_datetime = Series([NaT], dtype='datetime64[ns]') # subtraction - assert_series_equal(datetime_series - NaT, nat_series_dtype_timestamp) assert_series_equal(-NaT + datetime_series, nat_series_dtype_timestamp) with pytest.raises(TypeError): -single_nat_dtype_datetime + datetime_series - assert_series_equal(nat_series_dtype_timestamp - NaT, - nat_series_dtype_timestamp) assert_series_equal(-NaT + nat_series_dtype_timestamp, nat_series_dtype_timestamp) with pytest.raises(TypeError): @@ -1203,6 +1207,20 @@ def test_datetime64_ops_nat(self): with pytest.raises(TypeError): nat_series_dtype_timestamp / 1 + def test_dt64_sub_NaT(self): + # GH#18808 + dti = 
pd.DatetimeIndex([pd.NaT, pd.Timestamp('19900315')]) + ser = pd.Series(dti) + res = ser - pd.NaT + expected = pd.Series([pd.NaT, pd.NaT], dtype='timedelta64[ns]') + tm.assert_series_equal(res, expected) + + dti_tz = dti.tz_localize('Asia/Tokyo') + ser_tz = pd.Series(dti_tz) + res = ser_tz - pd.NaT + expected = pd.Series([pd.NaT, pd.NaT], dtype='timedelta64[ns]') + tm.assert_series_equal(res, expected) + class TestSeriesOperators(TestData): def test_op_method(self): From 49ed387d26e835fad2730549c852311ec0c25861 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 27 Dec 2017 12:14:57 -0800 Subject: [PATCH 2/5] use isna instead of np.isnat --- pandas/core/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 578d916aa7239..ddcab3a08b224 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -407,7 +407,7 @@ def _validate_datetime(self, lvalues, rvalues, name): # if tz's must be equal (same or None) if getattr(lvalues, 'tz', None) != getattr(rvalues, 'tz', None): - if len(rvalues) == 1 and np.isnat(rvalues[0]): + if len(rvalues) == 1 and isna(rvalues[0]): # NaT gets a pass pass else: From f0da8ecd36002a4dd60c0563cf14b26006200149 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 30 Dec 2017 17:39:36 -0800 Subject: [PATCH 3/5] adhere to convention --- pandas/tests/series/test_operators.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index f26c28d0366d5..019476c467166 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -962,9 +962,9 @@ def test_timedelta64_ops_nat(self): def test_td64_sub_NaT(self): # GH#18808 - ser = pd.Series([pd.NaT, pd.Timedelta('1s')]) - res = ser - pd.NaT - expected = pd.Series([pd.NaT, pd.NaT], dtype='timedelta64[ns]') + ser = Series([NaT, Timedelta('1s')]) + res = ser - NaT + expected = Series([NaT, NaT], 
dtype='timedelta64[ns]') tm.assert_series_equal(res, expected) @pytest.mark.parametrize('scalar_td', [timedelta(minutes=5, seconds=4), From f275a6c44e1b7f7ba90411251e09ebabbd707686 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 30 Dec 2017 17:41:33 -0800 Subject: [PATCH 4/5] make code 'simpler' --- pandas/core/ops.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 6a499386b818e..3389200b7ae39 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -407,7 +407,7 @@ def _validate_datetime(self, lvalues, rvalues, name): # if tz's must be equal (same or None) if getattr(lvalues, 'tz', None) != getattr(rvalues, 'tz', None): - if len(rvalues) == 1 and isna(rvalues[0]): + if len(rvalues) == 1 and isna(rvalues).all(): # NaT gets a pass pass else: @@ -510,20 +510,17 @@ def _convert_to_array(self, values, name=None, other=None): if (inferred_type in ('datetime64', 'datetime', 'date', 'time') or is_datetimetz(inferred_type)): - if ovalues is pd.NaT and name == '__sub__': - # Note: This can only occur when `values` represents `right` - # i.e. `other`. 
- if other.dtype == 'timedelta64[ns]': - values = np.array([iNaT], dtype='timedelta64[ns]') - else: - values = np.array([iNaT], dtype='datetime64[ns]') - # if we have a other of timedelta, but use pd.NaT here we # we are in the wrong path - elif (supplied_dtype is None and other is not None and - (other.dtype in ('timedelta64[ns]', 'datetime64[ns]')) and - isna(values).all()): - values = np.empty(values.shape, dtype='timedelta64[ns]') + if (supplied_dtype is None and other is not None and + (other.dtype in ('timedelta64[ns]', 'datetime64[ns]')) and + isna(values).all()): + if len(values) == 1 and other.dtype == 'timedelta64[ns]': + values = np.empty(values.shape, dtype='timedelta64[ns]') + elif len(values) == 1 and other.dtype == 'datetime64[ns]': + values = np.empty(values.shape, dtype='datetime64[ns]') + else: + values = np.empty(values.shape, dtype='timedelta64[ns]') values[:] = iNaT # a datelike From 9ac970d4777503988aad67aca843080e165df96b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 30 Dec 2017 18:05:46 -0800 Subject: [PATCH 5/5] revert simplification --- pandas/core/ops.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 3389200b7ae39..18659898ae442 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -510,17 +510,20 @@ def _convert_to_array(self, values, name=None, other=None): if (inferred_type in ('datetime64', 'datetime', 'date', 'time') or is_datetimetz(inferred_type)): + if ovalues is pd.NaT and name == '__sub__': + # Note: This can only occur when `values` represents `right` + # i.e. `other`. 
+ if other.dtype == 'timedelta64[ns]': + values = np.array([iNaT], dtype='timedelta64[ns]') + else: + values = np.array([iNaT], dtype='datetime64[ns]') + # if we have a other of timedelta, but use pd.NaT here we # we are in the wrong path - if (supplied_dtype is None and other is not None and - (other.dtype in ('timedelta64[ns]', 'datetime64[ns]')) and - isna(values).all()): - if len(values) == 1 and other.dtype == 'timedelta64[ns]': - values = np.empty(values.shape, dtype='timedelta64[ns]') - elif len(values) == 1 and other.dtype == 'datetime64[ns]': - values = np.empty(values.shape, dtype='datetime64[ns]') - else: - values = np.empty(values.shape, dtype='timedelta64[ns]') + elif (supplied_dtype is None and other is not None and + (other.dtype in ('timedelta64[ns]', 'datetime64[ns]')) and + isna(values).all()): + values = np.empty(values.shape, dtype='timedelta64[ns]') values[:] = iNaT # a datelike