-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Make Series[datetime64] - pd.NaT behave like DatetimeIndex - pd.NaT #18960
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
d4b799a
c07f49a
49ed387
782ea74
5540a4b
e99b5e1
f0da8ec
f275a6c
9ac970d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -407,8 +407,12 @@ def _validate_datetime(self, lvalues, rvalues, name): | |
|
||
# if tz's must be equal (same or None) | ||
if getattr(lvalues, 'tz', None) != getattr(rvalues, 'tz', None): | ||
raise ValueError("Incompatible tz's on datetime subtraction " | ||
"ops") | ||
if len(rvalues) == 1 and np.isnat(rvalues[0]): | ||
# NaT gets a pass | ||
pass | ||
else: | ||
raise ValueError("Incompatible tz's on datetime " | ||
"subtraction ops", rvalues) | ||
|
||
else: | ||
raise TypeError('cannot operate on a series without a rhs ' | ||
|
@@ -505,11 +509,20 @@ def _convert_to_array(self, values, name=None, other=None): | |
inferred_type = lib.infer_dtype(values) | ||
if (inferred_type in ('datetime64', 'datetime', 'date', 'time') or | ||
is_datetimetz(inferred_type)): | ||
|
||
if ovalues is pd.NaT and name == '__sub__': | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what's wrong this this code that is existing? (obviously the dtype is incorrect if other is dt64 but otherwise this is fine)
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For starters the other.dtype condition excludes datetime64s with timezones. More generally, this PR is specifically about scalar subtraction of pd.NaT and I would rather give that its own block than try to shoe-horn it into an existing block. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. simplify this like the above code There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK. But in my head I'm going to put sarcasm quotes around the word "simplify". There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same issue. you are repeating code. you only need to select the dtype if other.dtype == 'timedelta64[ns]' There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What you're suggesting would catch |
||
# Note: This can only occur when `values` represents `right` | ||
# i.e. `other`. | ||
if other.dtype == 'timedelta64[ns]': | ||
values = np.array([iNaT], dtype='timedelta64[ns]') | ||
else: | ||
values = np.array([iNaT], dtype='datetime64[ns]') | ||
|
||
# if we have a other of timedelta, but use pd.NaT here we | ||
# we are in the wrong path | ||
if (supplied_dtype is None and other is not None and | ||
(other.dtype in ('timedelta64[ns]', 'datetime64[ns]')) and | ||
isna(values).all()): | ||
elif (supplied_dtype is None and other is not None and | ||
(other.dtype in ('timedelta64[ns]', 'datetime64[ns]')) and | ||
isna(values).all()): | ||
values = np.empty(values.shape, dtype='timedelta64[ns]') | ||
values[:] = iNaT | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -960,6 +960,13 @@ def test_timedelta64_ops_nat(self): | |
assert_series_equal(timedelta_series / nan, | ||
nat_series_dtype_timedelta) | ||
|
||
def test_td64_sub_NaT(self): | ||
# GH#18808 | ||
ser = pd.Series([pd.NaT, pd.Timedelta('1s')]) | ||
res = ser - pd.NaT | ||
expected = pd.Series([pd.NaT, pd.NaT], dtype='timedelta64[ns]') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. convention is not to use pd. |
||
tm.assert_series_equal(res, expected) | ||
|
||
@pytest.mark.parametrize('scalar_td', [timedelta(minutes=5, seconds=4), | ||
Timedelta(minutes=5, seconds=4), | ||
Timedelta('5m4s').to_timedelta64()]) | ||
|
@@ -1004,7 +1011,6 @@ def test_operators_timedelta64_with_timedelta_invalid(self, scalar_td): | |
|
||
|
||
class TestDatetimeSeriesArithmetic(object): | ||
|
||
def test_operators_datetimelike(self): | ||
def run_ops(ops, get_ser, test_ser): | ||
|
||
|
@@ -1197,13 +1203,10 @@ def test_datetime64_ops_nat(self): | |
single_nat_dtype_datetime = Series([NaT], dtype='datetime64[ns]') | ||
|
||
# subtraction | ||
assert_series_equal(datetime_series - NaT, nat_series_dtype_timestamp) | ||
assert_series_equal(-NaT + datetime_series, nat_series_dtype_timestamp) | ||
with pytest.raises(TypeError): | ||
-single_nat_dtype_datetime + datetime_series | ||
|
||
assert_series_equal(nat_series_dtype_timestamp - NaT, | ||
nat_series_dtype_timestamp) | ||
assert_series_equal(-NaT + nat_series_dtype_timestamp, | ||
nat_series_dtype_timestamp) | ||
with pytest.raises(TypeError): | ||
|
@@ -1236,6 +1239,20 @@ def test_datetime64_ops_nat(self): | |
with pytest.raises(TypeError): | ||
nat_series_dtype_timestamp / 1 | ||
|
||
def test_dt64_sub_NaT(self): | ||
# GH#18808 | ||
dti = pd.DatetimeIndex([pd.NaT, pd.Timestamp('19900315')]) | ||
ser = pd.Series(dti) | ||
res = ser - pd.NaT | ||
expected = pd.Series([pd.NaT, pd.NaT], dtype='timedelta64[ns]') | ||
tm.assert_series_equal(res, expected) | ||
|
||
dti_tz = dti.tz_localize('Asia/Tokyo') | ||
ser_tz = pd.Series(dti_tz) | ||
res = ser_tz - pd.NaT | ||
expected = pd.Series([pd.NaT, pd.NaT], dtype='timedelta64[ns]') | ||
tm.assert_series_equal(res, expected) | ||
|
||
|
||
class TestSeriesOperators(TestData): | ||
def test_op_method(self): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we never use
np.isnat
, always useisna
.why is this restricted to len(..) == 1? if they are all nat should be enough
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Because the bug being addressed is specific to scalar NaT subtraction. Non-scalar cases are handled elsewhere.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't have a strong opinion. np.isnat is more strict, which I figure is preferable since all else equal I'd like to avoid catching extra cases by accident.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
well I do. don't use it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You got it buddy. Just changed and pushed.