-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: Series/Index results in datetime/timedelta incorrectly if inputs are all nan/nat like #13477
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -103,6 +103,7 @@ def infer_dtype(object _values): | |
Py_ssize_t i, n | ||
object val | ||
ndarray values | ||
bint seen_pdnat = False, seen_val = False | ||
|
||
if isinstance(_values, np.ndarray): | ||
values = _values | ||
|
@@ -141,17 +142,34 @@ def infer_dtype(object _values): | |
values = values.ravel() | ||
|
||
# try to use a valid value | ||
for i in range(n): | ||
val = util.get_value_1d(values, i) | ||
if not is_null_datetimelike(val): | ||
break | ||
for i from 0 <= i < n: | ||
val = util.get_value_1d(values, i) | ||
|
||
if util.is_datetime64_object(val) or val is NaT: | ||
# do not use is_null_datetimelike to keep | ||
# np.datetime64('nat') and np.timedelta64('nat') | ||
if util._checknull(val): | ||
pass | ||
elif val is NaT: | ||
seen_pdnat = True | ||
else: | ||
seen_val = True | ||
break | ||
|
||
# if all values are nan/NaT | ||
if seen_val is False and seen_pdnat is True: | ||
return 'datetime' | ||
# float/object nan is handled in later logic | ||
|
||
if util.is_datetime64_object(val): | ||
if is_datetime64_array(values): | ||
return 'datetime64' | ||
elif is_timedelta_or_timedelta64_array(values): | ||
return 'timedelta' | ||
|
||
elif is_timedelta(val): | ||
if is_timedelta_or_timedelta64_array(values): | ||
return 'timedelta' | ||
|
||
elif util.is_integer_object(val): | ||
# a timedelta will show true here as well | ||
if is_timedelta(val): | ||
|
@@ -200,17 +218,15 @@ def infer_dtype(object _values): | |
if is_bytes_array(values): | ||
return 'bytes' | ||
|
||
elif is_timedelta(val): | ||
if is_timedelta_or_timedelta64_array(values): | ||
return 'timedelta' | ||
|
||
elif is_period(val): | ||
if is_period_array(values): | ||
return 'period' | ||
|
||
for i in range(n): | ||
val = util.get_value_1d(values, i) | ||
if util.is_integer_object(val): | ||
if (util.is_integer_object(val) and | ||
not util.is_timedelta64_object(val) and | ||
not util.is_datetime64_object(val)): | ||
return 'mixed-integer' | ||
|
||
return 'mixed' | ||
|
@@ -237,20 +253,46 @@ def is_possible_datetimelike_array(object arr): | |
return False | ||
return seen_datetime or seen_timedelta | ||
|
||
|
||
cdef inline bint is_null_datetimelike(v): | ||
# determine if we have a null for a timedelta/datetime (or integer versions) | ||
if util._checknull(v): | ||
return True | ||
elif v is NaT: | ||
return True | ||
elif util.is_timedelta64_object(v): | ||
return v.view('int64') == iNaT | ||
elif util.is_datetime64_object(v): | ||
return v.view('int64') == iNaT | ||
elif util.is_integer_object(v): | ||
return v == iNaT | ||
return False | ||
|
||
|
||
cdef inline bint is_null_datetime64(v): | ||
# determine if we have a null for a datetime (integer iNaT does not count), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we need to export these (as in tslib.pxd)? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not needed ATM because |
||
# excluding np.timedelta64('nat') | ||
if util._checknull(v): | ||
return True | ||
elif v is NaT: | ||
return True | ||
elif util.is_datetime64_object(v): | ||
return v.view('int64') == iNaT | ||
return False | ||
|
||
|
||
cdef inline bint is_null_timedelta64(v): | ||
# determine if we have a null for a timedelta (integer iNaT does not count), | ||
# excluding np.datetime64('nat') | ||
if util._checknull(v): | ||
return True | ||
elif v is NaT: | ||
return True | ||
elif util.is_timedelta64_object(v): | ||
return v.view('int64') == iNaT | ||
return False | ||
|
||
|
||
cdef inline bint is_datetime(object o): | ||
return PyDateTime_Check(o) | ||
|
||
|
@@ -420,7 +462,7 @@ def is_datetime_array(ndarray[object] values): | |
# return False for all nulls | ||
for i in range(n): | ||
v = values[i] | ||
if is_null_datetimelike(v): | ||
if is_null_datetime64(v): | ||
# we are a regular null | ||
if util._checknull(v): | ||
null_count += 1 | ||
|
@@ -437,7 +479,7 @@ def is_datetime64_array(ndarray values): | |
# return False for all nulls | ||
for i in range(n): | ||
v = values[i] | ||
if is_null_datetimelike(v): | ||
if is_null_datetime64(v): | ||
# we are a regular null | ||
if util._checknull(v): | ||
null_count += 1 | ||
|
@@ -481,7 +523,7 @@ def is_timedelta_array(ndarray values): | |
return False | ||
for i in range(n): | ||
v = values[i] | ||
if is_null_datetimelike(v): | ||
if is_null_timedelta64(v): | ||
# we are a regular null | ||
if util._checknull(v): | ||
null_count += 1 | ||
|
@@ -496,7 +538,7 @@ def is_timedelta64_array(ndarray values): | |
return False | ||
for i in range(n): | ||
v = values[i] | ||
if is_null_datetimelike(v): | ||
if is_null_timedelta64(v): | ||
# we are a regular null | ||
if util._checknull(v): | ||
null_count += 1 | ||
|
@@ -512,7 +554,7 @@ def is_timedelta_or_timedelta64_array(ndarray values): | |
return False | ||
for i in range(n): | ||
v = values[i] | ||
if is_null_datetimelike(v): | ||
if is_null_timedelta64(v): | ||
# we are a regular null | ||
if util._checknull(v): | ||
null_count += 1 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -252,6 +252,24 @@ def test_constructor_pass_none(self): | |
expected = Series(index=Index([None])) | ||
assert_series_equal(s, expected) | ||
|
||
def test_constructor_pass_nan_nat(self): | ||
# GH 13467 | ||
exp = Series([np.nan, np.nan], dtype=np.float64) | ||
self.assertEqual(exp.dtype, np.float64) | ||
tm.assert_series_equal(Series([np.nan, np.nan]), exp) | ||
tm.assert_series_equal(Series(np.array([np.nan, np.nan])), exp) | ||
|
||
exp = Series([pd.NaT, pd.NaT]) | ||
self.assertEqual(exp.dtype, 'datetime64[ns]') | ||
tm.assert_series_equal(Series([pd.NaT, pd.NaT]), exp) | ||
tm.assert_series_equal(Series(np.array([pd.NaT, pd.NaT])), exp) | ||
|
||
tm.assert_series_equal(Series([pd.NaT, np.nan]), exp) | ||
tm.assert_series_equal(Series(np.array([pd.NaT, np.nan])), exp) | ||
|
||
tm.assert_series_equal(Series([np.nan, pd.NaT]), exp) | ||
tm.assert_series_equal(Series(np.array([np.nan, pd.NaT])), exp) | ||
|
||
def test_constructor_cast(self): | ||
self.assertRaises(ValueError, Series, ['a', 'b', 'c'], dtype=float) | ||
|
||
|
@@ -688,8 +706,9 @@ def test_constructor_dtype_timedelta64(self): | |
td = Series([np.timedelta64(300000000), pd.NaT]) | ||
self.assertEqual(td.dtype, 'timedelta64[ns]') | ||
|
||
# because iNaT is int, not coerced to timedelta | ||
td = Series([np.timedelta64(300000000), tslib.iNaT]) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oh I see, this is essentially an invalid test because of the integer. OK then. |
||
self.assertEqual(td.dtype, 'timedelta64[ns]') | ||
self.assertEqual(td.dtype, 'object') | ||
|
||
td = Series([np.timedelta64(300000000), np.nan]) | ||
self.assertEqual(td.dtype, 'timedelta64[ns]') | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This must be moved here. Otherwise, `timedelta` and `object` mixed data is regarded as "mixed-integer".