Skip to content

Commit b60b559

Browse files
committed
Merge pull request #11212 from kawochen/BUG-FIX-11206
BUG: GH11206 where pd.isnull did not consider numpy NaT null
2 parents d7dd136 + d38bd8a commit b60b559

File tree

8 files changed

+66
-7
lines changed

8 files changed

+66
-7
lines changed

asv_bench/benchmarks/frame_methods.py

+28-1
Original file line numberDiff line numberDiff line change
@@ -582,7 +582,7 @@ def time_frame_interpolate_some_good_infer(self):
582582
self.df.interpolate(downcast='infer')
583583

584584

585-
class frame_isnull(object):
585+
class frame_isnull_floats_no_null(object):
586586
goal_time = 0.2
587587

588588
def setup(self):
@@ -593,6 +593,33 @@ def time_frame_isnull(self):
593593
isnull(self.df)
594594

595595

596+
class frame_isnull_floats(object):
597+
goal_time = 0.2
598+
599+
def setup(self):
600+
np.random.seed(1234)
601+
self.sample = np.array([np.nan, 1.0])
602+
self.data = np.random.choice(self.sample, (1000, 1000))
603+
self.df = DataFrame(self.data)
604+
605+
def time_frame_isnull(self):
606+
isnull(self.df)
607+
608+
609+
class frame_isnull_obj(object):
610+
goal_time = 0.2
611+
612+
def setup(self):
613+
np.random.seed(1234)
614+
self.sample = np.array([NaT, np.nan, None, np.datetime64('NaT'),
615+
np.timedelta64('NaT'), 0, 1, 2.0, '', 'abcd'])
616+
self.data = np.random.choice(self.sample, (1000, 1000))
617+
self.df = DataFrame(self.data)
618+
619+
def time_frame_isnull(self):
620+
isnull(self.df)
621+
622+
596623
class frame_iteritems(object):
597624
goal_time = 0.2
598625

doc/source/whatsnew/v0.17.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ Bug Fixes
168168
- Bug in output formatting when using an index of ambiguous times (:issue:`11619`)
169169
- Bug in comparisons of Series vs list-likes (:issue:`11339`)
170170
- Bug in ``DataFrame.replace`` with a ``datetime64[ns, tz]`` and a non-compat to_replace (:issue:`11326`, :issue:`11153`)
171+
- Bug in ``isnull`` where ``numpy.datetime64('NaT')`` in a ``numpy.array`` was not determined to be null (:issue:`11206`)
171172
- Bug in list-like indexing with a mixed-integer Index (:issue:`11320`)
172173
- Bug in ``pivot_table`` with ``margins=True`` when indexes are of ``Categorical`` dtype (:issue:`10993`)
173174
- Bug in ``DataFrame.plot`` cannot use hex strings colors (:issue:`10299`)

pandas/lib.pyx

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# cython: profile=False
12
cimport numpy as np
23
cimport cython
34
import numpy as np
@@ -54,7 +55,8 @@ from datetime import datetime as pydatetime
5455
# this is our tseries.pxd
5556
from datetime cimport *
5657

57-
from tslib cimport convert_to_tsobject, convert_to_timedelta64
58+
from tslib cimport (convert_to_tsobject, convert_to_timedelta64,
59+
_check_all_nulls)
5860
import tslib
5961
from tslib import NaT, Timestamp, Timedelta
6062

@@ -245,8 +247,6 @@ def time64_to_datetime(ndarray[int64_t, ndim=1] arr):
245247

246248
return result
247249

248-
cdef inline int64_t get_timedelta64_value(val):
249-
return val.view('i8')
250250

251251
#----------------------------------------------------------------------
252252
# isnull / notnull related
@@ -346,10 +346,10 @@ def isnullobj(ndarray[object] arr):
346346
cdef ndarray[uint8_t] result
347347

348348
n = len(arr)
349-
result = np.zeros(n, dtype=np.uint8)
349+
result = np.empty(n, dtype=np.uint8)
350350
for i from 0 <= i < n:
351351
val = arr[i]
352-
result[i] = val is NaT or _checknull(val)
352+
result[i] = _check_all_nulls(val)
353353
return result.view(np.bool_)
354354

355355
@cython.wraparound(False)

pandas/src/datetime.pxd

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# cython: profile=False
12
from numpy cimport int64_t, int32_t, npy_int64, npy_int32, ndarray
23
from cpython cimport PyObject
34

@@ -59,6 +60,7 @@ cdef extern from "numpy/ndarrayobject.h":
5960

6061
cdef extern from "numpy_helper.h":
6162
npy_datetime get_datetime64_value(object o)
63+
npy_timedelta get_timedelta64_value(object o)
6264

6365
cdef extern from "numpy/npy_common.h":
6466

pandas/src/numpy_helper.h

+4
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,11 @@ get_nat(void) {
4040
PANDAS_INLINE npy_datetime
4141
get_datetime64_value(PyObject* obj) {
4242
return ((PyDatetimeScalarObject*) obj)->obval;
43+
}
4344

45+
PANDAS_INLINE npy_timedelta
46+
get_timedelta64_value(PyObject* obj) {
47+
return ((PyTimedeltaScalarObject*) obj)->obval;
4448
}
4549

4650
PANDAS_INLINE int

pandas/tests/test_common.py

+7
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,13 @@ def test_isnull_nat():
235235
exp = np.array([True])
236236
assert(np.array_equal(result, exp))
237237

238+
def test_isnull_numpy_nat():
239+
arr = np.array([NaT, np.datetime64('NaT'), np.timedelta64('NaT'),
240+
np.datetime64('NaT', 's')])
241+
result = isnull(arr)
242+
expected = np.array([True] * 4)
243+
tm.assert_numpy_array_equal(result, expected)
244+
238245
def test_isnull_datetime():
239246
assert (not isnull(datetime.now()))
240247
assert notnull(datetime.now())

pandas/tslib.pxd

+1
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@ cdef bint _is_utc(object)
77
cdef bint _is_tzlocal(object)
88
cdef object _get_dst_info(object)
99
cdef bint _nat_scalar_rules[6]
10+
cdef bint _check_all_nulls(obj)

pandas/tslib.pyx

+18-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
cimport numpy as np
44
from numpy cimport (int8_t, int32_t, int64_t, import_array, ndarray,
55
NPY_INT64, NPY_DATETIME, NPY_TIMEDELTA)
6+
from datetime cimport get_datetime64_value, get_timedelta64_value
67
import numpy as np
78

89
# GH3363
@@ -707,12 +708,28 @@ NaT = NaTType()
707708

708709
iNaT = util.get_nat()
709710

710-
711711
cdef inline bint _checknull_with_nat(object val):
712712
""" utility to check if a value is a nat or not """
713713
return val is None or (
714714
PyFloat_Check(val) and val != val) or val is NaT
715715

716+
cdef inline bint _check_all_nulls(object val):
717+
""" utility to check if a value is any type of null """
718+
cdef bint res
719+
if PyFloat_Check(val):
720+
res = val != val
721+
elif val is NaT:
722+
res = 1
723+
elif val is None:
724+
res = 1
725+
elif is_datetime64_object(val):
726+
res = get_datetime64_value(val) == NPY_NAT
727+
elif is_timedelta64_object(val):
728+
res = get_timedelta64_value(val) == NPY_NAT
729+
else:
730+
res = 0
731+
return res
732+
716733
cdef inline bint _cmp_nat_dt(_NaT lhs, _Timestamp rhs, int op) except -1:
717734
return _nat_scalar_rules[op]
718735

0 commit comments

Comments
 (0)