diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index 2c07c28066faf..9367c42f8d39a 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -582,7 +582,7 @@ def time_frame_interpolate_some_good_infer(self): self.df.interpolate(downcast='infer') -class frame_isnull(object): +class frame_isnull_floats_no_null(object): goal_time = 0.2 def setup(self): @@ -593,6 +593,33 @@ def time_frame_isnull(self): isnull(self.df) +class frame_isnull_floats(object): + goal_time = 0.2 + + def setup(self): + np.random.seed(1234) + self.sample = np.array([np.nan, 1.0]) + self.data = np.random.choice(self.sample, (1000, 1000)) + self.df = DataFrame(self.data) + + def time_frame_isnull(self): + isnull(self.df) + + +class frame_isnull_obj(object): + goal_time = 0.2 + + def setup(self): + np.random.seed(1234) + self.sample = np.array([NaT, np.nan, None, np.datetime64('NaT'), + np.timedelta64('NaT'), 0, 1, 2.0, '', 'abcd']) + self.data = np.random.choice(self.sample, (1000, 1000)) + self.df = DataFrame(self.data) + + def time_frame_isnull(self): + isnull(self.df) + + class frame_iteritems(object): goal_time = 0.2 diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt index f2008727017f8..f8801cfdf9785 100755 --- a/doc/source/whatsnew/v0.17.1.txt +++ b/doc/source/whatsnew/v0.17.1.txt @@ -156,6 +156,7 @@ Bug Fixes - Bug in output formatting when using an index of ambiguous times (:issue:`11619`) - Bug in comparisons of Series vs list-likes (:issue:`11339`) - Bug in ``DataFrame.replace`` with a ``datetime64[ns, tz]`` and a non-compat to_replace (:issue:`11326`, :issue:`11153`) +- Bug in ``isnull`` where ``numpy.datetime64('NaT')`` in a ``numpy.array`` was not determined to be null (:issue:`11206`) - Bug in list-like indexing with a mixed-integer Index (:issue:`11320`) - Bug in ``pivot_table`` with ``margins=True`` when indexes are of ``Categorical`` dtype (:issue:`10993`) - Bug in 
``DataFrame.plot`` cannot use hex strings colors (:issue:`10299`) diff --git a/pandas/lib.pyx b/pandas/lib.pyx index 1a1f04cba1cb9..f7978c4791538 100644 --- a/pandas/lib.pyx +++ b/pandas/lib.pyx @@ -1,3 +1,4 @@ +# cython: profile=False cimport numpy as np cimport cython import numpy as np @@ -54,7 +55,8 @@ from datetime import datetime as pydatetime # this is our tseries.pxd from datetime cimport * -from tslib cimport convert_to_tsobject, convert_to_timedelta64 +from tslib cimport (convert_to_tsobject, convert_to_timedelta64, + _check_all_nulls) import tslib from tslib import NaT, Timestamp, Timedelta @@ -245,8 +247,6 @@ def time64_to_datetime(ndarray[int64_t, ndim=1] arr): return result -cdef inline int64_t get_timedelta64_value(val): - return val.view('i8') #---------------------------------------------------------------------- # isnull / notnull related @@ -346,10 +346,10 @@ def isnullobj(ndarray[object] arr): cdef ndarray[uint8_t] result n = len(arr) - result = np.zeros(n, dtype=np.uint8) + result = np.empty(n, dtype=np.uint8) for i from 0 <= i < n: val = arr[i] - result[i] = val is NaT or _checknull(val) + result[i] = _check_all_nulls(val) return result.view(np.bool_) @cython.wraparound(False) diff --git a/pandas/src/datetime.pxd b/pandas/src/datetime.pxd index f2f764c785894..5f7de8244d17e 100644 --- a/pandas/src/datetime.pxd +++ b/pandas/src/datetime.pxd @@ -1,3 +1,4 @@ +# cython: profile=False from numpy cimport int64_t, int32_t, npy_int64, npy_int32, ndarray from cpython cimport PyObject @@ -59,6 +60,7 @@ cdef extern from "numpy/ndarrayobject.h": cdef extern from "numpy_helper.h": npy_datetime get_datetime64_value(object o) + npy_timedelta get_timedelta64_value(object o) cdef extern from "numpy/npy_common.h": diff --git a/pandas/src/numpy_helper.h b/pandas/src/numpy_helper.h index 8b79bbe79ff2f..9f406890c4e68 100644 --- a/pandas/src/numpy_helper.h +++ b/pandas/src/numpy_helper.h @@ -40,7 +40,11 @@ get_nat(void) { PANDAS_INLINE npy_datetime 
get_datetime64_value(PyObject* obj) { return ((PyDatetimeScalarObject*) obj)->obval; +} +PANDAS_INLINE npy_timedelta +get_timedelta64_value(PyObject* obj) { + return ((PyTimedeltaScalarObject*) obj)->obval; } PANDAS_INLINE int diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 89826209fa46d..57448e2d018dc 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -179,6 +179,13 @@ def test_isnull_nat(): exp = np.array([True]) assert(np.array_equal(result, exp)) +def test_isnull_numpy_nat(): + arr = np.array([NaT, np.datetime64('NaT'), np.timedelta64('NaT'), + np.datetime64('NaT', 's')]) + result = isnull(arr) + expected = np.array([True] * 4) + tm.assert_numpy_array_equal(result, expected) + def test_isnull_datetime(): assert (not isnull(datetime.now())) assert notnull(datetime.now()) diff --git a/pandas/tslib.pxd b/pandas/tslib.pxd index 3cb7e94c65100..5e0c88604206c 100644 --- a/pandas/tslib.pxd +++ b/pandas/tslib.pxd @@ -7,3 +7,4 @@ cdef bint _is_utc(object) cdef bint _is_tzlocal(object) cdef object _get_dst_info(object) cdef bint _nat_scalar_rules[6] +cdef bint _check_all_nulls(obj) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 0d47c2526df14..713cf08bfc3e2 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -3,6 +3,7 @@ cimport numpy as np from numpy cimport (int8_t, int32_t, int64_t, import_array, ndarray, NPY_INT64, NPY_DATETIME, NPY_TIMEDELTA) +from datetime cimport get_datetime64_value, get_timedelta64_value import numpy as np # GH3363 @@ -707,12 +708,28 @@ NaT = NaTType() iNaT = util.get_nat() - cdef inline bint _checknull_with_nat(object val): """ utility to check if a value is a nat or not """ return val is None or ( PyFloat_Check(val) and val != val) or val is NaT +cdef inline bint _check_all_nulls(object val): + """ utility to check if a value is any type of null """ + cdef bint res + if PyFloat_Check(val): + res = val != val + elif val is NaT: + res = 1 + elif val is None: + res = 1 + elif 
is_datetime64_object(val): + res = get_datetime64_value(val) == NPY_NAT + elif is_timedelta64_object(val): + res = get_timedelta64_value(val) == NPY_NAT + else: + res = 0 + return res + cdef inline bint _cmp_nat_dt(_NaT lhs, _Timestamp rhs, int op) except -1: return _nat_scalar_rules[op]