pandas-dev · jbrockmendel · Aug 7, 2018 · Jul 31, 2018 · Jul 31, 2018 · Jul 31, 2018
diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx
@@ -3,14 +3,14 @@
 # at https://github.com/veorq/SipHash
 
 import cython
+from cpython cimport PyBytes_Check, PyUnicode_Check
+from libc.stdlib cimport malloc, free
 
 import numpy as np
-from numpy cimport uint8_t, uint32_t, uint64_t
+from numpy cimport uint8_t, uint32_t, uint64_t, import_array
+import_array()
 
-from util cimport _checknull
-from cpython cimport (PyBytes_Check,
-                      PyUnicode_Check)
-from libc.stdlib cimport malloc, free
+from util cimport is_nan
 
 DEF cROUNDS = 2
 DEF dROUNDS = 4
@@ -65,7 +65,7 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
             data = <bytes>val
         elif PyUnicode_Check(val):
             data = <bytes>val.encode(encoding)
-        elif _checknull(val):
+        elif val is None or is_nan(val):
             # null, stringify and encode
             data = <bytes>str(val).encode(encoding)
 

diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx
@@ -12,6 +12,7 @@ from numpy cimport ndarray
 
 
 cimport util
+util.import_array()
 
 from tslibs import Timestamp
 from tslibs.timezones cimport tz_compare
@@ -391,7 +392,7 @@ cpdef intervals_to_interval_bounds(ndarray intervals,
 
     for i in range(len(intervals)):
         interval = intervals[i]
-        if util._checknull(interval):
+        if interval is None or util.is_nan(interval):
             left[i] = np.nan
             right[i] = np.nan
             continue

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -34,7 +34,7 @@ from missing cimport checknull
 
 cimport util
 cdef int64_t NPY_NAT = util.get_nat()
-from util cimport is_array, _checknull
+from util cimport is_array, is_nan
 
 
 def values_from_object(object o):
@@ -429,7 +429,7 @@ cpdef bint array_equivalent_object(object[:] left, object[:] right):
         # we are either not equal or both nan
         # I think None == None will be true here
         if not (PyObject_RichCompareBool(x, y, Py_EQ) or
-                _checknull(x) and _checknull(y)):
+                (x is None or is_nan(x)) and (y is None or is_nan(y))):
             return False
     return True
 

diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx
@@ -74,7 +74,7 @@ cpdef bint checknull(object val):
     elif util.is_array(val):
         return False
     else:
-        return util._checknull(val)
+        return val is None or util.is_nan(val)
 
 
 cpdef bint checknull_old(object val):
@@ -113,7 +113,7 @@ cpdef bint checknull_old(object val):
     elif util.is_array(val):
         return False
     else:
-        return util._checknull(val)
+        return val is None or util.is_nan(val)
 
 
 cdef inline bint _check_none_nan_inf_neginf(object val):
@@ -297,7 +297,7 @@ cpdef bint isneginf_scalar(object val):
 cdef inline bint is_null_datetime64(v):
     # determine if we have a null for a datetime (or integer versions),
     # excluding np.timedelta64('nat')
-    if util._checknull(v):
+    if v is None or util.is_nan(v):
         return True
     elif v is NaT:
         return True
@@ -309,7 +309,7 @@ cdef inline bint is_null_datetime64(v):
 cdef inline bint is_null_timedelta64(v):
     # determine if we have a null for a timedelta (or integer versions),
     # excluding np.datetime64('nat')
-    if util._checknull(v):
+    if v is None or util.is_nan(v):
         return True
     elif v is NaT:
         return True
@@ -321,7 +321,7 @@ cdef inline bint is_null_timedelta64(v):
 cdef inline bint is_null_period(v):
     # determine if we have a null for a Period (or integer versions),
     # excluding np.datetime64('nat') and np.timedelta64('nat')
-    if util._checknull(v):
+    if v is None or util.is_nan(v):
         return True
     elif v is NaT:
         return True

diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx
@@ -10,10 +10,11 @@ cimport cython
 from cython cimport Py_ssize_t
 
 import numpy as np
-from numpy cimport ndarray, uint8_t
+from numpy cimport ndarray, uint8_t, import_array
+import_array()
 
 
-from util cimport UINT8_MAX, _checknull
+from util cimport UINT8_MAX, is_nan
 
 from missing cimport checknull
 
@@ -190,13 +191,13 @@ def scalar_binop(ndarray[object] values, object val, object op):
         object x
 
     result = np.empty(n, dtype=object)
-    if _checknull(val):
+    if val is None or is_nan(val):
         result.fill(val)
         return result
 
     for i in range(n):
         x = values[i]
-        if _checknull(x):
+        if x is None or is_nan(x):
             result[i] = x
         else:
             result[i] = op(x, val)
@@ -237,9 +238,9 @@ def vec_binop(ndarray[object] left, ndarray[object] right, object op):
         try:
             result[i] = op(x, y)
         except TypeError:
-            if _checknull(x):
+            if x is None or is_nan(x):
                 result[i] = x
-            elif _checknull(y):
+            elif y is None or is_nan(y):
                 result[i] = y
             else:
                 raise

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
@@ -53,7 +53,7 @@ from pandas.core.dtypes.common import (
     pandas_dtype)
 from pandas.core.arrays import Categorical
 from pandas.core.dtypes.concat import union_categoricals
-import pandas.io.common as com
+import pandas.io.common as icom
 
 from pandas.errors import (ParserError, DtypeWarning,
                            EmptyDataError, ParserWarning)
@@ -665,7 +665,8 @@ cdef class TextReader:
             if b'utf-16' in (self.encoding or b''):
                 # we need to read utf-16 through UTF8Recoder.
                 # if source is utf-16, convert source to utf-8 by UTF8Recoder.
-                source = com.UTF8Recoder(source, self.encoding.decode('utf-8'))
+                source = icom.UTF8Recoder(source,
+                                          self.encoding.decode('utf-8'))
                 self.encoding = b'utf-8'
                 self.c_encoding = <char*> self.encoding
 
@@ -1356,7 +1357,7 @@ cdef asbytes(object o):
 # common NA values
 # no longer excluding inf representations
 # '1.#INF','-1.#INF', '1.#INF000000',
-_NA_VALUES = _ensure_encoded(list(com._NA_VALUES))
+_NA_VALUES = _ensure_encoded(list(icom._NA_VALUES))
 
 
 def _maybe_upcast(arr):
@@ -2247,7 +2248,7 @@ def sanitize_objects(ndarray[object] values, set na_values,
     n = len(values)
     onan = np.nan
 
-    for i from 0 <= i < n:
+    for i in range(n):
         val = values[i]
         if (convert_empty and val == '') or (val in na_values):
             values[i] = onan

diff --git a/pandas/_libs/skiplist.pyx b/pandas/_libs/skiplist.pyx
@@ -5,6 +5,7 @@
 # Link: http://code.activestate.com/recipes/576930/
 
 # Cython version: Wes McKinney
+from random import random
 
 from libc.math cimport log
 
@@ -17,8 +18,6 @@ cdef double Log2(double x):
     return log(x) / log(2.)
 
 
-from random import random
-
 # TODO: optimize this, make less messy
 
 cdef class Node:
@@ -32,9 +31,11 @@ cdef class Node:
         self.next = next
         self.width = width
 
+
 # Singleton terminator node
 NIL = Node(np.inf, [], [])
 
+
 cdef class IndexableSkiplist:
     """
     Sorted collection supporting O(lg n) insertion, removal, and

diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx
@@ -393,7 +393,7 @@ def infer_dtype(object value, bint skipna=False):
 
         # do not use is_nul_datetimelike to keep
         # np.datetime64('nat') and np.timedelta64('nat')
-        if util._checknull(val):
+        if val is None or util.is_nan(val):
             pass
         elif val is NaT:
             seen_pdnat = True
@@ -522,7 +522,7 @@ cpdef object infer_datetimelike_array(object arr):
             if len(objs) == 3:
                 break
 
-        elif util._checknull(v):
+        elif v is None or util.is_nan(v):
             # nan or None
             pass
         elif v is NaT:
@@ -660,7 +660,7 @@ cdef class Validator:
         )
 
     cdef bint is_valid_null(self, object value) except -1:
-        return util._checknull(value)
+        return value is None or util.is_nan(value)
 
     cdef bint is_array_typed(self) except -1:
         return False
@@ -828,7 +828,7 @@ cdef class TemporalValidator(Validator):
     cdef inline bint is_valid_skipna(self, object value) except -1:
         cdef:
             bint is_typed_null = self.is_valid_null(value)
-            bint is_generic_null = util._checknull(value)
+            bint is_generic_null = value is None or util.is_nan(value)
         self.generic_null_count += is_typed_null and is_generic_null
         return self.is_value_typed(value) or is_typed_null or is_generic_null
 

diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx
@@ -586,8 +586,7 @@ NaT = NaTType()
 
 cdef inline bint checknull_with_nat(object val):
     """ utility to check if a value is a nat or not """
-    return val is None or (
-        PyFloat_Check(val) and val != val) or val is NaT
+    return val is None or util.is_nan(val) or val is NaT
 
 
 cdef inline bint is_null_datetimelike(object val):
@@ -602,7 +601,7 @@ cdef inline bint is_null_datetimelike(object val):
     -------
     null_datetimelike : bool
     """
-    if util._checknull(val):
+    if val is None or util.is_nan(val):
         return True
     elif val is NaT:
         return True

diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
@@ -1368,7 +1368,7 @@ class Timedelta(_Timedelta):
                             '{op}'.format(dtype=other.dtype,
                                           op='__floordiv__'))
 
-        elif is_float_object(other) and util._checknull(other):
+        elif is_float_object(other) and util.is_nan(other):
             # i.e. np.nan
             return NotImplemented
 

diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd
@@ -228,5 +228,16 @@ cdef inline bint is_offset_object(object val):
     return getattr(val, '_typ', None) == "dateoffset"
 
 
-cdef inline bint _checknull(object val):
-    return val is None or (PyFloat_Check(val) and val != val)
+cdef inline bint is_nan(object val):
+    """
+    Check if val is a float NaN (float('NaN'), np.nan); no None check.
+
+    Parameters
+    ----------
+    val : object
+
+    Returns
+    -------
+    is_nan : bool
+    """
+    return is_float_object(val) and val != val  # a float NaN is != itself
diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx
@@ -163,7 +163,7 @@ def string_array_replace_from_nan_rep(
     if replace is None:
         replace = np.nan
 
-    for i from 0 <= i < length:
+    for i in range(length):
         if arr[i] == nan_rep:
             arr[i] = replace