Skip to content

Replace (private) _checknull with (public) is_nan #22146

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Aug 7, 2018
12 changes: 6 additions & 6 deletions pandas/_libs/hashing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
# at https://github.com/veorq/SipHash

import cython
from cpython cimport PyBytes_Check, PyUnicode_Check
from libc.stdlib cimport malloc, free

import numpy as np
from numpy cimport uint8_t, uint32_t, uint64_t
from numpy cimport uint8_t, uint32_t, uint64_t, import_array
import_array()

from util cimport _checknull
from cpython cimport (PyBytes_Check,
PyUnicode_Check)
from libc.stdlib cimport malloc, free
from util cimport is_nan

DEF cROUNDS = 2
DEF dROUNDS = 4
Expand Down Expand Up @@ -65,7 +65,7 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
data = <bytes>val
elif PyUnicode_Check(val):
data = <bytes>val.encode(encoding)
elif _checknull(val):
elif val is None or is_nan(val):
# null, stringify and encode
data = <bytes>str(val).encode(encoding)

Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/interval.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ from numpy cimport ndarray


cimport util
util.import_array()

from tslibs import Timestamp
from tslibs.timezones cimport tz_compare
Expand Down Expand Up @@ -391,7 +392,7 @@ cpdef intervals_to_interval_bounds(ndarray intervals,

for i in range(len(intervals)):
interval = intervals[i]
if util._checknull(interval):
if interval is None or util.is_nan(interval):
left[i] = np.nan
right[i] = np.nan
continue
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ from missing cimport checknull

cimport util
cdef int64_t NPY_NAT = util.get_nat()
from util cimport is_array, _checknull
from util cimport is_array, is_nan


def values_from_object(object o):
Expand Down Expand Up @@ -429,7 +429,7 @@ cpdef bint array_equivalent_object(object[:] left, object[:] right):
# we are either not equal or both nan
# I think None == None will be true here
if not (PyObject_RichCompareBool(x, y, Py_EQ) or
_checknull(x) and _checknull(y)):
(x is None or is_nan(x)) and (y is None or is_nan(y))):
return False
return True

Expand Down
10 changes: 5 additions & 5 deletions pandas/_libs/missing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ cpdef bint checknull(object val):
elif util.is_array(val):
return False
else:
return util._checknull(val)
return val is None or util.is_nan(val)


cpdef bint checknull_old(object val):
Expand Down Expand Up @@ -113,7 +113,7 @@ cpdef bint checknull_old(object val):
elif util.is_array(val):
return False
else:
return util._checknull(val)
return val is None or util.is_nan(val)


cdef inline bint _check_none_nan_inf_neginf(object val):
Expand Down Expand Up @@ -297,7 +297,7 @@ cpdef bint isneginf_scalar(object val):
cdef inline bint is_null_datetime64(v):
# determine if we have a null for a datetime (or integer versions),
# excluding np.timedelta64('nat')
if util._checknull(v):
if v is None or util.is_nan(v):
return True
elif v is NaT:
return True
Expand All @@ -309,7 +309,7 @@ cdef inline bint is_null_datetime64(v):
cdef inline bint is_null_timedelta64(v):
# determine if we have a null for a timedelta (or integer versions),
# excluding np.datetime64('nat')
if util._checknull(v):
if v is None or util.is_nan(v):
return True
elif v is NaT:
return True
Expand All @@ -321,7 +321,7 @@ cdef inline bint is_null_timedelta64(v):
cdef inline bint is_null_period(v):
# determine if we have a null for a Period (or integer versions),
# excluding np.datetime64('nat') and np.timedelta64('nat')
if util._checknull(v):
if v is None or util.is_nan(v):
return True
elif v is NaT:
return True
Expand Down
13 changes: 7 additions & 6 deletions pandas/_libs/ops.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@ cimport cython
from cython cimport Py_ssize_t

import numpy as np
from numpy cimport ndarray, uint8_t
from numpy cimport ndarray, uint8_t, import_array
import_array()


from util cimport UINT8_MAX, _checknull
from util cimport UINT8_MAX, is_nan

from missing cimport checknull

Expand Down Expand Up @@ -190,13 +191,13 @@ def scalar_binop(ndarray[object] values, object val, object op):
object x

result = np.empty(n, dtype=object)
if _checknull(val):
if val is None or is_nan(val):
result.fill(val)
return result

for i in range(n):
x = values[i]
if _checknull(x):
if x is None or is_nan(x):
result[i] = x
else:
result[i] = op(x, val)
Expand Down Expand Up @@ -237,9 +238,9 @@ def vec_binop(ndarray[object] left, ndarray[object] right, object op):
try:
result[i] = op(x, y)
except TypeError:
if _checknull(x):
if x is None or is_nan(x):
result[i] = x
elif _checknull(y):
elif y is None or is_nan(y):
result[i] = y
else:
raise
Expand Down
9 changes: 5 additions & 4 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ from pandas.core.dtypes.common import (
pandas_dtype)
from pandas.core.arrays import Categorical
from pandas.core.dtypes.concat import union_categoricals
import pandas.io.common as com
import pandas.io.common as icom

from pandas.errors import (ParserError, DtypeWarning,
EmptyDataError, ParserWarning)
Expand Down Expand Up @@ -665,7 +665,8 @@ cdef class TextReader:
if b'utf-16' in (self.encoding or b''):
# we need to read utf-16 through UTF8Recoder.
# if source is utf-16, convert source to utf-8 by UTF8Recoder.
source = com.UTF8Recoder(source, self.encoding.decode('utf-8'))
source = icom.UTF8Recoder(source,
self.encoding.decode('utf-8'))
self.encoding = b'utf-8'
self.c_encoding = <char*> self.encoding

Expand Down Expand Up @@ -1356,7 +1357,7 @@ cdef asbytes(object o):
# common NA values
# no longer excluding inf representations
# '1.#INF','-1.#INF', '1.#INF000000',
_NA_VALUES = _ensure_encoded(list(com._NA_VALUES))
_NA_VALUES = _ensure_encoded(list(icom._NA_VALUES))


def _maybe_upcast(arr):
Expand Down Expand Up @@ -2247,7 +2248,7 @@ def sanitize_objects(ndarray[object] values, set na_values,
n = len(values)
onan = np.nan

for i from 0 <= i < n:
for i in range(n):
val = values[i]
if (convert_empty and val == '') or (val in na_values):
values[i] = onan
Expand Down
5 changes: 3 additions & 2 deletions pandas/_libs/skiplist.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# Link: http://code.activestate.com/recipes/576930/

# Cython version: Wes McKinney
from random import random

from libc.math cimport log

Expand All @@ -17,8 +18,6 @@ cdef double Log2(double x):
return log(x) / log(2.)


from random import random

# TODO: optimize this, make less messy

cdef class Node:
Expand All @@ -32,9 +31,11 @@ cdef class Node:
self.next = next
self.width = width


# Singleton terminator node
NIL = Node(np.inf, [], [])


cdef class IndexableSkiplist:
"""
Sorted collection supporting O(lg n) insertion, removal, and
Expand Down
8 changes: 4 additions & 4 deletions pandas/_libs/src/inference.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ def infer_dtype(object value, bint skipna=False):

# do not use is_nul_datetimelike to keep
# np.datetime64('nat') and np.timedelta64('nat')
if util._checknull(val):
if val is None or util.is_nan(val):
pass
elif val is NaT:
seen_pdnat = True
Expand Down Expand Up @@ -522,7 +522,7 @@ cpdef object infer_datetimelike_array(object arr):
if len(objs) == 3:
break

elif util._checknull(v):
elif v is None or util.is_nan(v):
# nan or None
pass
elif v is NaT:
Expand Down Expand Up @@ -660,7 +660,7 @@ cdef class Validator:
)

cdef bint is_valid_null(self, object value) except -1:
return util._checknull(value)
return value is None or util.is_nan(value)

cdef bint is_array_typed(self) except -1:
return False
Expand Down Expand Up @@ -828,7 +828,7 @@ cdef class TemporalValidator(Validator):
cdef inline bint is_valid_skipna(self, object value) except -1:
cdef:
bint is_typed_null = self.is_valid_null(value)
bint is_generic_null = util._checknull(value)
bint is_generic_null = value is None or util.is_nan(value)
self.generic_null_count += is_typed_null and is_generic_null
return self.is_value_typed(value) or is_typed_null or is_generic_null

Expand Down
5 changes: 2 additions & 3 deletions pandas/_libs/tslibs/nattype.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -586,8 +586,7 @@ NaT = NaTType()

cdef inline bint checknull_with_nat(object val):
""" utility to check if a value is a nat or not """
return val is None or (
PyFloat_Check(val) and val != val) or val is NaT
return val is None or util.is_nan(val) or val is NaT


cdef inline bint is_null_datetimelike(object val):
Expand All @@ -602,7 +601,7 @@ cdef inline bint is_null_datetimelike(object val):
-------
null_datetimelike : bool
"""
if util._checknull(val):
if val is None or util.is_nan(val):
return True
elif val is NaT:
return True
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1368,7 +1368,7 @@ class Timedelta(_Timedelta):
'{op}'.format(dtype=other.dtype,
op='__floordiv__'))

elif is_float_object(other) and util._checknull(other):
elif is_float_object(other) and util.is_nan(other):
# i.e. np.nan
return NotImplemented

Expand Down
15 changes: 13 additions & 2 deletions pandas/_libs/tslibs/util.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -228,5 +228,16 @@ cdef inline bint is_offset_object(object val):
return getattr(val, '_typ', None) == "dateoffset"


cdef inline bint _checknull(object val):
return val is None or (PyFloat_Check(val) and val != val)
cdef inline bint is_nan(object val):
"""
Check if val is a Not-A-Number float, including float('NaN') and np.nan.

Parameters
----------
val : object

Returns
-------
is_nan : bool
"""
return is_float_object(val) and val != val
2 changes: 1 addition & 1 deletion pandas/_libs/writers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def string_array_replace_from_nan_rep(
if replace is None:
replace = np.nan

for i from 0 <= i < length:
for i in range(length):
if arr[i] == nan_rep:
arr[i] = replace

Expand Down