Skip to content

Commit 0041681

Browse files
authored
Replace (private) _checknull with (public) is_nan (#22146)
* Make is_nan public in place of the private _checknull * Clean up non-Python syntax * Fix up a missed usage * Clean up imports and whitespace * Have is_nan check for np.float_ * Fix segfaults
1 parent 429ff1c commit 0041681

12 files changed

+51
-37
lines changed

pandas/_libs/hashing.pyx

+6-6
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,14 @@
33
# at https://github.com/veorq/SipHash
44

55
import cython
6+
from cpython cimport PyBytes_Check, PyUnicode_Check
7+
from libc.stdlib cimport malloc, free
68

79
import numpy as np
8-
from numpy cimport uint8_t, uint32_t, uint64_t
10+
from numpy cimport uint8_t, uint32_t, uint64_t, import_array
11+
import_array()
912

10-
from util cimport _checknull
11-
from cpython cimport (PyBytes_Check,
12-
PyUnicode_Check)
13-
from libc.stdlib cimport malloc, free
13+
from util cimport is_nan
1414

1515
DEF cROUNDS = 2
1616
DEF dROUNDS = 4
@@ -65,7 +65,7 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
6565
data = <bytes>val
6666
elif PyUnicode_Check(val):
6767
data = <bytes>val.encode(encoding)
68-
elif _checknull(val):
68+
elif val is None or is_nan(val):
6969
# null, stringify and encode
7070
data = <bytes>str(val).encode(encoding)
7171

pandas/_libs/interval.pyx

+2-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ from numpy cimport ndarray
1212

1313

1414
cimport util
15+
util.import_array()
1516

1617
from tslibs import Timestamp
1718
from tslibs.timezones cimport tz_compare
@@ -391,7 +392,7 @@ cpdef intervals_to_interval_bounds(ndarray intervals,
391392

392393
for i in range(len(intervals)):
393394
interval = intervals[i]
394-
if util._checknull(interval):
395+
if interval is None or util.is_nan(interval):
395396
left[i] = np.nan
396397
right[i] = np.nan
397398
continue

pandas/_libs/lib.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ from missing cimport checknull
3434

3535
cimport util
3636
cdef int64_t NPY_NAT = util.get_nat()
37-
from util cimport is_array, _checknull
37+
from util cimport is_array, is_nan
3838

3939

4040
def values_from_object(object o):
@@ -429,7 +429,7 @@ cpdef bint array_equivalent_object(object[:] left, object[:] right):
429429
# we are either not equal or both nan
430430
# I think None == None will be true here
431431
if not (PyObject_RichCompareBool(x, y, Py_EQ) or
432-
_checknull(x) and _checknull(y)):
432+
(x is None or is_nan(x)) and (y is None or is_nan(y))):
433433
return False
434434
return True
435435

pandas/_libs/missing.pyx

+5-5
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ cpdef bint checknull(object val):
7474
elif util.is_array(val):
7575
return False
7676
else:
77-
return util._checknull(val)
77+
return val is None or util.is_nan(val)
7878

7979

8080
cpdef bint checknull_old(object val):
@@ -113,7 +113,7 @@ cpdef bint checknull_old(object val):
113113
elif util.is_array(val):
114114
return False
115115
else:
116-
return util._checknull(val)
116+
return val is None or util.is_nan(val)
117117

118118

119119
cdef inline bint _check_none_nan_inf_neginf(object val):
@@ -297,7 +297,7 @@ cpdef bint isneginf_scalar(object val):
297297
cdef inline bint is_null_datetime64(v):
298298
# determine if we have a null for a datetime (or integer versions),
299299
# excluding np.timedelta64('nat')
300-
if util._checknull(v):
300+
if v is None or util.is_nan(v):
301301
return True
302302
elif v is NaT:
303303
return True
@@ -309,7 +309,7 @@ cdef inline bint is_null_datetime64(v):
309309
cdef inline bint is_null_timedelta64(v):
310310
# determine if we have a null for a timedelta (or integer versions),
311311
# excluding np.datetime64('nat')
312-
if util._checknull(v):
312+
if v is None or util.is_nan(v):
313313
return True
314314
elif v is NaT:
315315
return True
@@ -321,7 +321,7 @@ cdef inline bint is_null_timedelta64(v):
321321
cdef inline bint is_null_period(v):
322322
# determine if we have a null for a Period (or integer versions),
323323
# excluding np.datetime64('nat') and np.timedelta64('nat')
324-
if util._checknull(v):
324+
if v is None or util.is_nan(v):
325325
return True
326326
elif v is NaT:
327327
return True

pandas/_libs/ops.pyx

+7-6
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,11 @@ cimport cython
1010
from cython cimport Py_ssize_t
1111

1212
import numpy as np
13-
from numpy cimport ndarray, uint8_t
13+
from numpy cimport ndarray, uint8_t, import_array
14+
import_array()
1415

1516

16-
from util cimport UINT8_MAX, _checknull
17+
from util cimport UINT8_MAX, is_nan
1718

1819
from missing cimport checknull
1920

@@ -190,13 +191,13 @@ def scalar_binop(ndarray[object] values, object val, object op):
190191
object x
191192

192193
result = np.empty(n, dtype=object)
193-
if _checknull(val):
194+
if val is None or is_nan(val):
194195
result.fill(val)
195196
return result
196197

197198
for i in range(n):
198199
x = values[i]
199-
if _checknull(x):
200+
if x is None or is_nan(x):
200201
result[i] = x
201202
else:
202203
result[i] = op(x, val)
@@ -237,9 +238,9 @@ def vec_binop(ndarray[object] left, ndarray[object] right, object op):
237238
try:
238239
result[i] = op(x, y)
239240
except TypeError:
240-
if _checknull(x):
241+
if x is None or is_nan(x):
241242
result[i] = x
242-
elif _checknull(y):
243+
elif y is None or is_nan(y):
243244
result[i] = y
244245
else:
245246
raise

pandas/_libs/parsers.pyx

+5-4
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ from pandas.core.dtypes.common import (
5353
pandas_dtype)
5454
from pandas.core.arrays import Categorical
5555
from pandas.core.dtypes.concat import union_categoricals
56-
import pandas.io.common as com
56+
import pandas.io.common as icom
5757

5858
from pandas.errors import (ParserError, DtypeWarning,
5959
EmptyDataError, ParserWarning)
@@ -665,7 +665,8 @@ cdef class TextReader:
665665
if b'utf-16' in (self.encoding or b''):
666666
# we need to read utf-16 through UTF8Recoder.
667667
# if source is utf-16, convert source to utf-8 by UTF8Recoder.
668-
source = com.UTF8Recoder(source, self.encoding.decode('utf-8'))
668+
source = icom.UTF8Recoder(source,
669+
self.encoding.decode('utf-8'))
669670
self.encoding = b'utf-8'
670671
self.c_encoding = <char*> self.encoding
671672

@@ -1356,7 +1357,7 @@ cdef asbytes(object o):
13561357
# common NA values
13571358
# no longer excluding inf representations
13581359
# '1.#INF','-1.#INF', '1.#INF000000',
1359-
_NA_VALUES = _ensure_encoded(list(com._NA_VALUES))
1360+
_NA_VALUES = _ensure_encoded(list(icom._NA_VALUES))
13601361

13611362

13621363
def _maybe_upcast(arr):
@@ -2247,7 +2248,7 @@ def sanitize_objects(ndarray[object] values, set na_values,
22472248
n = len(values)
22482249
onan = np.nan
22492250

2250-
for i from 0 <= i < n:
2251+
for i in range(n):
22512252
val = values[i]
22522253
if (convert_empty and val == '') or (val in na_values):
22532254
values[i] = onan

pandas/_libs/skiplist.pyx

+3-2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
# Link: http://code.activestate.com/recipes/576930/
66

77
# Cython version: Wes McKinney
8+
from random import random
89

910
from libc.math cimport log
1011

@@ -17,8 +18,6 @@ cdef double Log2(double x):
1718
return log(x) / log(2.)
1819

1920

20-
from random import random
21-
2221
# TODO: optimize this, make less messy
2322

2423
cdef class Node:
@@ -32,9 +31,11 @@ cdef class Node:
3231
self.next = next
3332
self.width = width
3433

34+
3535
# Singleton terminator node
3636
NIL = Node(np.inf, [], [])
3737

38+
3839
cdef class IndexableSkiplist:
3940
"""
4041
Sorted collection supporting O(lg n) insertion, removal, and

pandas/_libs/src/inference.pyx

+4-4
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,7 @@ def infer_dtype(object value, bint skipna=False):
393393

394394
# do not use is_null_datetimelike to keep
395395
# np.datetime64('nat') and np.timedelta64('nat')
396-
if util._checknull(val):
396+
if val is None or util.is_nan(val):
397397
pass
398398
elif val is NaT:
399399
seen_pdnat = True
@@ -522,7 +522,7 @@ cpdef object infer_datetimelike_array(object arr):
522522
if len(objs) == 3:
523523
break
524524

525-
elif util._checknull(v):
525+
elif v is None or util.is_nan(v):
526526
# nan or None
527527
pass
528528
elif v is NaT:
@@ -660,7 +660,7 @@ cdef class Validator:
660660
)
661661

662662
cdef bint is_valid_null(self, object value) except -1:
663-
return util._checknull(value)
663+
return value is None or util.is_nan(value)
664664

665665
cdef bint is_array_typed(self) except -1:
666666
return False
@@ -828,7 +828,7 @@ cdef class TemporalValidator(Validator):
828828
cdef inline bint is_valid_skipna(self, object value) except -1:
829829
cdef:
830830
bint is_typed_null = self.is_valid_null(value)
831-
bint is_generic_null = util._checknull(value)
831+
bint is_generic_null = value is None or util.is_nan(value)
832832
self.generic_null_count += is_typed_null and is_generic_null
833833
return self.is_value_typed(value) or is_typed_null or is_generic_null
834834

pandas/_libs/tslibs/nattype.pyx

+2-3
Original file line numberDiff line numberDiff line change
@@ -586,8 +586,7 @@ NaT = NaTType()
586586

587587
cdef inline bint checknull_with_nat(object val):
588588
""" utility to check if a value is a nat or not """
589-
return val is None or (
590-
PyFloat_Check(val) and val != val) or val is NaT
589+
return val is None or util.is_nan(val) or val is NaT
591590

592591

593592
cdef inline bint is_null_datetimelike(object val):
@@ -602,7 +601,7 @@ cdef inline bint is_null_datetimelike(object val):
602601
-------
603602
null_datetimelike : bool
604603
"""
605-
if util._checknull(val):
604+
if val is None or util.is_nan(val):
606605
return True
607606
elif val is NaT:
608607
return True

pandas/_libs/tslibs/timedeltas.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -1369,7 +1369,7 @@ class Timedelta(_Timedelta):
13691369
'{op}'.format(dtype=other.dtype,
13701370
op='__floordiv__'))
13711371

1372-
elif is_float_object(other) and util._checknull(other):
1372+
elif is_float_object(other) and util.is_nan(other):
13731373
# i.e. np.nan
13741374
return NotImplemented
13751375

pandas/_libs/tslibs/util.pxd

+13-2
Original file line numberDiff line numberDiff line change
@@ -228,5 +228,16 @@ cdef inline bint is_offset_object(object val):
228228
return getattr(val, '_typ', None) == "dateoffset"
229229

230230

231-
cdef inline bint _checknull(object val):
232-
return val is None or (PyFloat_Check(val) and val != val)
231+
cdef inline bint is_nan(object val):
232+
"""
233+
Check if val is a Not-A-Number float, including float('NaN') and np.nan.
234+
235+
Parameters
236+
----------
237+
val : object
238+
239+
Returns
240+
-------
241+
is_nan : bool
242+
"""
243+
return is_float_object(val) and val != val

pandas/_libs/writers.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ def string_array_replace_from_nan_rep(
163163
if replace is None:
164164
replace = np.nan
165165

166-
for i from 0 <= i < length:
166+
for i in range(length):
167167
if arr[i] == nan_rep:
168168
arr[i] = replace
169169

0 commit comments

Comments
 (0)