Skip to content

Commit 7b79b66

Browse files
committed
ENH: add method use_inf_as_null to core.common (GH pandas-dev#1919)
1 parent fe12c8a commit 7b79b66

File tree

4 files changed

+132
-1
lines changed

4 files changed

+132
-1
lines changed

pandas/core/common.py

+76
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,58 @@ def isnull(obj):
6565
return _isnull_ndarraylike(obj)
6666
else:
6767
return obj is None
68+
isnull_new = isnull
69+
70+
def isnull_old(obj):
    '''
    Detect missing values using the legacy rules, under which None, NaN,
    INF and -INF are all meant to count as null.  Unlike numpy.isnan /
    -numpy.isfinite this can be applied to object arrays.

    Parameters
    ----------
    obj: ndarray or object value

    Returns
    -------
    boolean ndarray or boolean
    '''
    # Scalars are answered directly by the compiled helper.
    if lib.isscalar(obj):
        return lib.checknull_old(obj)

    from pandas.core.generic import PandasObject

    # The ndarray test deliberately comes before the PandasObject test.
    if isinstance(obj, np.ndarray):
        return _isnull_ndarraylike_old(obj)

    if isinstance(obj, PandasObject):
        # TODO: optimize for DataFrame, etc.
        return obj.apply(isnull_old)

    # Lists and anything exposing __array__ are converted and checked
    # element-wise.
    array_like = isinstance(obj, list) or hasattr(obj, '__array__')
    if array_like:
        return _isnull_ndarraylike_old(obj)

    # Any other object is null only if it is None itself.
    return obj is None
96+
97+
def use_inf_as_null(flag):
    '''
    Choose which replacement for numpy.isnan / -numpy.isfinite the
    module-level name ``isnull`` refers to.

    Parameters
    ----------
    flag: bool
        True means treat None, NaN, INF, -INF as null (old way),
        False means None and NaN are null, but INF, -INF are not null
        (new way).  Any truthy value selects the old way and any falsy
        value the new way.

    Notes
    -----
    The switch is made by rebinding ``isnull`` in this module's global
    namespace.  Code that looks the name up through this module at call
    time sees the change; a name copied earlier with
    ``from <this module> import isnull`` keeps pointing at whichever
    function it was bound to when the import ran.

    This approach to setting global module values is discussed here:

    * http://stackoverflow.com/questions/4859217/programmatically-creating-variables-in-python/4859312#4859312
    '''
    global isnull
    # Test truthiness rather than ``flag == True`` so that any truthy
    # flag selects the old behaviour.
    isnull = isnull_old if flag else isnull_new
119+
68120

69121
def _isnull_ndarraylike(obj):
70122
from pandas import Series
@@ -90,6 +142,30 @@ def _isnull_ndarraylike(obj):
90142
result = -np.isfinite(obj)
91143
return result
92144

145+
def _isnull_ndarraylike_old(obj):
146+
from pandas import Series
147+
values = np.asarray(obj)
148+
149+
if values.dtype.kind in ('O', 'S', 'U'):
150+
# Working around NumPy ticket 1542
151+
shape = values.shape
152+
153+
if values.dtype.kind in ('S', 'U'):
154+
result = np.zeros(values.shape, dtype=bool)
155+
else:
156+
result = np.empty(shape, dtype=bool)
157+
vec = lib.isnullobj_old(values.ravel())
158+
result[:] = vec.reshape(shape)
159+
160+
if isinstance(obj, Series):
161+
result = Series(result, index=obj.index, copy=False)
162+
elif values.dtype == np.dtype('M8[ns]'):
163+
# this is the NaT pattern
164+
result = values.view('i8') == lib.iNaT
165+
else:
166+
result = -np.isfinite(obj)
167+
return result
168+
93169
def notnull(obj):
94170
'''
95171
Replacement for numpy.isfinite / -numpy.isnan which is suitable

pandas/src/tseries.pyx

+41
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,18 @@ cpdef checknull(object val):
189189
else:
190190
return util._checknull(val)
191191

192+
cpdef checknull_old(object val):
    # Legacy scalar null check: for float/complex objects NaN, INF and
    # NEGINF all count as null.
    if util.is_float_object(val) or util.is_complex_object(val):
        return val != val or val == INF or val == NEGINF

    # datetime64 scalars are null when they carry the NaT sentinel.
    if util.is_datetime64_object(val):
        return get_datetime64_value(val) == NPY_NAT

    if isinstance(val, _NaT):
        return True

    # An array as a whole is never reported as null by this check.
    if is_array(val):
        return False

    # Everything else is delegated to the generic helper.
    return util._checknull(val)
203+
192204

193205
def isscalar(object val):
    # None and _Timestamp instances count as scalars in addition to
    # whatever numpy itself considers a scalar.
    if np.isscalar(val):
        return True
    return val is None or isinstance(val, _Timestamp)
@@ -207,6 +219,19 @@ def isnullobj(ndarray[object] arr):
207219
result[i] = util._checknull(arr[i])
208220
return result.view(np.bool_)
209221

222+
@cython.wraparound(False)
@cython.boundscheck(False)
def isnullobj_old(ndarray[object] arr):
    # Build a boolean mask for a 1-d object array by applying
    # util._checknull_old to each element.
    cdef Py_ssize_t i, n
    cdef object val
    cdef ndarray[uint8_t] result

    n = len(arr)
    # Filled as uint8 and re-viewed as bool on return.
    result = np.zeros(n, dtype=np.uint8)
    for i in range(n):
        val = arr[i]
        result[i] = util._checknull_old(val)
    return result.view(np.bool_)
234+
210235

211236
@cython.wraparound(False)
212237
@cython.boundscheck(False)
@@ -224,6 +249,22 @@ def isnullobj2d(ndarray[object, ndim=2] arr):
224249
result[i, j] = 1
225250
return result.view(np.bool_)
226251

252+
@cython.wraparound(False)
@cython.boundscheck(False)
def isnullobj2d_old(ndarray[object, ndim=2] arr):
    # Build a boolean mask for a 2-d object array by applying
    # checknull_old to each element.
    cdef Py_ssize_t i, j, n, m
    cdef object val
    cdef ndarray[uint8_t, ndim=2] result

    n, m = (<object> arr).shape
    # Starts all-zero; only null positions are set, then re-viewed as bool.
    result = np.zeros((n, m), dtype=np.uint8)
    for i in range(n):
        for j in range(m):
            val = arr[i, j]
            if not checknull_old(val):
                continue
            result[i, j] = 1
    return result.view(np.bool_)
267+
227268
def list_to_object_array(list obj):
228269
'''
229270
Convert list to object ndarray. Seriously can't believe I had to write this

pandas/src/util.pxd

+6
Original file line numberDiff line numberDiff line change
@@ -68,5 +68,11 @@ cdef inline bint _checknull(object val):
6868
except ValueError:
6969
return False
7070

71+
cdef inline bint _checknull_old(object val):
    # Legacy ("inf as null") scalar check: None, NaN, +inf and -inf are
    # all reported as null.  Without the infinity comparisons this helper
    # would behave exactly like _checknull.
    cdef double INF = float('inf')
    cdef double NEGINF = -INF
    try:
        # ``val != val`` is true only for NaN-like values.  bool() forces
        # the truth test here so that values whose comparison cannot be
        # reduced to one boolean raise inside the try block.
        return bool(val is None or val != val
                    or val == INF or val == NEGINF)
    except ValueError:
        # Such values are treated as not-null.
        return False
76+
7177
cdef inline bint _checknan(object val):
    # Arrays are never NaN as a whole; for anything else rely on the
    # self-inequality test.
    if cnp.PyArray_Check(val):
        return False
    return val != val

pandas/tests/test_common.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import unittest
77

88
from pandas import Series, DataFrame, date_range, DatetimeIndex
9-
from pandas.core.common import notnull, isnull
9+
from pandas.core.common import notnull, isnull, use_inf_as_null
1010
import pandas.core.common as com
1111
import pandas.util.testing as tm
1212

@@ -18,9 +18,17 @@ def test_notnull():
1818
assert notnull(1.)
1919
assert not notnull(None)
2020
assert not notnull(np.NaN)
21+
22+
use_inf_as_null(False)
2123
assert notnull(np.inf)
2224
assert notnull(-np.inf)
2325

26+
use_inf_as_null(True)
27+
assert not notnull(np.inf)
28+
assert not notnull(-np.inf)
29+
30+
31+
2432
float_series = Series(np.random.randn(5))
2533
obj_series = Series(np.random.randn(5), dtype=object)
2634
assert(isinstance(notnull(float_series), Series))

0 commit comments

Comments
 (0)