Skip to content

Commit 4fe7c68

Browse files
committed
ERR: Boolean comparisons of a Series vs None will now be equivalent to null comparisons, rather than raising TypeError, xref pandas-dev#1079
1 parent 5b97367 commit 4fe7c68

File tree

7 files changed

+169
-103
lines changed

7 files changed

+169
-103
lines changed

doc/source/whatsnew/v0.17.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ Other API Changes
231231
- Enable serialization of lists and dicts to strings in ExcelWriter (:issue:`8188`)
232232
- Allow passing `kwargs` to the interpolation methods (:issue:`10378`).
233233
- Serialize metadata properties of subclasses of pandas objects (:issue:`10553`).
234-
234+
- Boolean comparisons of a ``Series`` vs ``None`` will now be equivalent to comparing with ``np.nan``, rather than raising ``TypeError``, xref (:issue:`1079`).
235235

236236
.. _whatsnew_0170.deprecations:
237237

pandas/core/ops.py

+48-23
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,11 @@
1313
from pandas.util.decorators import Appender
1414
import pandas.core.common as com
1515
import pandas.computation.expressions as expressions
16+
from pandas.lib import isscalar
17+
from pandas.tslib import iNaT
1618
from pandas.core.common import(bind_method, is_list_like, notnull, isnull,
17-
_values_from_object, _maybe_match_name)
19+
_values_from_object, _maybe_match_name,
20+
needs_i8_conversion, is_integer_dtype)
1821

1922
# -----------------------------------------------------------------------------
2023
# Functions that add arithmetic methods to objects, given arithmetic factory
@@ -257,7 +260,7 @@ class _TimeOp(object):
257260
Generally, you should use classmethod ``maybe_convert_for_time_op`` as an
258261
entry point.
259262
"""
260-
fill_value = tslib.iNaT
263+
fill_value = iNaT
261264
wrap_results = staticmethod(lambda x: x)
262265
dtype = None
263266

@@ -346,7 +349,7 @@ def _convert_to_array(self, values, name=None, other=None):
346349
if (other is not None and other.dtype == 'timedelta64[ns]' and
347350
all(isnull(v) for v in values)):
348351
values = np.empty(values.shape, dtype=other.dtype)
349-
values[:] = tslib.iNaT
352+
values[:] = iNaT
350353

351354
# a datelike
352355
elif isinstance(values, pd.DatetimeIndex):
@@ -381,7 +384,7 @@ def _convert_to_array(self, values, name=None, other=None):
381384
# all nan, so ok, use the other dtype (e.g. timedelta or datetime)
382385
if isnull(values).all():
383386
values = np.empty(values.shape, dtype=other.dtype)
384-
values[:] = tslib.iNaT
387+
values[:] = iNaT
385388
else:
386389
raise TypeError(
387390
'incompatible type [{0}] for a datetime/timedelta '
@@ -549,26 +552,59 @@ def na_op(x, y):
549552
elif com.is_categorical_dtype(y) and not lib.isscalar(y):
550553
return op(y,x)
551554

552-
if x.dtype == np.object_:
555+
if com.is_object_dtype(x.dtype):
553556
if isinstance(y, list):
554557
y = lib.list_to_object_array(y)
555558

556559
if isinstance(y, (np.ndarray, pd.Series)):
557-
if y.dtype != np.object_:
560+
if not com.is_object_dtype(y.dtype):
558561
result = lib.vec_compare(x, y.astype(np.object_), op)
559562
else:
560563
result = lib.vec_compare(x, y, op)
561564
else:
562565
result = lib.scalar_compare(x, y, op)
563566
else:
564567

568+
# numpy does not like comparisons vs None
569+
if lib.isscalar(y) and isnull(y):
570+
y = np.nan
571+
572+
# we want to compare like types
573+
# we only want to convert to integer like if
574+
# we are not NotImplemented, otherwise
575+
# we would allow datetime64 (but viewed as i8) against
576+
# integer comparisons
577+
if needs_i8_conversion(x) and (not isscalar(y) and is_integer_dtype(y)):
578+
raise TypeError("invalid type comparison")
579+
elif (not isscalar(y) and needs_i8_conversion(y)) and is_integer_dtype(x):
580+
raise TypeError("invalid type comparison")
581+
582+
# we have a datetime/timedelta and may need to convert
583+
mask = None
584+
if needs_i8_conversion(x) or (not isscalar(y) and needs_i8_conversion(y)):
585+
586+
if isscalar(y):
587+
y = _index.convert_scalar(x,_values_from_object(y))
588+
else:
589+
y = y.view('i8')
590+
591+
if name == '__ne__':
592+
mask = notnull(x)
593+
else:
594+
mask = isnull(x)
595+
596+
x = x.view('i8')
597+
565598
try:
566599
result = getattr(x, name)(y)
567600
if result is NotImplemented:
568601
raise TypeError("invalid type comparison")
569-
except (AttributeError):
602+
except AttributeError:
570603
result = op(x, y)
571604

605+
if mask is not None and mask.any():
606+
result[mask] = False
607+
572608
return result
573609

574610
def wrapper(self, other, axis=None):
@@ -596,23 +632,18 @@ def wrapper(self, other, axis=None):
596632
raise TypeError(msg.format(op=op,typ=self.dtype))
597633

598634

599-
mask = isnull(self)
600-
601635
if com.is_categorical_dtype(self):
602636
# cats are a special case as get_values() would return an ndarray, which would then
603637
# not take categories ordering into account
604638
# we can go directly to op, as the na_op would just test again and dispatch to it.
605639
res = op(self.values, other)
606640
else:
607641
values = self.get_values()
608-
other = _index.convert_scalar(values,_values_from_object(other))
609-
610-
if issubclass(values.dtype.type, (np.datetime64, np.timedelta64)):
611-
values = values.view('i8')
642+
if is_list_like(other):
643+
other = np.asarray(other)
612644

613-
# scalars
614645
res = na_op(values, other)
615-
if np.isscalar(res):
646+
if lib.isscalar(res):
616647
raise TypeError('Could not compare %s type with Series'
617648
% type(other))
618649

@@ -621,11 +652,6 @@ def wrapper(self, other, axis=None):
621652

622653
res = pd.Series(res, index=self.index, name=self.name,
623654
dtype='bool')
624-
625-
# mask out the invalids
626-
if mask.any():
627-
res[mask] = masker
628-
629655
return res
630656
return wrapper
631657

@@ -643,8 +669,7 @@ def na_op(x, y):
643669
y = lib.list_to_object_array(y)
644670

645671
if isinstance(y, (np.ndarray, pd.Series)):
646-
if (x.dtype == np.bool_ and
647-
y.dtype == np.bool_): # pragma: no cover
672+
if (com.is_bool_dtype(x.dtype) and com.is_bool_dtype(y.dtype)):
648673
result = op(x, y) # when would this be hit?
649674
else:
650675
x = com._ensure_object(x)
@@ -1046,7 +1071,7 @@ def na_op(x, y):
10461071

10471072
# work only for scalars
10481073
def f(self, other):
1049-
if not np.isscalar(other):
1074+
if not lib.isscalar(other):
10501075
raise ValueError('Simple arithmetic with %s can only be '
10511076
'done with scalar values' %
10521077
self._constructor.__name__)

pandas/lib.pyx

+9-1
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,7 @@ def isnullobj2d_old(ndarray[object, ndim=2] arr):
378378
@cython.boundscheck(False)
379379
cpdef ndarray[object] list_to_object_array(list obj):
380380
'''
381-
Convert list to object ndarray. Seriously can't believe I had to write this
381+
Convert list to object ndarray. Seriously can\'t believe I had to write this
382382
function
383383
'''
384384
cdef:
@@ -682,6 +682,7 @@ def scalar_compare(ndarray[object] values, object val, object op):
682682
cdef:
683683
Py_ssize_t i, n = len(values)
684684
ndarray[uint8_t, cast=True] result
685+
bint isnull_val
685686
int flag
686687
object x
687688

@@ -701,12 +702,15 @@ def scalar_compare(ndarray[object] values, object val, object op):
701702
raise ValueError('Unrecognized operator')
702703

703704
result = np.empty(n, dtype=bool).view(np.uint8)
705+
isnull_val = _checknull(val)
704706

705707
if flag == cpython.Py_NE:
706708
for i in range(n):
707709
x = values[i]
708710
if _checknull(x):
709711
result[i] = True
712+
elif isnull_val:
713+
result[i] = True
710714
else:
711715
try:
712716
result[i] = cpython.PyObject_RichCompareBool(x, val, flag)
@@ -717,6 +721,8 @@ def scalar_compare(ndarray[object] values, object val, object op):
717721
x = values[i]
718722
if _checknull(x):
719723
result[i] = False
724+
elif isnull_val:
725+
result[i] = False
720726
else:
721727
try:
722728
result[i] = cpython.PyObject_RichCompareBool(x, val, flag)
@@ -728,6 +734,8 @@ def scalar_compare(ndarray[object] values, object val, object op):
728734
x = values[i]
729735
if _checknull(x):
730736
result[i] = False
737+
elif isnull_val:
738+
result[i] = False
731739
else:
732740
result[i] = cpython.PyObject_RichCompareBool(x, val, flag)
733741

pandas/tests/test_base.py

+41-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from pandas.tseries.common import is_datetimelike
1313
from pandas import Series, Index, Int64Index, DatetimeIndex, TimedeltaIndex, PeriodIndex, Timedelta
1414
import pandas.tslib as tslib
15+
from pandas import _np_version_under1p9
1516
import nose
1617

1718
import pandas.util.testing as tm
@@ -273,6 +274,45 @@ def setUp(self):
273274
self.is_valid_objs = [ o for o in self.objs if o._allow_index_ops ]
274275
self.not_valid_objs = [ o for o in self.objs if not o._allow_index_ops ]
275276

277+
def test_none_comparison(self):
278+
279+
# bug brought up by #1079
280+
# changed from TypeError in 0.17.0
281+
for o in self.is_valid_objs:
282+
if isinstance(o, Series):
283+
284+
o[0] = np.nan
285+
286+
result = o == None
287+
self.assertFalse(result.iat[0])
288+
self.assertFalse(result.iat[1])
289+
290+
result = o != None
291+
self.assertTrue(result.iat[0])
292+
self.assertTrue(result.iat[1])
293+
294+
result = None == o
295+
self.assertFalse(result.iat[0])
296+
self.assertFalse(result.iat[1])
297+
298+
if _np_version_under1p9:
299+
# fails because this falls back to ``not __eq__``, which
300+
# is not valid for numpy
301+
pass
302+
else:
303+
result = None != o
304+
self.assertTrue(result.iat[0])
305+
self.assertTrue(result.iat[1])
306+
307+
result = None > o
308+
self.assertFalse(result.iat[0])
309+
self.assertFalse(result.iat[1])
310+
311+
result = o < None
312+
self.assertFalse(result.iat[0])
313+
self.assertFalse(result.iat[1])
314+
315+
276316
def test_ndarray_compat_properties(self):
277317

278318
for o in self.objs:
@@ -513,7 +553,7 @@ def test_value_counts_inferred(self):
513553
expected = Series([4, 3, 2], index=['b', 'a', 'd'])
514554
tm.assert_series_equal(s.value_counts(), expected)
515555

516-
self.assert_numpy_array_equal(s.unique(), np.array(['a', 'b', np.nan, 'd'], dtype='O'))
556+
self.assert_numpy_array_equivalent(s.unique(), np.array(['a', 'b', np.nan, 'd'], dtype='O'))
517557
self.assertEqual(s.nunique(), 3)
518558

519559
s = klass({})

0 commit comments

Comments
 (0)