Skip to content

Commit a653af4

Browse files
committed
Merge pull request #4953 from jreback/bool_intersect
BUG: Make sure series-series boolean comparisons are label based (GH4947)
2 parents 6f96d7d + 0de0459 commit a653af4

File tree

6 files changed

+116
-14
lines changed

6 files changed

+116
-14
lines changed

doc/source/release.rst

+3-2
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,8 @@ Bug Fixes
374374
- appending a 0-len table will work correctly (:issue:`4273`)
375375
- ``to_hdf`` was raising when passing both arguments ``append`` and ``table`` (:issue:`4584`)
376376
- reading from a store with duplicate columns across dtypes would raise (:issue:`4767`)
377+
- Fixed a bug where ``ValueError`` wasn't correctly raised when column names
378+
weren't strings (:issue:`4956`)
377379
- Fixed bug in tslib.tz_convert(vals, tz1, tz2): it could raise IndexError exception while
378380
trying to access trans[pos + 1] (:issue:`4496`)
379381
- The ``by`` argument now works correctly with the ``layout`` argument
@@ -500,8 +502,6 @@ Bug Fixes
500502
- Fixed a bug with setting invalid or out-of-range values in indexing
501503
enlargement scenarios (:issue:`4940`)
502504
- Tests for fillna on empty Series (:issue:`4346`), thanks @immerrr
503-
- Fixed a bug where ``ValueError`` wasn't correctly raised when column names
504-
weren't strings (:issue:`4956`)
505505
- Fixed ``copy()`` to shallow copy axes/indices as well and thereby keep
506506
separate metadata. (:issue:`4202`, :issue:`4830`)
507507
- Fixed skiprows option in Python parser for read_csv (:issue:`4382`)
@@ -521,6 +521,7 @@ Bug Fixes
521521
- Fix a bug where reshaping a ``Series`` to its own shape raised ``TypeError`` (:issue:`4554`)
522522
and other reshaping issues.
523523
- Bug in setting with ``ix/loc`` and a mixed int/string index (:issue:`4544`)
524+
- Make sure series-series boolean comparisons are label based (:issue:`4947`)
524525

525526
pandas 0.12.0
526527
-------------

pandas/core/ops.py

+13-3
Original file line numberDiff line numberDiff line change
@@ -564,21 +564,31 @@ def na_op(x, y):
564564
y = com._ensure_object(y)
565565
result = lib.vec_binop(x, y, op)
566566
else:
567-
result = lib.scalar_binop(x, y, op)
567+
try:
568+
569+
# let null fall thru
570+
if not isnull(y):
571+
y = bool(y)
572+
result = lib.scalar_binop(x, y, op)
573+
except:
574+
raise TypeError("cannot compare a dtyped [{0}] array with "
575+
"a scalar of type [{1}]".format(x.dtype,type(y).__name__))
568576

569577
return result
570578

571579
def wrapper(self, other):
572580
if isinstance(other, pd.Series):
573581
name = _maybe_match_name(self, other)
582+
583+
other = other.reindex_like(self).fillna(False).astype(bool)
574584
return self._constructor(na_op(self.values, other.values),
575-
index=self.index, name=name)
585+
index=self.index, name=name).fillna(False).astype(bool)
576586
elif isinstance(other, pd.DataFrame):
577587
return NotImplemented
578588
else:
579589
# scalars
580590
return self._constructor(na_op(self.values, other),
581-
index=self.index, name=self.name)
591+
index=self.index, name=self.name).fillna(False).astype(bool)
582592
return wrapper
583593

584594

pandas/core/series.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121
_values_from_object,
2222
_possibly_cast_to_datetime, _possibly_castable,
2323
_possibly_convert_platform,
24-
ABCSparseArray, _maybe_match_name)
24+
ABCSparseArray, _maybe_match_name, _ensure_object)
25+
2526
from pandas.core.index import (Index, MultiIndex, InvalidIndexError,
2627
_ensure_index, _handle_legacy_indexes)
2728
from pandas.core.indexing import (
@@ -1170,7 +1171,7 @@ def duplicated(self, take_last=False):
11701171
-------
11711172
duplicated : Series
11721173
"""
1173-
keys = com._ensure_object(self.values)
1174+
keys = _ensure_object(self.values)
11741175
duplicated = lib.duplicated(keys, take_last=take_last)
11751176
return self._constructor(duplicated, index=self.index, name=self.name)
11761177

pandas/lib.pyx

+3
Original file line numberDiff line numberDiff line change
@@ -672,6 +672,9 @@ def scalar_binop(ndarray[object] values, object val, object op):
672672
object x
673673

674674
result = np.empty(n, dtype=object)
675+
if util._checknull(val):
676+
result.fill(val)
677+
return result
675678

676679
for i in range(n):
677680
x = values[i]

pandas/tests/test_frame.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -4523,8 +4523,10 @@ def f():
45234523
def test_logical_with_nas(self):
45244524
d = DataFrame({'a': [np.nan, False], 'b': [True, True]})
45254525

4526+
# GH4947
4527+
# bool comparisons should return bool
45264528
result = d['a'] | d['b']
4527-
expected = Series([np.nan, True])
4529+
expected = Series([False, True])
45284530
assert_series_equal(result, expected)
45294531

45304532
# GH4604, automatic casting here
@@ -4533,10 +4535,6 @@ def test_logical_with_nas(self):
45334535
assert_series_equal(result, expected)
45344536

45354537
result = d['a'].fillna(False,downcast=False) | d['b']
4536-
expected = Series([True, True],dtype=object)
4537-
assert_series_equal(result, expected)
4538-
4539-
result = (d['a'].fillna(False,downcast=False) | d['b']).convert_objects()
45404538
expected = Series([True, True])
45414539
assert_series_equal(result, expected)
45424540

pandas/tests/test_series.py

+91-2
Original file line numberDiff line numberDiff line change
@@ -2757,6 +2757,93 @@ def test_comparison_different_length(self):
27572757
b = Series([2, 3, 4])
27582758
self.assertRaises(ValueError, a.__eq__, b)
27592759

2760+
def test_comparison_label_based(self):
2761+
2762+
# GH 4947
2763+
# comparisons should be label based
2764+
2765+
a = Series([True, False, True], list('bca'))
2766+
b = Series([False, True, False], list('abc'))
2767+
2768+
expected = Series([True, False, False], list('bca'))
2769+
result = a & b
2770+
assert_series_equal(result,expected)
2771+
2772+
expected = Series([True, False, True], list('bca'))
2773+
result = a | b
2774+
assert_series_equal(result,expected)
2775+
2776+
expected = Series([False, False, True], list('bca'))
2777+
result = a ^ b
2778+
assert_series_equal(result,expected)
2779+
2780+
# rhs is bigger
2781+
a = Series([True, False, True], list('bca'))
2782+
b = Series([False, True, False, True], list('abcd'))
2783+
2784+
expected = Series([True, False, False], list('bca'))
2785+
result = a & b
2786+
assert_series_equal(result,expected)
2787+
2788+
expected = Series([True, False, True], list('bca'))
2789+
result = a | b
2790+
assert_series_equal(result,expected)
2791+
2792+
# filling
2793+
2794+
# vs empty
2795+
result = a & Series([])
2796+
expected = Series([False, False, False], list('bca'))
2797+
assert_series_equal(result,expected)
2798+
2799+
result = a | Series([])
2800+
expected = Series([True, False, True], list('bca'))
2801+
assert_series_equal(result,expected)
2802+
2803+
# vs non-matching
2804+
result = a & Series([1],['z'])
2805+
expected = Series([False, False, False], list('bca'))
2806+
assert_series_equal(result,expected)
2807+
2808+
result = a | Series([1],['z'])
2809+
expected = Series([True, False, True], list('bca'))
2810+
assert_series_equal(result,expected)
2811+
2812+
# identity
2813+
# we would like s[s|e] == s to hold for any e, whether empty or not
2814+
for e in [Series([]),Series([1],['z']),Series(['z']),Series(np.nan,b.index),Series(np.nan,a.index)]:
2815+
result = a[a | e]
2816+
assert_series_equal(result,a[a])
2817+
2818+
# vs scalars
2819+
index = list('bca')
2820+
t = Series([True,False,True])
2821+
2822+
for v in [True,1,2]:
2823+
result = Series([True,False,True],index=index) | v
2824+
expected = Series([True,True,True],index=index)
2825+
assert_series_equal(result,expected)
2826+
2827+
for v in [np.nan,'foo']:
2828+
self.assertRaises(TypeError, lambda : t | v)
2829+
2830+
for v in [False,0]:
2831+
result = Series([True,False,True],index=index) | v
2832+
expected = Series([True,False,True],index=index)
2833+
assert_series_equal(result,expected)
2834+
2835+
for v in [True,1]:
2836+
result = Series([True,False,True],index=index) & v
2837+
expected = Series([True,False,True],index=index)
2838+
assert_series_equal(result,expected)
2839+
2840+
for v in [False,0]:
2841+
result = Series([True,False,True],index=index) & v
2842+
expected = Series([False,False,False],index=index)
2843+
assert_series_equal(result,expected)
2844+
for v in [np.nan]:
2845+
self.assertRaises(TypeError, lambda : t & v)
2846+
27602847
def test_between(self):
27612848
s = Series(bdate_range('1/1/2000', periods=20).asobject)
27622849
s[::2] = np.nan
@@ -2793,12 +2880,14 @@ def test_scalar_na_cmp_corners(self):
27932880
def tester(a, b):
27942881
return a & b
27952882

2796-
self.assertRaises(ValueError, tester, s, datetime(2005, 1, 1))
2883+
self.assertRaises(TypeError, tester, s, datetime(2005, 1, 1))
27972884

27982885
s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)])
27992886
s[::2] = np.nan
28002887

2801-
assert_series_equal(tester(s, list(s)), s)
2888+
expected = Series(True,index=s.index)
2889+
expected[::2] = False
2890+
assert_series_equal(tester(s, list(s)), expected)
28022891

28032892
d = DataFrame({'A': s})
28042893
# TODO: Fix this exception - needs to be fixed! (see GH5035)

0 commit comments

Comments
 (0)