pandas-dev · jreback · Oct 1, 2013 · Sep 23, 2013 · Sep 27, 2013 · hayd
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -374,6 +374,8 @@ Bug Fixes
     - appending a 0-len table will work correctly (:issue:`4273`)
     - ``to_hdf`` was raising when passing both arguments ``append`` and ``table`` (:issue:`4584`)
     - reading from a store with duplicate columns across dtypes would raise (:issue:`4767`)
+    - Fixed a bug where ``ValueError`` wasn't correctly raised when column names
+      weren't strings (:issue:`4956`)
   - Fixed bug in tslib.tz_convert(vals, tz1, tz2): it could raise IndexError exception while
     trying to access trans[pos + 1] (:issue:`4496`)
   - The ``by`` argument now works correctly with the ``layout`` argument
@@ -500,8 +502,6 @@ Bug Fixes
   - Fixed a bug with setting invalid or out-of-range values in indexing
     enlargement scenarios (:issue:`4940`)
   - Tests for fillna on empty Series (:issue:`4346`), thanks @immerrr
-  - Fixed a bug where ``ValueError`` wasn't correctly raised when column names
-    weren't strings (:issue:`4956`)
   - Fixed ``copy()`` to shallow copy axes/indices as well and thereby keep
     separate metadata. (:issue:`4202`, :issue:`4830`)
   - Fixed skiprows option in Python parser for read_csv (:issue:`4382`)
@@ -521,6 +521,7 @@ Bug Fixes
   - Fix a bug where reshaping a ``Series`` to its own shape raised ``TypeError`` (:issue:`4554`)
     and other reshaping issues.
   - Bug in setting with ``ix/loc`` and a mixed int/string index (:issue:`4544`)
+  - Make sure series-series boolean comparisons are label based (:issue:`4947`)
 
 pandas 0.12.0
 -------------

diff --git a/pandas/core/ops.py b/pandas/core/ops.py
@@ -564,21 +564,31 @@ def na_op(x, y):
                     y = com._ensure_object(y)
                     result = lib.vec_binop(x, y, op)
             else:
-                result = lib.scalar_binop(x, y, op)
+                try:
+
+                    # let null fall thru
+                    if not isnull(y):
+                        y = bool(y)
+                    result = lib.scalar_binop(x, y, op)
+                except:
+                    raise TypeError("cannot compare a dtyped [{0}] array with "
+                                    "a scalar of type [{1}]".format(x.dtype,type(y).__name__))
 
         return result
 
     def wrapper(self, other):
         if isinstance(other, pd.Series):
             name = _maybe_match_name(self, other)
+
+            other = other.reindex_like(self).fillna(False).astype(bool)
             return self._constructor(na_op(self.values, other.values),
-                                     index=self.index, name=name)
+                                     index=self.index, name=name).fillna(False).astype(bool)
         elif isinstance(other, pd.DataFrame):
             return NotImplemented
         else:
             # scalars
             return self._constructor(na_op(self.values, other),
-                                     index=self.index, name=self.name)
+                                     index=self.index, name=self.name).fillna(False).astype(bool)
     return wrapper
 
 

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -21,7 +21,8 @@
                                 _values_from_object,
                                 _possibly_cast_to_datetime, _possibly_castable,
                                 _possibly_convert_platform,
-                                ABCSparseArray, _maybe_match_name)
+                                ABCSparseArray, _maybe_match_name, _ensure_object)
+
 from pandas.core.index import (Index, MultiIndex, InvalidIndexError,
                                _ensure_index, _handle_legacy_indexes)
 from pandas.core.indexing import (
@@ -1170,7 +1171,7 @@ def duplicated(self, take_last=False):
         -------
         duplicated : Series
         """
-        keys = com._ensure_object(self.values)
+        keys = _ensure_object(self.values)
         duplicated = lib.duplicated(keys, take_last=take_last)
         return self._constructor(duplicated, index=self.index, name=self.name)
 

diff --git a/pandas/lib.pyx b/pandas/lib.pyx
@@ -672,6 +672,9 @@ def scalar_binop(ndarray[object] values, object val, object op):
         object x
 
     result = np.empty(n, dtype=object)
+    if util._checknull(val):
+        result.fill(val)
+        return result
 
     for i in range(n):
         x = values[i]

diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -4523,8 +4523,10 @@ def f():
     def test_logical_with_nas(self):
         d = DataFrame({'a': [np.nan, False], 'b': [True, True]})
 
+        # GH4947
+        # bool comparisons should return bool
         result = d['a'] | d['b']
-        expected = Series([np.nan, True])
+        expected = Series([False, True])
         assert_series_equal(result, expected)
 
         # GH4604, automatic casting here
@@ -4533,10 +4535,6 @@ def test_logical_with_nas(self):
         assert_series_equal(result, expected)
 
         result = d['a'].fillna(False,downcast=False) | d['b']
-        expected = Series([True, True],dtype=object)
-        assert_series_equal(result, expected)
-
-        result = (d['a'].fillna(False,downcast=False) | d['b']).convert_objects()
         expected = Series([True, True])
         assert_series_equal(result, expected)
 

diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
@@ -2757,6 +2757,93 @@ def test_comparison_different_length(self):
         b = Series([2, 3, 4])
         self.assertRaises(ValueError, a.__eq__, b)
 
+    def test_comparison_label_based(self):
+
+        # GH 4947
+        # comparisons should be label based
+
+        a = Series([True, False, True], list('bca'))
+        b = Series([False, True, False], list('abc'))
+
+        expected = Series([True, False, False], list('bca'))
+        result = a & b
+        assert_series_equal(result,expected)
+
+        expected = Series([True, False, True], list('bca'))
+        result = a | b
+        assert_series_equal(result,expected)
+
+        expected = Series([False, False, True], list('bca'))
+        result = a ^ b
+        assert_series_equal(result,expected)
+
+        # rhs is bigger
+        a = Series([True, False, True], list('bca'))
+        b = Series([False, True, False, True], list('abcd'))
+
+        expected = Series([True, False, False], list('bca'))
+        result = a & b
+        assert_series_equal(result,expected)
+
+        expected = Series([True, False, True], list('bca'))
+        result = a | b
+        assert_series_equal(result,expected)
+
+        # filling
+
+        # vs empty
+        result = a & Series([])
+        expected = Series([False, False, False], list('bca'))
+        assert_series_equal(result,expected)
+
+        result = a | Series([])
+        expected = Series([True, False, True], list('bca'))
+        assert_series_equal(result,expected)
+
+        # vs non-matching
+        result = a & Series([1],['z'])
+        expected = Series([False, False, False], list('bca'))
+        assert_series_equal(result,expected)
+
+        result = a | Series([1],['z'])
+        expected = Series([True, False, True], list('bca'))
+        assert_series_equal(result,expected)
+
+        # identity
+        # we would like s[s|e] == s to hold for any e, whether empty or not
+        for e in [Series([]),Series([1],['z']),Series(['z']),Series(np.nan,b.index),Series(np.nan,a.index)]:
+            result = a[a | e]
+            assert_series_equal(result,a[a])
+
+        # vs scalars
+        index = list('bca')
+        t = Series([True,False,True])
+
+        for v in [True,1,2]:
+            result = Series([True,False,True],index=index) | v
+            expected = Series([True,True,True],index=index)
+            assert_series_equal(result,expected)
+
+        for v in [np.nan,'foo']:
+            self.assertRaises(TypeError, lambda : t | v)
+
+        for v in [False,0]:
+            result = Series([True,False,True],index=index) | v
+            expected = Series([True,False,True],index=index)
+            assert_series_equal(result,expected)
+
+        for v in [True,1]:
+            result = Series([True,False,True],index=index) & v
+            expected = Series([True,False,True],index=index)
+            assert_series_equal(result,expected)
+
+        for v in [False,0]:
+            result = Series([True,False,True],index=index) & v
+            expected = Series([False,False,False],index=index)
+            assert_series_equal(result,expected)
+        for v in [np.nan]:
+            self.assertRaises(TypeError, lambda : t & v)
+
     def test_between(self):
         s = Series(bdate_range('1/1/2000', periods=20).asobject)
         s[::2] = np.nan
@@ -2793,12 +2880,14 @@ def test_scalar_na_cmp_corners(self):
         def tester(a, b):
             return a & b
 
-        self.assertRaises(ValueError, tester, s, datetime(2005, 1, 1))
+        self.assertRaises(TypeError, tester, s, datetime(2005, 1, 1))
 
         s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)])
         s[::2] = np.nan
 
-        assert_series_equal(tester(s, list(s)), s)
+        expected = Series(True,index=s.index)
+        expected[::2] = False
+        assert_series_equal(tester(s, list(s)), expected)
 
         d = DataFrame({'A': s})
         # TODO: Fix this exception - needs to be fixed! (see GH5035)