From fb2bb5880efbd35f8235a9808fc414d1724808ba Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Mon, 23 Sep 2013 14:13:23 -0400
Subject: [PATCH 1/2] BUG: Make sure series-series boolean comparisons are label
 based (GH4947)

---
 doc/source/release.rst      |  5 ++--
 pandas/tests/test_frame.py  |  8 ++---
 pandas/tests/test_series.py | 58 +++++++++++++++++++++++++++++++++++++
 3 files changed, 64 insertions(+), 7 deletions(-)

diff --git a/doc/source/release.rst b/doc/source/release.rst
index 65e6ca0e1d95c..026791438a905 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -374,6 +374,8 @@ Bug Fixes
     - appending a 0-len table will work correctly (:issue:`4273`)
     - ``to_hdf`` was raising when passing both arguments ``append`` and ``table`` (:issue:`4584`)
     - reading from a store with duplicate columns across dtypes would raise (:issue:`4767`)
+    - Fixed a bug where ``ValueError`` wasn't correctly raised when column names
+      weren't strings (:issue:`4956`)
   - Fixed bug in tslib.tz_convert(vals, tz1, tz2): it could raise IndexError exception while
     trying to access trans[pos + 1] (:issue:`4496`)
   - The ``by`` argument now works correctly with the ``layout`` argument
@@ -500,8 +502,6 @@ Bug Fixes
   - Fixed a bug with setting invalid or out-of-range values in indexing
     enlargement scenarios (:issue:`4940`)
   - Tests for fillna on empty Series (:issue:`4346`), thanks @immerrr
-  - Fixed a bug where ``ValueError`` wasn't correctly raised when column names
-    weren't strings (:issue:`4956`)
   - Fixed ``copy()`` to shallow copy axes/indices as well and thereby keep
     separate metadata. (:issue:`4202`, :issue:`4830`)
   - Fixed skiprows option in Python parser for read_csv (:issue:`4382`)
@@ -521,6 +521,7 @@ Bug Fixes
   - Fix a bug where reshaping a ``Series`` to its own shape raised ``TypeError`` (:issue:`4554`)
     and other reshaping issues.
   - Bug in setting with ``ix/loc`` and a mixed int/string index (:issue:`4544`)
+  - Make sure series-series boolean comparisons are label based (:issue:`4947`)
 
 pandas 0.12.0
 -------------
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index ce8d84840ed69..f05e520130289 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -4523,8 +4523,10 @@ def f():
     def test_logical_with_nas(self):
         d = DataFrame({'a': [np.nan, False], 'b': [True, True]})
 
+        # GH4947
+        # bool comparisons should return bool
         result = d['a'] | d['b']
-        expected = Series([np.nan, True])
+        expected = Series([True, True])
         assert_series_equal(result, expected)
 
         # GH4604, automatic casting here
@@ -4533,10 +4535,6 @@ def test_logical_with_nas(self):
         assert_series_equal(result, expected)
 
         result = d['a'].fillna(False,downcast=False) | d['b']
-        expected = Series([True, True],dtype=object)
-        assert_series_equal(result, expected)
-
-        result = (d['a'].fillna(False,downcast=False) | d['b']).convert_objects()
         expected = Series([True, True])
         assert_series_equal(result, expected)
 
diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
index a70f2931e36fe..a2be232cb6a0d 100644
--- a/pandas/tests/test_series.py
+++ b/pandas/tests/test_series.py
@@ -2757,6 +2757,64 @@ def test_comparison_different_length(self):
         b = Series([2, 3, 4])
         self.assertRaises(ValueError, a.__eq__, b)
 
+    def test_comparison_label_based(self):
+
+        # GH 4947
+        # comparisons should be label based
+
+        a = Series([True, False, True], list('bca'))
+        b = Series([False, True, False], list('abc'))
+
+        expected = Series([True, False, False], list('bca'))
+        result = a & b
+        assert_series_equal(result,expected)
+
+        expected = Series([True, False, True], list('bca'))
+        result = a | b
+        assert_series_equal(result,expected)
+
+        expected = Series([False, False, True], list('bca'))
+        result = a ^ b
+        assert_series_equal(result,expected)
+
+        # rhs is bigger
+        a = Series([True, False, True], list('bca'))
+        b = Series([False, True, False, True], list('abcd'))
+
+        expected = Series([True, False, False], list('bca'))
+        result = a & b
+        assert_series_equal(result,expected)
+
+        expected = Series([True, False, True], list('bca'))
+        result = a | b
+        assert_series_equal(result,expected)
+
+        # filling
+
+        # vs empty
+        result = a & Series([])
+        expected = Series([False, False, False], list('bca'))
+        assert_series_equal(result,expected)
+
+        result = a | Series([])
+        expected = Series([True, True, True], list('bca'))
+        assert_series_equal(result,expected)
+
+        # vs non-matching
+        result = a & Series([1],['z'])
+        expected = Series([False, False, False], list('bca'))
+        assert_series_equal(result,expected)
+
+        result = a | Series([1],['z'])
+        expected = Series([True, True, True], list('bca'))
+        assert_series_equal(result,expected)
+
+        # identity
+        # we would like s[s|e] == s to hold for any e, whether empty or not
+        for e in [Series([]),Series([1],['z']),Series(['z']),Series(np.nan,b.index),Series(np.nan,a.index)]:
+            result = a[a | e]
+            assert_series_equal(result,a)
+
     def test_between(self):
         s = Series(bdate_range('1/1/2000', periods=20).asobject)
         s[::2] = np.nan

From 0de04598a0b6cd2bae447b8d0c920c5588c77baf Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Fri, 27 Sep 2013 11:43:58 -0400
Subject: [PATCH 2/2] ENH: Series lhs, scalar rhs bool comparison support

---
 pandas/core/ops.py          | 16 ++++++++++++---
 pandas/core/series.py       |  5 +++--
 pandas/lib.pyx              |  3 +++
 pandas/tests/test_frame.py  |  2 +-
 pandas/tests/test_series.py | 41 ++++++++++++++++++++++++++++++++-----
 5 files changed, 56 insertions(+), 11 deletions(-)

diff --git a/pandas/core/ops.py b/pandas/core/ops.py
index 4ce2143fdd92c..c1c6e6e2f83d3 100644
--- a/pandas/core/ops.py
+++ b/pandas/core/ops.py
@@ -564,21 +564,31 @@ def na_op(x, y):
                     y = com._ensure_object(y)
                     result = lib.vec_binop(x, y, op)
             else:
-                result = lib.scalar_binop(x, y, op)
+                try:
+
+                    # let null fall thru
+                    if not isnull(y):
+                        y = bool(y)
+                    result = lib.scalar_binop(x, y, op)
+                except:
+                    raise TypeError("cannot compare a dtyped [{0}] array with "
+                                    "a scalar of type [{1}]".format(x.dtype,type(y).__name__))
 
         return result
 
     def wrapper(self, other):
         if isinstance(other, pd.Series):
             name = _maybe_match_name(self, other)
+
+            other = other.reindex_like(self).fillna(False).astype(bool)
             return self._constructor(na_op(self.values, other.values),
-                                     index=self.index, name=name)
+                                     index=self.index, name=name).fillna(False).astype(bool)
         elif isinstance(other, pd.DataFrame):
             return NotImplemented
         else:
             # scalars
             return self._constructor(na_op(self.values, other),
-                                     index=self.index, name=self.name)
+                                     index=self.index, name=self.name).fillna(False).astype(bool)
     return wrapper
 
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 1bc35008cc341..79faad93ff1c1 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -21,7 +21,8 @@
                                 _values_from_object,
                                 _possibly_cast_to_datetime, _possibly_castable,
                                 _possibly_convert_platform,
-                                ABCSparseArray, _maybe_match_name)
+                                ABCSparseArray, _maybe_match_name, _ensure_object)
+
 from pandas.core.index import (Index, MultiIndex, InvalidIndexError,
                                _ensure_index, _handle_legacy_indexes)
 from pandas.core.indexing import (
@@ -1170,7 +1171,7 @@ def duplicated(self, take_last=False):
         -------
         duplicated : Series
         """
-        keys = com._ensure_object(self.values)
+        keys = _ensure_object(self.values)
         duplicated = lib.duplicated(keys, take_last=take_last)
         return self._constructor(duplicated, index=self.index, name=self.name)
 
diff --git a/pandas/lib.pyx b/pandas/lib.pyx
index f5205ae0c3133..56ef9a4fcb160 100644
--- a/pandas/lib.pyx
+++ b/pandas/lib.pyx
@@ -672,6 +672,9 @@ def scalar_binop(ndarray[object] values, object val, object op):
         object x
 
     result = np.empty(n, dtype=object)
+    if util._checknull(val):
+        result.fill(val)
+        return result
 
     for i in range(n):
         x = values[i]
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index f05e520130289..e8d9f3a7fc7cc 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -4526,7 +4526,7 @@ def test_logical_with_nas(self):
         # GH4947
         # bool comparisons should return bool
         result = d['a'] | d['b']
-        expected = Series([True, True])
+        expected = Series([False, True])
         assert_series_equal(result, expected)
 
         # GH4604, automatic casting here
diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
index a2be232cb6a0d..7f3ea130259dc 100644
--- a/pandas/tests/test_series.py
+++ b/pandas/tests/test_series.py
@@ -2797,7 +2797,7 @@ def test_comparison_label_based(self):
         assert_series_equal(result,expected)
 
         result = a | Series([])
-        expected = Series([True, True, True], list('bca'))
+        expected = Series([True, False, True], list('bca'))
         assert_series_equal(result,expected)
 
         # vs non-matching
@@ -2806,14 +2806,43 @@ def test_comparison_label_based(self):
         assert_series_equal(result,expected)
 
         result = a | Series([1],['z'])
-        expected = Series([True, True, True], list('bca'))
+        expected = Series([True, False, True], list('bca'))
         assert_series_equal(result,expected)
 
         # identity
         # we would like s[s|e] == s to hold for any e, whether empty or not
         for e in [Series([]),Series([1],['z']),Series(['z']),Series(np.nan,b.index),Series(np.nan,a.index)]:
             result = a[a | e]
-            assert_series_equal(result,a)
+            assert_series_equal(result,a[a])
+
+        # vs scalars
+        index = list('bca')
+        t = Series([True,False,True])
+
+        for v in [True,1,2]:
+            result = Series([True,False,True],index=index) | v
+            expected = Series([True,True,True],index=index)
+            assert_series_equal(result,expected)
+
+        for v in [np.nan,'foo']:
+            self.assertRaises(TypeError, lambda : t | v)
+
+        for v in [False,0]:
+            result = Series([True,False,True],index=index) | v
+            expected = Series([True,False,True],index=index)
+            assert_series_equal(result,expected)
+
+        for v in [True,1]:
+            result = Series([True,False,True],index=index) & v
+            expected = Series([True,False,True],index=index)
+            assert_series_equal(result,expected)
+
+        for v in [False,0]:
+            result = Series([True,False,True],index=index) & v
+            expected = Series([False,False,False],index=index)
+            assert_series_equal(result,expected)
+        for v in [np.nan]:
+            self.assertRaises(TypeError, lambda : t & v)
 
     def test_between(self):
         s = Series(bdate_range('1/1/2000', periods=20).asobject)
@@ -2851,12 +2880,14 @@ def test_scalar_na_cmp_corners(self):
         def tester(a, b):
             return a & b
 
-        self.assertRaises(ValueError, tester, s, datetime(2005, 1, 1))
+        self.assertRaises(TypeError, tester, s, datetime(2005, 1, 1))
 
         s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)])
         s[::2] = np.nan
 
-        assert_series_equal(tester(s, list(s)), s)
+        expected = Series(True,index=s.index)
+        expected[::2] = False
+        assert_series_equal(tester(s, list(s)), expected)
 
         d = DataFrame({'A': s})
         # TODO: Fix this exception - needs to be fixed! (see GH5035)