From fb2bb5880efbd35f8235a9808fc414d1724808ba Mon Sep 17 00:00:00 2001 From: jreback Date: Mon, 23 Sep 2013 14:13:23 -0400 Subject: [PATCH 1/2] BUG: Make sure series-series boolean comparisons are label based (GH4947) --- doc/source/release.rst | 5 ++-- pandas/tests/test_frame.py | 8 ++--- pandas/tests/test_series.py | 58 +++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 7 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 65e6ca0e1d95c..026791438a905 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -374,6 +374,8 @@ Bug Fixes - appending a 0-len table will work correctly (:issue:`4273`) - ``to_hdf`` was raising when passing both arguments ``append`` and ``table`` (:issue:`4584`) - reading from a store with duplicate columns across dtypes would raise (:issue:`4767`) + - Fixed a bug where ``ValueError`` wasn't correctly raised when column names + weren't strings (:issue:`4956`) - Fixed bug in tslib.tz_convert(vals, tz1, tz2): it could raise IndexError exception while trying to access trans[pos + 1] (:issue:`4496`) - The ``by`` argument now works correctly with the ``layout`` argument @@ -500,8 +502,6 @@ Bug Fixes - Fixed a bug with setting invalid or out-of-range values in indexing enlargement scenarios (:issue:`4940`) - Tests for fillna on empty Series (:issue:`4346`), thanks @immerrr - - Fixed a bug where ``ValueError`` wasn't correctly raised when column names - weren't strings (:issue:`4956`) - Fixed ``copy()`` to shallow copy axes/indices as well and thereby keep separate metadata. (:issue:`4202`, :issue:`4830`) - Fixed skiprows option in Python parser for read_csv (:issue:`4382`) @@ -521,6 +521,7 @@ Bug Fixes - Fix a bug where reshaping a ``Series`` to its own shape raised ``TypeError`` (:issue:`4554`) and other reshaping issues. 
- Bug in setting with ``ix/loc`` and a mixed int/string index (:issue:`4544`) + - Make sure series-series boolean comparisons are label based (:issue:`4947`) pandas 0.12.0 ------------- diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index ce8d84840ed69..f05e520130289 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -4523,8 +4523,10 @@ def f(): def test_logical_with_nas(self): d = DataFrame({'a': [np.nan, False], 'b': [True, True]}) + # GH4947 + # bool comparisons should return bool result = d['a'] | d['b'] - expected = Series([np.nan, True]) + expected = Series([True, True]) assert_series_equal(result, expected) # GH4604, automatic casting here @@ -4533,10 +4535,6 @@ def test_logical_with_nas(self): assert_series_equal(result, expected) result = d['a'].fillna(False,downcast=False) | d['b'] - expected = Series([True, True],dtype=object) - assert_series_equal(result, expected) - - result = (d['a'].fillna(False,downcast=False) | d['b']).convert_objects() expected = Series([True, True]) assert_series_equal(result, expected) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index a70f2931e36fe..a2be232cb6a0d 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -2757,6 +2757,64 @@ def test_comparison_different_length(self): b = Series([2, 3, 4]) self.assertRaises(ValueError, a.__eq__, b) + def test_comparison_label_based(self): + + # GH 4947 + # comparisons should be label based + + a = Series([True, False, True], list('bca')) + b = Series([False, True, False], list('abc')) + + expected = Series([True, False, False], list('bca')) + result = a & b + assert_series_equal(result,expected) + + expected = Series([True, False, True], list('bca')) + result = a | b + assert_series_equal(result,expected) + + expected = Series([False, False, True], list('bca')) + result = a ^ b + assert_series_equal(result,expected) + + # rhs is bigger + a = Series([True, False, True], list('bca')) + b = 
Series([False, True, False, True], list('abcd')) + + expected = Series([True, False, False], list('bca')) + result = a & b + assert_series_equal(result,expected) + + expected = Series([True, False, True], list('bca')) + result = a | b + assert_series_equal(result,expected) + + # filling + + # vs empty + result = a & Series([]) + expected = Series([False, False, False], list('bca')) + assert_series_equal(result,expected) + + result = a | Series([]) + expected = Series([True, True, True], list('bca')) + assert_series_equal(result,expected) + + # vs non-matching + result = a & Series([1],['z']) + expected = Series([False, False, False], list('bca')) + assert_series_equal(result,expected) + + result = a | Series([1],['z']) + expected = Series([True, True, True], list('bca')) + assert_series_equal(result,expected) + + # identity + # we would like s[s|e] == s to hold for any e, whether empty or not + for e in [Series([]),Series([1],['z']),Series(['z']),Series(np.nan,b.index),Series(np.nan,a.index)]: + result = a[a | e] + assert_series_equal(result,a) + def test_between(self): s = Series(bdate_range('1/1/2000', periods=20).asobject) s[::2] = np.nan From 0de04598a0b6cd2bae447b8d0c920c5588c77baf Mon Sep 17 00:00:00 2001 From: jreback Date: Fri, 27 Sep 2013 11:43:58 -0400 Subject: [PATCH 2/2] ENH: Series lhs, scalar rhs bool comparison support --- pandas/core/ops.py | 16 ++++++++++++--- pandas/core/series.py | 5 +++-- pandas/lib.pyx | 3 +++ pandas/tests/test_frame.py | 2 +- pandas/tests/test_series.py | 41 ++++++++++++++++++++++++++++++++----- 5 files changed, 56 insertions(+), 11 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 4ce2143fdd92c..c1c6e6e2f83d3 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -564,21 +564,31 @@ def na_op(x, y): y = com._ensure_object(y) result = lib.vec_binop(x, y, op) else: - result = lib.scalar_binop(x, y, op) + try: + + # let null fall thru + if not isnull(y): + y = bool(y) + result = lib.scalar_binop(x, y, 
op) + except: + raise TypeError("cannot compare a dtyped [{0}] array with " + "a scalar of type [{1}]".format(x.dtype,type(y).__name__)) return result def wrapper(self, other): if isinstance(other, pd.Series): name = _maybe_match_name(self, other) + + other = other.reindex_like(self).fillna(False).astype(bool) return self._constructor(na_op(self.values, other.values), - index=self.index, name=name) + index=self.index, name=name).fillna(False).astype(bool) elif isinstance(other, pd.DataFrame): return NotImplemented else: # scalars return self._constructor(na_op(self.values, other), - index=self.index, name=self.name) + index=self.index, name=self.name).fillna(False).astype(bool) return wrapper diff --git a/pandas/core/series.py b/pandas/core/series.py index 1bc35008cc341..79faad93ff1c1 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -21,7 +21,8 @@ _values_from_object, _possibly_cast_to_datetime, _possibly_castable, _possibly_convert_platform, - ABCSparseArray, _maybe_match_name) + ABCSparseArray, _maybe_match_name, _ensure_object) + from pandas.core.index import (Index, MultiIndex, InvalidIndexError, _ensure_index, _handle_legacy_indexes) from pandas.core.indexing import ( @@ -1170,7 +1171,7 @@ def duplicated(self, take_last=False): ------- duplicated : Series """ - keys = com._ensure_object(self.values) + keys = _ensure_object(self.values) duplicated = lib.duplicated(keys, take_last=take_last) return self._constructor(duplicated, index=self.index, name=self.name) diff --git a/pandas/lib.pyx b/pandas/lib.pyx index f5205ae0c3133..56ef9a4fcb160 100644 --- a/pandas/lib.pyx +++ b/pandas/lib.pyx @@ -672,6 +672,9 @@ def scalar_binop(ndarray[object] values, object val, object op): object x result = np.empty(n, dtype=object) + if util._checknull(val): + result.fill(val) + return result for i in range(n): x = values[i] diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index f05e520130289..e8d9f3a7fc7cc 100644 --- 
a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -4526,7 +4526,7 @@ def test_logical_with_nas(self): # GH4947 # bool comparisons should return bool result = d['a'] | d['b'] - expected = Series([True, True]) + expected = Series([False, True]) assert_series_equal(result, expected) # GH4604, automatic casting here diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index a2be232cb6a0d..7f3ea130259dc 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -2797,7 +2797,7 @@ def test_comparison_label_based(self): assert_series_equal(result,expected) result = a | Series([]) - expected = Series([True, True, True], list('bca')) + expected = Series([True, False, True], list('bca')) assert_series_equal(result,expected) # vs non-matching @@ -2806,14 +2806,43 @@ def test_comparison_label_based(self): assert_series_equal(result,expected) result = a | Series([1],['z']) - expected = Series([True, True, True], list('bca')) + expected = Series([True, False, True], list('bca')) assert_series_equal(result,expected) # identity # we would like s[s|e] == s to hold for any e, whether empty or not for e in [Series([]),Series([1],['z']),Series(['z']),Series(np.nan,b.index),Series(np.nan,a.index)]: result = a[a | e] - assert_series_equal(result,a) + assert_series_equal(result,a[a]) + + # vs scalars + index = list('bca') + t = Series([True,False,True]) + + for v in [True,1,2]: + result = Series([True,False,True],index=index) | v + expected = Series([True,True,True],index=index) + assert_series_equal(result,expected) + + for v in [np.nan,'foo']: + self.assertRaises(TypeError, lambda : t | v) + + for v in [False,0]: + result = Series([True,False,True],index=index) | v + expected = Series([True,False,True],index=index) + assert_series_equal(result,expected) + + for v in [True,1]: + result = Series([True,False,True],index=index) & v + expected = Series([True,False,True],index=index) + assert_series_equal(result,expected) + + for v in 
[False,0]: + result = Series([True,False,True],index=index) & v + expected = Series([False,False,False],index=index) + assert_series_equal(result,expected) + for v in [np.nan]: + self.assertRaises(TypeError, lambda : t & v) def test_between(self): s = Series(bdate_range('1/1/2000', periods=20).asobject) @@ -2851,12 +2880,14 @@ def test_scalar_na_cmp_corners(self): def tester(a, b): return a & b - self.assertRaises(ValueError, tester, s, datetime(2005, 1, 1)) + self.assertRaises(TypeError, tester, s, datetime(2005, 1, 1)) s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)]) s[::2] = np.nan - assert_series_equal(tester(s, list(s)), s) + expected = Series(True,index=s.index) + expected[::2] = False + assert_series_equal(tester(s, list(s)), expected) d = DataFrame({'A': s}) # TODO: Fix this exception - needs to be fixed! (see GH5035)