diff --git a/doc/source/release.rst b/doc/source/release.rst index 9504b7165aa4a..c572aa91c18bb 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -283,6 +283,7 @@ See :ref:`Internal Refactoring` in csv_import (:issue:`4322`) - Fix an issue with CacheableOffset not properly being used by many DateOffset; this prevented the DateOffset from being cached (:issue:`4609`) + - Fix boolean comparison with a DataFrame on the lhs, and a list/tuple on the rhs (:issue:`4576`) pandas 0.12 =========== diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 1d47250eb8233..5a5c541fc3251 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -723,29 +723,65 @@ def eval(self, func, other, raise_on_error=True, try_cast=False): # make sure that we can broadcast is_transposed = False if hasattr(other, 'ndim') and hasattr(values, 'ndim'): - if values.ndim != other.ndim or values.shape == other.shape[::-1]: - values = values.T - is_transposed = True + if values.ndim != other.ndim: + is_transposed = True + else: + if values.shape == other.shape[::-1]: + is_transposed = True + elif values.shape[0] == other.shape[-1]: + is_transposed = True + else: + # this is a broadcast error here + raise ValueError("cannot broadcast shape [%s] with block values [%s]" + % (values.T.shape,other.shape)) + + transf = (lambda x: x.T) if is_transposed else (lambda x: x) + + # coerce/transpose the args if needed + values, other = self._try_coerce_args(transf(values), other) + + # get the result, may need to transpose the other + def get_result(other): + return self._try_coerce_result(func(values, other)) + + # error handler if we have an issue operating with the function + def handle_error(): - values, other = self._try_coerce_args(values, other) - args = [values, other] - try: - result = self._try_coerce_result(func(*args)) - except (Exception) as detail: if raise_on_error: - raise TypeError('Could not operate [%s] with block values [%s]' + raise 
TypeError('Could not operate %s with block values %s' % (repr(other), str(detail))) else: # return the values result = np.empty(values.shape, dtype='O') result.fill(np.nan) + return result + + # get the result + try: + result = get_result(other) + + # if we have an invalid shape/broadcast error + # GH4576, so raise instead of allowing to pass thru + except (ValueError) as detail: + raise + except (Exception) as detail: + result = handle_error() + # technically a broadcast error in numpy can 'work' by returning a boolean False if not isinstance(result, np.ndarray): - raise TypeError('Could not compare [%s] with block values' - % repr(other)) + if not isinstance(result, np.ndarray): + + # differentiate between an invalid ndarray-ndarray comparison and + # an invalid type comparison + if isinstance(values, np.ndarray) and is_list_like(other): + raise ValueError('Invalid broadcasting comparison [%s] with block values' + % repr(other)) + + raise TypeError('Could not compare [%s] with block values' + % repr(other)) - if is_transposed: - result = result.T + # transpose if needed + result = transf(result) # try to cast if requested if try_cast: diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 517c984fa0e64..d2ca850f0a1c0 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -4709,6 +4709,75 @@ def test_float_none_comparison(self): self.assertRaises(TypeError, df.__eq__, None) + def test_boolean_comparison(self): + + # GH 4576 + # boolean comparisons with a tuple/list give unexpected results + df = DataFrame(np.arange(6).reshape((3,2))) + b = np.array([2, 2]) + b_r = np.atleast_2d([2,2]) + b_c = b_r.T + l = (2,2,2) + tup = tuple(l) + + # gt + expected = DataFrame([[False,False],[False,True],[True,True]]) + result = df>b + assert_frame_equal(result,expected) + + result = df.values>b + assert_array_equal(result,expected.values) + + result = df>l + assert_frame_equal(result,expected) + + result = df>tup + 
assert_frame_equal(result,expected) + + result = df>b_r + assert_frame_equal(result,expected) + + result = df.values>b_r + assert_array_equal(result,expected.values) + + self.assertRaises(ValueError, df.__gt__, b_c) + self.assertRaises(ValueError, df.values.__gt__, b_c) + + # == + expected = DataFrame([[False,False],[True,False],[False,False]]) + result = df == b + assert_frame_equal(result,expected) + + result = df==l + assert_frame_equal(result,expected) + + result = df==tup + assert_frame_equal(result,expected) + + result = df == b_r + assert_frame_equal(result,expected) + + result = df.values == b_r + assert_array_equal(result,expected.values) + + self.assertRaises(ValueError, lambda : df == b_c) + self.assert_((df.values == b_c) is False) + + # with alignment + df = DataFrame(np.arange(6).reshape((3,2)),columns=list('AB'),index=list('abc')) + expected.index=df.index + expected.columns=df.columns + + result = df==l + assert_frame_equal(result,expected) + + result = df==tup + assert_frame_equal(result,expected) + + # not shape compatible + self.assertRaises(ValueError, lambda : df == (2,2)) + self.assertRaises(ValueError, lambda : df == [2,2]) + def test_to_csv_deprecated_options(self): pname = '__tmp_to_csv_deprecated_options__'