Skip to content

BUG: Fix boolean comparison with a DataFrame on the lhs, and a list/tuple on the rhs GH4576 #4585

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 26, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
in csv_import (:issue:`4322`)
- Fix an issue with CacheableOffset not properly being used by many DateOffset; this prevented
the DateOffset from being cached (:issue:`4609`)
- Fix boolean comparison with a DataFrame on the lhs, and a list/tuple on the rhs (:issue:`4576`)

pandas 0.12
===========
Expand Down
62 changes: 49 additions & 13 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -723,29 +723,65 @@ def eval(self, func, other, raise_on_error=True, try_cast=False):
# make sure that we can broadcast
is_transposed = False
if hasattr(other, 'ndim') and hasattr(values, 'ndim'):
if values.ndim != other.ndim or values.shape == other.shape[::-1]:
values = values.T
is_transposed = True
if values.ndim != other.ndim:
is_transposed = True
else:
if values.shape == other.shape[::-1]:
is_transposed = True
elif values.shape[0] == other.shape[-1]:
is_transposed = True
else:
# this is a broadcast error here
raise ValueError("cannot broadcast shape [%s] with block values [%s]"
% (values.T.shape,other.shape))

transf = (lambda x: x.T) if is_transposed else (lambda x: x)

# coerce/transpose the args if needed
values, other = self._try_coerce_args(transf(values), other)

# get the result, may need to transpose the other
def get_result(other):
return self._try_coerce_result(func(values, other))

# error handler if we have an issue operating with the function
def handle_error():

values, other = self._try_coerce_args(values, other)
args = [values, other]
try:
result = self._try_coerce_result(func(*args))
except (Exception) as detail:
if raise_on_error:
raise TypeError('Could not operate [%s] with block values [%s]'
raise TypeError('Could not operate %s with block values %s'
% (repr(other), str(detail)))
else:
# return the values
result = np.empty(values.shape, dtype='O')
result.fill(np.nan)
return result

# get the result
try:
result = get_result(other)

# if we have an invalid shape/broadcast error
# GH4576, so raise instead of allowing to pass thru
except (ValueError) as detail:
raise
except (Exception) as detail:
result = handle_error()

# technically a broadcast error in numpy can 'work' by returning a boolean False
if not isinstance(result, np.ndarray):
raise TypeError('Could not compare [%s] with block values'
% repr(other))
if not isinstance(result, np.ndarray):

# differentiate between an invalid ndarray-ndarray comparison and
# an invalid type comparison
if isinstance(values, np.ndarray) and is_list_like(other):
raise ValueError('Invalid broadcasting comparison [%s] with block values'
% repr(other))

raise TypeError('Could not compare [%s] with block values'
% repr(other))

if is_transposed:
result = result.T
# transpose if needed
result = transf(result)

# try to cast if requested
if try_cast:
Expand Down
69 changes: 69 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4709,6 +4709,75 @@ def test_float_none_comparison(self):

self.assertRaises(TypeError, df.__eq__, None)

def test_boolean_comparison(self):
    """Check DataFrame comparisons against arrays, lists and tuples (GH 4576).

    A 3x2 frame holding 0..5 is compared with ``>`` and ``==`` against:

    * ``b``   -- a 1-D ndarray of length 2,
    * ``b_r`` -- the same values as a (1, 2) row array,
    * ``b_c`` -- the transpose of ``b_r``, a (2, 1) column array,
    * ``lst`` / ``tup`` -- a list and a tuple of length 3.

    Every accepted operand must give the same result as comparing the
    frame with the scalar 2. ``b_c`` and length-2 lists/tuples are
    expected to raise ``ValueError`` when compared with the frame.
    """
    # GH 4576
    # boolean comparisons with a tuple/list give unexpected results
    df = DataFrame(np.arange(6).reshape((3, 2)))
    b = np.array([2, 2])
    b_r = np.atleast_2d([2, 2])
    b_c = b_r.T
    # Use a real list here: building the tuple from another tuple would
    # leave the list code path untested.
    lst = [2, 2, 2]
    tup = tuple(lst)

    # gt
    expected = DataFrame([[False, False], [False, True], [True, True]])
    result = df > b
    assert_frame_equal(result, expected)

    result = df.values > b
    assert_array_equal(result, expected.values)

    result = df > lst
    assert_frame_equal(result, expected)

    result = df > tup
    assert_frame_equal(result, expected)

    result = df > b_r
    assert_frame_equal(result, expected)

    result = df.values > b_r
    assert_array_equal(result, expected.values)

    # a column-shaped operand cannot be broadcast against the frame
    self.assertRaises(ValueError, df.__gt__, b_c)
    self.assertRaises(ValueError, df.values.__gt__, b_c)

    # ==
    expected = DataFrame([[False, False], [True, False], [False, False]])
    result = df == b
    assert_frame_equal(result, expected)

    result = df == lst
    assert_frame_equal(result, expected)

    result = df == tup
    assert_frame_equal(result, expected)

    result = df == b_r
    assert_frame_equal(result, expected)

    result = df.values == b_r
    assert_array_equal(result, expected.values)

    self.assertRaises(ValueError, lambda: df == b_c)
    # the raw ndarray comparison is expected to yield a plain ``False``
    # rather than raising, unlike the DataFrame comparison above
    self.assert_((df.values == b_c) is False)

    # with alignment
    df = DataFrame(np.arange(6).reshape((3, 2)),
                   columns=list('AB'), index=list('abc'))
    expected.index = df.index
    expected.columns = df.columns

    result = df == lst
    assert_frame_equal(result, expected)

    result = df == tup
    assert_frame_equal(result, expected)

    # not shape compatible
    self.assertRaises(ValueError, lambda: df == (2, 2))
    self.assertRaises(ValueError, lambda: df == [2, 2])

def test_to_csv_deprecated_options(self):

pname = '__tmp_to_csv_deprecated_options__'
Expand Down