Skip to content

Commit e25976a

Browse files
committed
Merge pull request #4585 from jreback/boolean_comp
BUG: Fix boolean comparison with a DataFrame on the lhs, and a list/tuple on the rhs GH4576
2 parents 49a21db + 9d1ab40 commit e25976a

File tree

3 files changed

+119
-13
lines changed

3 files changed

+119
-13
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,7 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
283283
in csv_import (:issue:`4322`)
284284
- Fix an issue with CacheableOffset not properly being used by many DateOffset; this prevented
285285
the DateOffset from being cached (:issue:`4609`)
286+
- Fix boolean comparison with a DataFrame on the lhs, and a list/tuple on the rhs (:issue:`4576`)
286287

287288
pandas 0.12
288289
===========

pandas/core/internals.py

+49-13
Original file line numberDiff line numberDiff line change
@@ -723,29 +723,65 @@ def eval(self, func, other, raise_on_error=True, try_cast=False):
723723
# make sure that we can broadcast
724724
is_transposed = False
725725
if hasattr(other, 'ndim') and hasattr(values, 'ndim'):
726-
if values.ndim != other.ndim or values.shape == other.shape[::-1]:
727-
values = values.T
728-
is_transposed = True
726+
if values.ndim != other.ndim:
727+
is_transposed = True
728+
else:
729+
if values.shape == other.shape[::-1]:
730+
is_transposed = True
731+
elif values.shape[0] == other.shape[-1]:
732+
is_transposed = True
733+
else:
734+
# this is a broadcast error here
735+
raise ValueError("cannot broadcast shape [%s] with block values [%s]"
736+
% (values.T.shape,other.shape))
737+
738+
transf = (lambda x: x.T) if is_transposed else (lambda x: x)
739+
740+
# coerce/transpose the args if needed
741+
values, other = self._try_coerce_args(transf(values), other)
742+
743+
# get the result, may need to transpose the other
744+
def get_result(other):
745+
return self._try_coerce_result(func(values, other))
746+
747+
# error handler if we have an issue operating with the function
748+
def handle_error():
729749

730-
values, other = self._try_coerce_args(values, other)
731-
args = [values, other]
732-
try:
733-
result = self._try_coerce_result(func(*args))
734-
except (Exception) as detail:
735750
if raise_on_error:
736-
raise TypeError('Could not operate [%s] with block values [%s]'
751+
raise TypeError('Could not operate %s with block values %s'
737752
% (repr(other), str(detail)))
738753
else:
739754
# return the values
740755
result = np.empty(values.shape, dtype='O')
741756
result.fill(np.nan)
757+
return result
758+
759+
# get the result
760+
try:
761+
result = get_result(other)
762+
763+
# if we have an invalid shape/broadcast error
764+
# GH4576, so raise instead of allowing to pass thru
765+
except (ValueError) as detail:
766+
raise
767+
except (Exception) as detail:
768+
result = handle_error()
742769

770+
# technically a broadcast error in numpy can 'work' by returning a boolean False
743771
if not isinstance(result, np.ndarray):
744-
raise TypeError('Could not compare [%s] with block values'
745-
% repr(other))
772+
if not isinstance(result, np.ndarray):
773+
774+
# differentiate between an invalid ndarray-ndarray comparison and
775+
# an invalid type comparison
776+
if isinstance(values, np.ndarray) and is_list_like(other):
777+
raise ValueError('Invalid broadcasting comparison [%s] with block values'
778+
% repr(other))
779+
780+
raise TypeError('Could not compare [%s] with block values'
781+
% repr(other))
746782

747-
if is_transposed:
748-
result = result.T
783+
# transpose if needed
784+
result = transf(result)
749785

750786
# try to cast if requested
751787
if try_cast:

pandas/tests/test_frame.py

+69
Original file line numberDiff line numberDiff line change
@@ -4709,6 +4709,75 @@ def test_float_none_comparison(self):
47094709

47104710
self.assertRaises(TypeError, df.__eq__, None)
47114711

4712+
def test_boolean_comparison(self):
4713+
4714+
# GH 4576
4715+
# boolean comparisons with a tuple/list give unexpected results
4716+
df = DataFrame(np.arange(6).reshape((3,2)))
4717+
b = np.array([2, 2])
4718+
b_r = np.atleast_2d([2,2])
4719+
b_c = b_r.T
4720+
l = (2,2,2)
4721+
tup = tuple(l)
4722+
4723+
# gt
4724+
expected = DataFrame([[False,False],[False,True],[True,True]])
4725+
result = df>b
4726+
assert_frame_equal(result,expected)
4727+
4728+
result = df.values>b
4729+
assert_array_equal(result,expected.values)
4730+
4731+
result = df>l
4732+
assert_frame_equal(result,expected)
4733+
4734+
result = df>tup
4735+
assert_frame_equal(result,expected)
4736+
4737+
result = df>b_r
4738+
assert_frame_equal(result,expected)
4739+
4740+
result = df.values>b_r
4741+
assert_array_equal(result,expected.values)
4742+
4743+
self.assertRaises(ValueError, df.__gt__, b_c)
4744+
self.assertRaises(ValueError, df.values.__gt__, b_c)
4745+
4746+
# ==
4747+
expected = DataFrame([[False,False],[True,False],[False,False]])
4748+
result = df == b
4749+
assert_frame_equal(result,expected)
4750+
4751+
result = df==l
4752+
assert_frame_equal(result,expected)
4753+
4754+
result = df==tup
4755+
assert_frame_equal(result,expected)
4756+
4757+
result = df == b_r
4758+
assert_frame_equal(result,expected)
4759+
4760+
result = df.values == b_r
4761+
assert_array_equal(result,expected.values)
4762+
4763+
self.assertRaises(ValueError, lambda : df == b_c)
4764+
self.assert_((df.values == b_c) is False)
4765+
4766+
# with alignment
4767+
df = DataFrame(np.arange(6).reshape((3,2)),columns=list('AB'),index=list('abc'))
4768+
expected.index=df.index
4769+
expected.columns=df.columns
4770+
4771+
result = df==l
4772+
assert_frame_equal(result,expected)
4773+
4774+
result = df==tup
4775+
assert_frame_equal(result,expected)
4776+
4777+
# not shape compatible
4778+
self.assertRaises(ValueError, lambda : df == (2,2))
4779+
self.assertRaises(ValueError, lambda : df == [2,2])
4780+
47124781
def test_to_csv_deprecated_options(self):
47134782

47144783
pname = '__tmp_to_csv_deprecated_options__'

0 commit comments

Comments
 (0)