Skip to content

Commit e96c691

Browse files
jbrockmendel authored and gfyoung committed
Use align_method in comp_method_FRAME (pandas-dev#22880)
Closes pandas-devgh-20090
1 parent 241bde1 commit e96c691

File tree

7 files changed

+123
-175
lines changed

7 files changed

+123
-175
lines changed

doc/source/whatsnew/v0.24.0.txt

+82
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,88 @@ Previous Behavior:
510510
0
511511
0 NaT
512512

513+
.. _whatsnew_0240.api.dataframe_cmp_broadcasting:
514+
515+
DataFrame Comparison Operations Broadcasting Changes
516+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
517+
Previously, the broadcasting behavior of :class:`DataFrame` comparison
518+
operations (``==``, ``!=``, ...) was inconsistent with the behavior of
519+
arithmetic operations (``+``, ``-``, ...). The behavior of the comparison
520+
operations has been changed to match the arithmetic operations in these cases.
521+
(:issue:`22880`)
522+
523+
The affected cases are:
524+
525+
- operating against a 2-dimensional ``np.ndarray`` with either 1 row or 1 column will now broadcast the same way a ``np.ndarray`` would (:issue:`23000`).
526+
- a list or tuple with length matching the number of rows in the :class:`DataFrame` will now raise ``ValueError`` instead of operating column-by-column (:issue:`22880`).
527+
- a list or tuple with length matching the number of columns in the :class:`DataFrame` will now operate row-by-row instead of raising ``ValueError`` (:issue:`22880`).
528+
529+
Previous Behavior:
530+
531+
.. code-block:: ipython
532+
533+
In [3]: arr = np.arange(6).reshape(3, 2)
534+
In [4]: df = pd.DataFrame(arr)
535+
536+
In [5]: df == arr[[0], :]
537+
...: # comparison previously broadcast where arithmetic would raise
538+
Out[5]:
539+
0 1
540+
0 True True
541+
1 False False
542+
2 False False
543+
In [6]: df + arr[[0], :]
544+
...
545+
ValueError: Unable to coerce to DataFrame, shape must be (3, 2): given (1, 2)
546+
547+
In [7]: df == (1, 2)
548+
...: # length matches number of columns;
549+
...: # comparison previously raised where arithmetic would broadcast
550+
...
551+
ValueError: Invalid broadcasting comparison [(1, 2)] with block values
552+
In [8]: df + (1, 2)
553+
Out[8]:
554+
0 1
555+
0 1 3
556+
1 3 5
557+
2 5 7
558+
559+
In [9]: df == (1, 2, 3)
560+
...: # length matches number of rows
561+
...: # comparison previously broadcast where arithmetic would raise
562+
Out[9]:
563+
0 1
564+
0 False True
565+
1 True False
566+
2 False False
567+
In [10]: df + (1, 2, 3)
568+
...
569+
ValueError: Unable to coerce to Series, length must be 2: given 3
570+
571+
*Current Behavior*:
572+
573+
.. ipython:: python
574+
:okexcept:
575+
576+
arr = np.arange(6).reshape(3, 2)
577+
df = pd.DataFrame(arr)
578+
579+
.. ipython:: python
580+
# Comparison operations and arithmetic operations both broadcast.
581+
df == arr[[0], :]
582+
df + arr[[0], :]
583+
584+
.. ipython:: python
585+
# Comparison operations and arithmetic operations both broadcast.
586+
df == (1, 2)
587+
df + (1, 2)
588+
589+
.. ipython:: python
590+
:okexcept:
591+
# Comparison operations and arithmetic operations both raise ValueError.
592+
df == (1, 2, 3)
593+
df + (1, 2, 3)
594+
513595

514596
.. _whatsnew_0240.api.dataframe_arithmetic_broadcasting:
515597

pandas/core/frame.py

+2-7
Original file line numberDiff line numberDiff line change
@@ -4948,13 +4948,8 @@ def _combine_match_columns(self, other, func, level=None, try_cast=True):
49484948
return ops.dispatch_to_series(left, right, func, axis="columns")
49494949

49504950
def _combine_const(self, other, func, errors='raise', try_cast=True):
4951-
if lib.is_scalar(other) or np.ndim(other) == 0:
4952-
return ops.dispatch_to_series(self, other, func)
4953-
4954-
new_data = self._data.eval(func=func, other=other,
4955-
errors=errors,
4956-
try_cast=try_cast)
4957-
return self._constructor(new_data)
4951+
assert lib.is_scalar(other) or np.ndim(other) == 0
4952+
return ops.dispatch_to_series(self, other, func)
49584953

49594954
def combine(self, other, func, fill_value=None, overwrite=True):
49604955
"""

pandas/core/internals/blocks.py

-139
Original file line numberDiff line numberDiff line change
@@ -1313,145 +1313,6 @@ def shift(self, periods, axis=0, mgr=None):
13131313

13141314
return [self.make_block(new_values)]
13151315

1316-
def eval(self, func, other, errors='raise', try_cast=False, mgr=None):
1317-
"""
1318-
evaluate the block; return result block from the result
1319-
1320-
Parameters
1321-
----------
1322-
func : how to combine self, other
1323-
other : a ndarray/object
1324-
errors : str, {'raise', 'ignore'}, default 'raise'
1325-
- ``raise`` : allow exceptions to be raised
1326-
- ``ignore`` : suppress exceptions. On error return original object
1327-
1328-
try_cast : try casting the results to the input type
1329-
1330-
Returns
1331-
-------
1332-
a new block, the result of the func
1333-
"""
1334-
orig_other = other
1335-
values = self.values
1336-
1337-
other = getattr(other, 'values', other)
1338-
1339-
# make sure that we can broadcast
1340-
is_transposed = False
1341-
if hasattr(other, 'ndim') and hasattr(values, 'ndim'):
1342-
if values.ndim != other.ndim:
1343-
is_transposed = True
1344-
else:
1345-
if values.shape == other.shape[::-1]:
1346-
is_transposed = True
1347-
elif values.shape[0] == other.shape[-1]:
1348-
is_transposed = True
1349-
else:
1350-
# this is a broadcast error here
1351-
raise ValueError(
1352-
"cannot broadcast shape [{t_shape}] with "
1353-
"block values [{oth_shape}]".format(
1354-
t_shape=values.T.shape, oth_shape=other.shape))
1355-
1356-
transf = (lambda x: x.T) if is_transposed else (lambda x: x)
1357-
1358-
# coerce/transpose the args if needed
1359-
try:
1360-
values, values_mask, other, other_mask = self._try_coerce_args(
1361-
transf(values), other)
1362-
except TypeError:
1363-
block = self.coerce_to_target_dtype(orig_other)
1364-
return block.eval(func, orig_other,
1365-
errors=errors,
1366-
try_cast=try_cast, mgr=mgr)
1367-
1368-
# get the result, may need to transpose the other
1369-
def get_result(other):
1370-
1371-
# avoid numpy warning of comparisons against None
1372-
if other is None:
1373-
result = not func.__name__ == 'eq'
1374-
1375-
# avoid numpy warning of elementwise comparisons to object
1376-
elif is_numeric_v_string_like(values, other):
1377-
result = False
1378-
1379-
# avoid numpy warning of elementwise comparisons
1380-
elif func.__name__ == 'eq':
1381-
if is_list_like(other) and not isinstance(other, np.ndarray):
1382-
other = np.asarray(other)
1383-
1384-
# if we can broadcast, then ok
1385-
if values.shape[-1] != other.shape[-1]:
1386-
return False
1387-
result = func(values, other)
1388-
else:
1389-
result = func(values, other)
1390-
1391-
# mask if needed
1392-
if isinstance(values_mask, np.ndarray) and values_mask.any():
1393-
result = result.astype('float64', copy=False)
1394-
result[values_mask] = np.nan
1395-
if other_mask is True:
1396-
result = result.astype('float64', copy=False)
1397-
result[:] = np.nan
1398-
elif isinstance(other_mask, np.ndarray) and other_mask.any():
1399-
result = result.astype('float64', copy=False)
1400-
result[other_mask.ravel()] = np.nan
1401-
1402-
return result
1403-
1404-
# error handler if we have an issue operating with the function
1405-
def handle_error():
1406-
1407-
if errors == 'raise':
1408-
# The 'detail' variable is defined in outer scope.
1409-
raise TypeError(
1410-
'Could not operate {other!r} with block values '
1411-
'{detail!s}'.format(other=other, detail=detail)) # noqa
1412-
else:
1413-
# return the values
1414-
result = np.empty(values.shape, dtype='O')
1415-
result.fill(np.nan)
1416-
return result
1417-
1418-
# get the result
1419-
try:
1420-
with np.errstate(all='ignore'):
1421-
result = get_result(other)
1422-
1423-
# if we have an invalid shape/broadcast error
1424-
# GH4576, so raise instead of allowing to pass through
1425-
except ValueError as detail:
1426-
raise
1427-
except Exception as detail:
1428-
result = handle_error()
1429-
1430-
# technically a broadcast error in numpy can 'work' by returning a
1431-
# boolean False
1432-
if not isinstance(result, np.ndarray):
1433-
if not isinstance(result, np.ndarray):
1434-
1435-
# differentiate between an invalid ndarray-ndarray comparison
1436-
# and an invalid type comparison
1437-
if isinstance(values, np.ndarray) and is_list_like(other):
1438-
raise ValueError(
1439-
'Invalid broadcasting comparison [{other!r}] with '
1440-
'block values'.format(other=other))
1441-
1442-
raise TypeError('Could not compare [{other!r}] '
1443-
'with block values'.format(other=other))
1444-
1445-
# transpose if needed
1446-
result = transf(result)
1447-
1448-
# try to cast if requested
1449-
if try_cast:
1450-
result = self._try_cast_result(result)
1451-
1452-
result = _block_shape(result, ndim=self.ndim)
1453-
return [self.make_block(result)]
1454-
14551316
def where(self, other, cond, align=True, errors='raise',
14561317
try_cast=False, axis=0, transpose=False, mgr=None):
14571318
"""

pandas/core/internals/managers.py

-6
Original file line numberDiff line numberDiff line change
@@ -373,9 +373,6 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False,
373373
align_keys = ['new', 'mask']
374374
else:
375375
align_keys = ['mask']
376-
elif f == 'eval':
377-
align_copy = False
378-
align_keys = ['other']
379376
elif f == 'fillna':
380377
# fillna internally does putmask, maybe it's better to do this
381378
# at mgr, not block level?
@@ -511,9 +508,6 @@ def isna(self, func, **kwargs):
511508
def where(self, **kwargs):
512509
return self.apply('where', **kwargs)
513510

514-
def eval(self, **kwargs):
515-
return self.apply('eval', **kwargs)
516-
517511
def quantile(self, **kwargs):
518512
return self.reduction('quantile', **kwargs)
519513

pandas/core/ops.py

+3
Original file line numberDiff line numberDiff line change
@@ -1934,6 +1934,9 @@ def _comp_method_FRAME(cls, func, special):
19341934

19351935
@Appender('Wrapper for comparison method {name}'.format(name=op_name))
19361936
def f(self, other):
1937+
1938+
other = _align_method_FRAME(self, other, axis=None)
1939+
19371940
if isinstance(other, ABCDataFrame):
19381941
# Another DataFrame
19391942
if not self._indexed_same(other):

pandas/tests/frame/test_arithmetic.py

+12-7
Original file line numberDiff line numberDiff line change
@@ -48,15 +48,20 @@ def test_mixed_comparison(self):
4848
assert result.all().all()
4949

5050
def test_df_boolean_comparison_error(self):
51-
# GH 4576
52-
# boolean comparisons with a tuple/list give unexpected results
51+
# GH#4576, GH#22880
52+
# comparing DataFrame against list/tuple with len(obj) matching
53+
# len(df.columns) is supported as of GH#22880
5354
df = pd.DataFrame(np.arange(6).reshape((3, 2)))
5455

55-
# not shape compatible
56-
with pytest.raises(ValueError):
57-
df == (2, 2)
58-
with pytest.raises(ValueError):
59-
df == [2, 2]
56+
expected = pd.DataFrame([[False, False],
57+
[True, False],
58+
[False, False]])
59+
60+
result = df == (2, 2)
61+
tm.assert_frame_equal(result, expected)
62+
63+
result = df == [2, 2]
64+
tm.assert_frame_equal(result, expected)
6065

6166
def test_df_float_none_comparison(self):
6267
df = pd.DataFrame(np.random.randn(8, 3), index=range(8),

pandas/tests/frame/test_operators.py

+24-16
Original file line numberDiff line numberDiff line change
@@ -752,8 +752,9 @@ def test_comp(func):
752752
result = func(df1, df2)
753753
tm.assert_numpy_array_equal(result.values,
754754
func(df1.values, df2.values))
755+
755756
with tm.assert_raises_regex(ValueError,
756-
'Wrong number of dimensions'):
757+
'dim must be <= 2'):
757758
func(df1, ndim_5)
758759

759760
result2 = func(self.simple, row)
@@ -804,42 +805,49 @@ def test_boolean_comparison(self):
804805
result = df.values > b
805806
assert_numpy_array_equal(result, expected.values)
806807

807-
result = df > l
808-
assert_frame_equal(result, expected)
808+
msg1d = 'Unable to coerce to Series, length must be 2: given 3'
809+
msg2d = 'Unable to coerce to DataFrame, shape must be'
810+
msg2db = 'operands could not be broadcast together with shapes'
811+
with tm.assert_raises_regex(ValueError, msg1d):
812+
# wrong shape
813+
df > l
809814

810-
result = df > tup
811-
assert_frame_equal(result, expected)
815+
with tm.assert_raises_regex(ValueError, msg1d):
816+
# wrong shape
817+
result = df > tup
812818

819+
# broadcasts like ndarray (GH#23000)
813820
result = df > b_r
814821
assert_frame_equal(result, expected)
815822

816823
result = df.values > b_r
817824
assert_numpy_array_equal(result, expected.values)
818825

819-
with pytest.raises(ValueError):
826+
with tm.assert_raises_regex(ValueError, msg2d):
820827
df > b_c
821828

822-
with pytest.raises(ValueError):
829+
with tm.assert_raises_regex(ValueError, msg2db):
823830
df.values > b_c
824831

825832
# ==
826833
expected = DataFrame([[False, False], [True, False], [False, False]])
827834
result = df == b
828835
assert_frame_equal(result, expected)
829836

830-
result = df == l
831-
assert_frame_equal(result, expected)
837+
with tm.assert_raises_regex(ValueError, msg1d):
838+
result = df == l
832839

833-
result = df == tup
834-
assert_frame_equal(result, expected)
840+
with tm.assert_raises_regex(ValueError, msg1d):
841+
result = df == tup
835842

843+
# broadcasts like ndarray (GH#23000)
836844
result = df == b_r
837845
assert_frame_equal(result, expected)
838846

839847
result = df.values == b_r
840848
assert_numpy_array_equal(result, expected.values)
841849

842-
with pytest.raises(ValueError):
850+
with tm.assert_raises_regex(ValueError, msg2d):
843851
df == b_c
844852

845853
assert df.values.shape != b_c.shape
@@ -850,11 +858,11 @@ def test_boolean_comparison(self):
850858
expected.index = df.index
851859
expected.columns = df.columns
852860

853-
result = df == l
854-
assert_frame_equal(result, expected)
861+
with tm.assert_raises_regex(ValueError, msg1d):
862+
result = df == l
855863

856-
result = df == tup
857-
assert_frame_equal(result, expected)
864+
with tm.assert_raises_regex(ValueError, msg1d):
865+
result = df == tup
858866

859867
def test_combine_generic(self):
860868
df1 = self.frame

0 commit comments

Comments
 (0)