Skip to content

Commit b2c6217

Browse files
jbrockmendeltm9k1
authored andcommitted
Use align_method in comp_method_FRAME (pandas-dev#23132)
1 parent 08f9427 commit b2c6217

File tree

7 files changed

+123
-175
lines changed

7 files changed

+123
-175
lines changed

doc/source/whatsnew/v0.24.0.txt

+82
Original file line numberDiff line numberDiff line change
@@ -659,6 +659,88 @@ Previous Behavior:
659659
0
660660
0 NaT
661661

662+
.. _whatsnew_0240.api.dataframe_cmp_broadcasting:
663+
664+
DataFrame Comparison Operations Broadcasting Changes
665+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
666+
Previously, the broadcasting behavior of :class:`DataFrame` comparison
667+
operations (``==``, ``!=``, ...) was inconsistent with the behavior of
668+
arithmetic operations (``+``, ``-``, ...). The behavior of the comparison
669+
operations has been changed to match the arithmetic operations in these cases.
670+
(:issue:`22880`)
671+
672+
The affected cases are:
673+
674+
- operating against a 2-dimensional ``np.ndarray`` with either 1 row or 1 column will now broadcast the same way a ``np.ndarray`` would (:issue:`23000`).
675+
- a list or tuple with length matching the number of rows in the :class:`DataFrame` will now raise ``ValueError`` instead of operating column-by-column (:issue:`22880`).
676+
- a list or tuple with length matching the number of columns in the :class:`DataFrame` will now operate row-by-row instead of raising ``ValueError`` (:issue:`22880`).
677+
678+
Previous Behavior:
679+
680+
.. code-block:: ipython
681+
682+
In [3]: arr = np.arange(6).reshape(3, 2)
683+
In [4]: df = pd.DataFrame(arr)
684+
685+
In [5]: df == arr[[0], :]
686+
...: # comparison previously broadcast where arithmetic would raise
687+
Out[5]:
688+
0 1
689+
0 True True
690+
1 False False
691+
2 False False
692+
In [6]: df + arr[[0], :]
693+
...
694+
ValueError: Unable to coerce to DataFrame, shape must be (3, 2): given (1, 2)
695+
696+
In [7]: df == (1, 2)
697+
...: # length matches number of columns;
698+
...: # comparison previously raised where arithmetic would broadcast
699+
...
700+
ValueError: Invalid broadcasting comparison [(1, 2)] with block values
701+
In [8]: df + (1, 2)
702+
Out[8]:
703+
0 1
704+
0 1 3
705+
1 3 5
706+
2 5 7
707+
708+
In [9]: df == (1, 2, 3)
709+
...: # length matches number of rows
710+
...: # comparison previously broadcast where arithmetic would raise
711+
Out[9]:
712+
0 1
713+
0 False True
714+
1 True False
715+
2 False False
716+
In [10]: df + (1, 2, 3)
717+
...
718+
ValueError: Unable to coerce to Series, length must be 2: given 3
719+
720+
*Current Behavior*:
721+
722+
.. ipython:: python
723+
:okexcept:
724+
725+
arr = np.arange(6).reshape(3, 2)
726+
df = pd.DataFrame(arr)
727+
728+
.. ipython:: python
729+
# Comparison operations and arithmetic operations both broadcast.
730+
df == arr[[0], :]
731+
df + arr[[0], :]
732+
733+
.. ipython:: python
734+
# Comparison operations and arithmetic operations both broadcast.
735+
df == (1, 2)
736+
df + (1, 2)
737+
738+
.. ipython:: python
739+
:okexcept:
740+
# Comparison operations and arithmetic operations both raise ValueError.
741+
df == (1, 2, 3)
742+
df + (1, 2, 3)
743+
662744

663745
.. _whatsnew_0240.api.dataframe_arithmetic_broadcasting:
664746

pandas/core/frame.py

+2-7
Original file line numberDiff line numberDiff line change
@@ -4979,13 +4979,8 @@ def _combine_match_columns(self, other, func, level=None, try_cast=True):
49794979
return ops.dispatch_to_series(left, right, func, axis="columns")
49804980

49814981
def _combine_const(self, other, func, errors='raise', try_cast=True):
4982-
if lib.is_scalar(other) or np.ndim(other) == 0:
4983-
return ops.dispatch_to_series(self, other, func)
4984-
4985-
new_data = self._data.eval(func=func, other=other,
4986-
errors=errors,
4987-
try_cast=try_cast)
4988-
return self._constructor(new_data)
4982+
assert lib.is_scalar(other) or np.ndim(other) == 0
4983+
return ops.dispatch_to_series(self, other, func)
49894984

49904985
def combine(self, other, func, fill_value=None, overwrite=True):
49914986
"""

pandas/core/internals/blocks.py

-139
Original file line numberDiff line numberDiff line change
@@ -1318,145 +1318,6 @@ def shift(self, periods, axis=0, mgr=None):
13181318

13191319
return [self.make_block(new_values)]
13201320

1321-
def eval(self, func, other, errors='raise', try_cast=False, mgr=None):
1322-
"""
1323-
evaluate the block; return result block from the result
1324-
1325-
Parameters
1326-
----------
1327-
func : how to combine self, other
1328-
other : a ndarray/object
1329-
errors : str, {'raise', 'ignore'}, default 'raise'
1330-
- ``raise`` : allow exceptions to be raised
1331-
- ``ignore`` : suppress exceptions. On error return original object
1332-
1333-
try_cast : try casting the results to the input type
1334-
1335-
Returns
1336-
-------
1337-
a new block, the result of the func
1338-
"""
1339-
orig_other = other
1340-
values = self.values
1341-
1342-
other = getattr(other, 'values', other)
1343-
1344-
# make sure that we can broadcast
1345-
is_transposed = False
1346-
if hasattr(other, 'ndim') and hasattr(values, 'ndim'):
1347-
if values.ndim != other.ndim:
1348-
is_transposed = True
1349-
else:
1350-
if values.shape == other.shape[::-1]:
1351-
is_transposed = True
1352-
elif values.shape[0] == other.shape[-1]:
1353-
is_transposed = True
1354-
else:
1355-
# this is a broadcast error heree
1356-
raise ValueError(
1357-
"cannot broadcast shape [{t_shape}] with "
1358-
"block values [{oth_shape}]".format(
1359-
t_shape=values.T.shape, oth_shape=other.shape))
1360-
1361-
transf = (lambda x: x.T) if is_transposed else (lambda x: x)
1362-
1363-
# coerce/transpose the args if needed
1364-
try:
1365-
values, values_mask, other, other_mask = self._try_coerce_args(
1366-
transf(values), other)
1367-
except TypeError:
1368-
block = self.coerce_to_target_dtype(orig_other)
1369-
return block.eval(func, orig_other,
1370-
errors=errors,
1371-
try_cast=try_cast, mgr=mgr)
1372-
1373-
# get the result, may need to transpose the other
1374-
def get_result(other):
1375-
1376-
# avoid numpy warning of comparisons again None
1377-
if other is None:
1378-
result = not func.__name__ == 'eq'
1379-
1380-
# avoid numpy warning of elementwise comparisons to object
1381-
elif is_numeric_v_string_like(values, other):
1382-
result = False
1383-
1384-
# avoid numpy warning of elementwise comparisons
1385-
elif func.__name__ == 'eq':
1386-
if is_list_like(other) and not isinstance(other, np.ndarray):
1387-
other = np.asarray(other)
1388-
1389-
# if we can broadcast, then ok
1390-
if values.shape[-1] != other.shape[-1]:
1391-
return False
1392-
result = func(values, other)
1393-
else:
1394-
result = func(values, other)
1395-
1396-
# mask if needed
1397-
if isinstance(values_mask, np.ndarray) and values_mask.any():
1398-
result = result.astype('float64', copy=False)
1399-
result[values_mask] = np.nan
1400-
if other_mask is True:
1401-
result = result.astype('float64', copy=False)
1402-
result[:] = np.nan
1403-
elif isinstance(other_mask, np.ndarray) and other_mask.any():
1404-
result = result.astype('float64', copy=False)
1405-
result[other_mask.ravel()] = np.nan
1406-
1407-
return result
1408-
1409-
# error handler if we have an issue operating with the function
1410-
def handle_error():
1411-
1412-
if errors == 'raise':
1413-
# The 'detail' variable is defined in outer scope.
1414-
raise TypeError(
1415-
'Could not operate {other!r} with block values '
1416-
'{detail!s}'.format(other=other, detail=detail)) # noqa
1417-
else:
1418-
# return the values
1419-
result = np.empty(values.shape, dtype='O')
1420-
result.fill(np.nan)
1421-
return result
1422-
1423-
# get the result
1424-
try:
1425-
with np.errstate(all='ignore'):
1426-
result = get_result(other)
1427-
1428-
# if we have an invalid shape/broadcast error
1429-
# GH4576, so raise instead of allowing to pass through
1430-
except ValueError as detail:
1431-
raise
1432-
except Exception as detail:
1433-
result = handle_error()
1434-
1435-
# technically a broadcast error in numpy can 'work' by returning a
1436-
# boolean False
1437-
if not isinstance(result, np.ndarray):
1438-
if not isinstance(result, np.ndarray):
1439-
1440-
# differentiate between an invalid ndarray-ndarray comparison
1441-
# and an invalid type comparison
1442-
if isinstance(values, np.ndarray) and is_list_like(other):
1443-
raise ValueError(
1444-
'Invalid broadcasting comparison [{other!r}] with '
1445-
'block values'.format(other=other))
1446-
1447-
raise TypeError('Could not compare [{other!r}] '
1448-
'with block values'.format(other=other))
1449-
1450-
# transpose if needed
1451-
result = transf(result)
1452-
1453-
# try to cast if requested
1454-
if try_cast:
1455-
result = self._try_cast_result(result)
1456-
1457-
result = _block_shape(result, ndim=self.ndim)
1458-
return [self.make_block(result)]
1459-
14601321
def where(self, other, cond, align=True, errors='raise',
14611322
try_cast=False, axis=0, transpose=False, mgr=None):
14621323
"""

pandas/core/internals/managers.py

-6
Original file line numberDiff line numberDiff line change
@@ -373,9 +373,6 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False,
373373
align_keys = ['new', 'mask']
374374
else:
375375
align_keys = ['mask']
376-
elif f == 'eval':
377-
align_copy = False
378-
align_keys = ['other']
379376
elif f == 'fillna':
380377
# fillna internally does putmask, maybe it's better to do this
381378
# at mgr, not block level?
@@ -511,9 +508,6 @@ def isna(self, func, **kwargs):
511508
def where(self, **kwargs):
512509
return self.apply('where', **kwargs)
513510

514-
def eval(self, **kwargs):
515-
return self.apply('eval', **kwargs)
516-
517511
def quantile(self, **kwargs):
518512
return self.reduction('quantile', **kwargs)
519513

pandas/core/ops.py

+3
Original file line numberDiff line numberDiff line change
@@ -1923,6 +1923,9 @@ def _comp_method_FRAME(cls, func, special):
19231923

19241924
@Appender('Wrapper for comparison method {name}'.format(name=op_name))
19251925
def f(self, other):
1926+
1927+
other = _align_method_FRAME(self, other, axis=None)
1928+
19261929
if isinstance(other, ABCDataFrame):
19271930
# Another DataFrame
19281931
if not self._indexed_same(other):

pandas/tests/frame/test_arithmetic.py

+12-7
Original file line numberDiff line numberDiff line change
@@ -48,15 +48,20 @@ def test_mixed_comparison(self):
4848
assert result.all().all()
4949

5050
def test_df_boolean_comparison_error(self):
51-
# GH 4576
52-
# boolean comparisons with a tuple/list give unexpected results
51+
# GH#4576, GH#22880
52+
# comparing DataFrame against list/tuple with len(obj) matching
53+
# len(df.columns) is supported as of GH#22880
5354
df = pd.DataFrame(np.arange(6).reshape((3, 2)))
5455

55-
# not shape compatible
56-
with pytest.raises(ValueError):
57-
df == (2, 2)
58-
with pytest.raises(ValueError):
59-
df == [2, 2]
56+
expected = pd.DataFrame([[False, False],
57+
[True, False],
58+
[False, False]])
59+
60+
result = df == (2, 2)
61+
tm.assert_frame_equal(result, expected)
62+
63+
result = df == [2, 2]
64+
tm.assert_frame_equal(result, expected)
6065

6166
def test_df_float_none_comparison(self):
6267
df = pd.DataFrame(np.random.randn(8, 3), index=range(8),

pandas/tests/frame/test_operators.py

+24-16
Original file line numberDiff line numberDiff line change
@@ -752,8 +752,9 @@ def test_comp(func):
752752
result = func(df1, df2)
753753
tm.assert_numpy_array_equal(result.values,
754754
func(df1.values, df2.values))
755+
755756
with tm.assert_raises_regex(ValueError,
756-
'Wrong number of dimensions'):
757+
'dim must be <= 2'):
757758
func(df1, ndim_5)
758759

759760
result2 = func(self.simple, row)
@@ -804,42 +805,49 @@ def test_boolean_comparison(self):
804805
result = df.values > b
805806
assert_numpy_array_equal(result, expected.values)
806807

807-
result = df > lst
808-
assert_frame_equal(result, expected)
808+
msg1d = 'Unable to coerce to Series, length must be 2: given 3'
809+
msg2d = 'Unable to coerce to DataFrame, shape must be'
810+
msg2db = 'operands could not be broadcast together with shapes'
811+
with tm.assert_raises_regex(ValueError, msg1d):
812+
# wrong shape
813+
df > lst
809814

810-
result = df > tup
811-
assert_frame_equal(result, expected)
815+
with tm.assert_raises_regex(ValueError, msg1d):
816+
# wrong shape
817+
result = df > tup
812818

819+
# broadcasts like ndarray (GH#23000)
813820
result = df > b_r
814821
assert_frame_equal(result, expected)
815822

816823
result = df.values > b_r
817824
assert_numpy_array_equal(result, expected.values)
818825

819-
with pytest.raises(ValueError):
826+
with tm.assert_raises_regex(ValueError, msg2d):
820827
df > b_c
821828

822-
with pytest.raises(ValueError):
829+
with tm.assert_raises_regex(ValueError, msg2db):
823830
df.values > b_c
824831

825832
# ==
826833
expected = DataFrame([[False, False], [True, False], [False, False]])
827834
result = df == b
828835
assert_frame_equal(result, expected)
829836

830-
result = df == lst
831-
assert_frame_equal(result, expected)
837+
with tm.assert_raises_regex(ValueError, msg1d):
838+
result = df == lst
832839

833-
result = df == tup
834-
assert_frame_equal(result, expected)
840+
with tm.assert_raises_regex(ValueError, msg1d):
841+
result = df == tup
835842

843+
# broadcasts like ndarray (GH#23000)
836844
result = df == b_r
837845
assert_frame_equal(result, expected)
838846

839847
result = df.values == b_r
840848
assert_numpy_array_equal(result, expected.values)
841849

842-
with pytest.raises(ValueError):
850+
with tm.assert_raises_regex(ValueError, msg2d):
843851
df == b_c
844852

845853
assert df.values.shape != b_c.shape
@@ -850,11 +858,11 @@ def test_boolean_comparison(self):
850858
expected.index = df.index
851859
expected.columns = df.columns
852860

853-
result = df == lst
854-
assert_frame_equal(result, expected)
861+
with tm.assert_raises_regex(ValueError, msg1d):
862+
result = df == lst
855863

856-
result = df == tup
857-
assert_frame_equal(result, expected)
864+
with tm.assert_raises_regex(ValueError, msg1d):
865+
result = df == tup
858866

859867
def test_combine_generic(self):
860868
df1 = self.frame

0 commit comments

Comments
 (0)