Skip to content

Commit 3feaa79

Browse files
jorisvandenbosschetm9k1
authored and committed
Revert "Use align_method in comp_method_FRAME (pandas-dev#22880)" (pandas-dev#23120)
This reverts commit e96c691.
1 parent 3e7f099 commit 3feaa79

File tree

7 files changed

+175
-123
lines changed

7 files changed

+175
-123
lines changed

doc/source/whatsnew/v0.24.0.txt

-82
Original file line numberDiff line numberDiff line change
@@ -511,88 +511,6 @@ Previous Behavior:
511511
0
512512
0 NaT
513513

514-
.. _whatsnew_0240.api.dataframe_cmp_broadcasting:
515-
516-
DataFrame Comparison Operations Broadcasting Changes
517-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
518-
Previously, the broadcasting behavior of :class:`DataFrame` comparison
519-
operations (``==``, ``!=``, ...) was inconsistent with the behavior of
520-
arithmetic operations (``+``, ``-``, ...). The behavior of the comparison
521-
operations has been changed to match the arithmetic operations in these cases.
522-
(:issue:`22880`)
523-
524-
The affected cases are:
525-
526-
- operating against a 2-dimensional ``np.ndarray`` with either 1 row or 1 column will now broadcast the same way a ``np.ndarray`` would (:issue:`23000`).
527-
- a list or tuple with length matching the number of rows in the :class:`DataFrame` will now raise ``ValueError`` instead of operating column-by-column (:issue:`22880`).
528-
- a list or tuple with length matching the number of columns in the :class:`DataFrame` will now operate row-by-row instead of raising ``ValueError`` (:issue:`22880`).
529-
530-
Previous Behavior:
531-
532-
.. code-block:: ipython
533-
534-
In [3]: arr = np.arange(6).reshape(3, 2)
535-
In [4]: df = pd.DataFrame(arr)
536-
537-
In [5]: df == arr[[0], :]
538-
...: # comparison previously broadcast where arithmetic would raise
539-
Out[5]:
540-
0 1
541-
0 True True
542-
1 False False
543-
2 False False
544-
In [6]: df + arr[[0], :]
545-
...
546-
ValueError: Unable to coerce to DataFrame, shape must be (3, 2): given (1, 2)
547-
548-
In [7]: df == (1, 2)
549-
...: # length matches number of columns;
550-
...: # comparison previously raised where arithmetic would broadcast
551-
...
552-
ValueError: Invalid broadcasting comparison [(1, 2)] with block values
553-
In [8]: df + (1, 2)
554-
Out[8]:
555-
0 1
556-
0 1 3
557-
1 3 5
558-
2 5 7
559-
560-
In [9]: df == (1, 2, 3)
561-
...: # length matches number of rows
562-
...: # comparison previously broadcast where arithmetic would raise
563-
Out[9]:
564-
0 1
565-
0 False True
566-
1 True False
567-
2 False False
568-
In [10]: df + (1, 2, 3)
569-
...
570-
ValueError: Unable to coerce to Series, length must be 2: given 3
571-
572-
*Current Behavior*:
573-
574-
.. ipython:: python
575-
:okexcept:
576-
577-
arr = np.arange(6).reshape(3, 2)
578-
df = pd.DataFrame(arr)
579-
580-
.. ipython:: python
581-
# Comparison operations and arithmetic operations both broadcast.
582-
df == arr[[0], :]
583-
df + arr[[0], :]
584-
585-
.. ipython:: python
586-
# Comparison operations and arithmetic operations both broadcast.
587-
df == (1, 2)
588-
df + (1, 2)
589-
590-
.. ipython:: python
591-
:okexcept:
592-
# Comparison operations and arithmetic operations both raise ValueError.
593-
df == (1, 2, 3)
594-
df + (1, 2, 3)
595-
596514

597515
.. _whatsnew_0240.api.dataframe_arithmetic_broadcasting:
598516

pandas/core/frame.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -4948,8 +4948,13 @@ def _combine_match_columns(self, other, func, level=None, try_cast=True):
49484948
return ops.dispatch_to_series(left, right, func, axis="columns")
49494949

49504950
def _combine_const(self, other, func, errors='raise', try_cast=True):
4951-
assert lib.is_scalar(other) or np.ndim(other) == 0
4952-
return ops.dispatch_to_series(self, other, func)
4951+
if lib.is_scalar(other) or np.ndim(other) == 0:
4952+
return ops.dispatch_to_series(self, other, func)
4953+
4954+
new_data = self._data.eval(func=func, other=other,
4955+
errors=errors,
4956+
try_cast=try_cast)
4957+
return self._constructor(new_data)
49534958

49544959
def combine(self, other, func, fill_value=None, overwrite=True):
49554960
"""

pandas/core/internals/blocks.py

+139
Original file line numberDiff line numberDiff line change
@@ -1313,6 +1313,145 @@ def shift(self, periods, axis=0, mgr=None):
13131313

13141314
return [self.make_block(new_values)]
13151315

1316+
def eval(self, func, other, errors='raise', try_cast=False, mgr=None):
1317+
"""
1318+
evaluate the block; return result block from the result
1319+
1320+
Parameters
1321+
----------
1322+
func : how to combine self, other
1323+
other : a ndarray/object
1324+
errors : str, {'raise', 'ignore'}, default 'raise'
1325+
- ``raise`` : allow exceptions to be raised
1326+
- ``ignore`` : suppress exceptions. On error return original object
1327+
1328+
try_cast : try casting the results to the input type
1329+
1330+
Returns
1331+
-------
1332+
a new block, the result of the func
1333+
"""
1334+
orig_other = other
1335+
values = self.values
1336+
1337+
other = getattr(other, 'values', other)
1338+
1339+
# make sure that we can broadcast
1340+
is_transposed = False
1341+
if hasattr(other, 'ndim') and hasattr(values, 'ndim'):
1342+
if values.ndim != other.ndim:
1343+
is_transposed = True
1344+
else:
1345+
if values.shape == other.shape[::-1]:
1346+
is_transposed = True
1347+
elif values.shape[0] == other.shape[-1]:
1348+
is_transposed = True
1349+
else:
1350+
# this is a broadcast error here
1351+
raise ValueError(
1352+
"cannot broadcast shape [{t_shape}] with "
1353+
"block values [{oth_shape}]".format(
1354+
t_shape=values.T.shape, oth_shape=other.shape))
1355+
1356+
transf = (lambda x: x.T) if is_transposed else (lambda x: x)
1357+
1358+
# coerce/transpose the args if needed
1359+
try:
1360+
values, values_mask, other, other_mask = self._try_coerce_args(
1361+
transf(values), other)
1362+
except TypeError:
1363+
block = self.coerce_to_target_dtype(orig_other)
1364+
return block.eval(func, orig_other,
1365+
errors=errors,
1366+
try_cast=try_cast, mgr=mgr)
1367+
1368+
# get the result, may need to transpose the other
1369+
def get_result(other):
1370+
1371+
# avoid numpy warning of comparisons against None
1372+
if other is None:
1373+
result = not func.__name__ == 'eq'
1374+
1375+
# avoid numpy warning of elementwise comparisons to object
1376+
elif is_numeric_v_string_like(values, other):
1377+
result = False
1378+
1379+
# avoid numpy warning of elementwise comparisons
1380+
elif func.__name__ == 'eq':
1381+
if is_list_like(other) and not isinstance(other, np.ndarray):
1382+
other = np.asarray(other)
1383+
1384+
# if we can broadcast, then ok
1385+
if values.shape[-1] != other.shape[-1]:
1386+
return False
1387+
result = func(values, other)
1388+
else:
1389+
result = func(values, other)
1390+
1391+
# mask if needed
1392+
if isinstance(values_mask, np.ndarray) and values_mask.any():
1393+
result = result.astype('float64', copy=False)
1394+
result[values_mask] = np.nan
1395+
if other_mask is True:
1396+
result = result.astype('float64', copy=False)
1397+
result[:] = np.nan
1398+
elif isinstance(other_mask, np.ndarray) and other_mask.any():
1399+
result = result.astype('float64', copy=False)
1400+
result[other_mask.ravel()] = np.nan
1401+
1402+
return result
1403+
1404+
# error handler if we have an issue operating with the function
1405+
def handle_error():
1406+
1407+
if errors == 'raise':
1408+
# The 'detail' variable is defined in outer scope.
1409+
raise TypeError(
1410+
'Could not operate {other!r} with block values '
1411+
'{detail!s}'.format(other=other, detail=detail)) # noqa
1412+
else:
1413+
# return the values
1414+
result = np.empty(values.shape, dtype='O')
1415+
result.fill(np.nan)
1416+
return result
1417+
1418+
# get the result
1419+
try:
1420+
with np.errstate(all='ignore'):
1421+
result = get_result(other)
1422+
1423+
# if we have an invalid shape/broadcast error
1424+
# GH4576, so raise instead of allowing to pass through
1425+
except ValueError as detail:
1426+
raise
1427+
except Exception as detail:
1428+
result = handle_error()
1429+
1430+
# technically a broadcast error in numpy can 'work' by returning a
1431+
# boolean False
1432+
if not isinstance(result, np.ndarray):
1433+
if not isinstance(result, np.ndarray):
1434+
1435+
# differentiate between an invalid ndarray-ndarray comparison
1436+
# and an invalid type comparison
1437+
if isinstance(values, np.ndarray) and is_list_like(other):
1438+
raise ValueError(
1439+
'Invalid broadcasting comparison [{other!r}] with '
1440+
'block values'.format(other=other))
1441+
1442+
raise TypeError('Could not compare [{other!r}] '
1443+
'with block values'.format(other=other))
1444+
1445+
# transpose if needed
1446+
result = transf(result)
1447+
1448+
# try to cast if requested
1449+
if try_cast:
1450+
result = self._try_cast_result(result)
1451+
1452+
result = _block_shape(result, ndim=self.ndim)
1453+
return [self.make_block(result)]
1454+
13161455
def where(self, other, cond, align=True, errors='raise',
13171456
try_cast=False, axis=0, transpose=False, mgr=None):
13181457
"""

pandas/core/internals/managers.py

+6
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,9 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False,
373373
align_keys = ['new', 'mask']
374374
else:
375375
align_keys = ['mask']
376+
elif f == 'eval':
377+
align_copy = False
378+
align_keys = ['other']
376379
elif f == 'fillna':
377380
# fillna internally does putmask, maybe it's better to do this
378381
# at mgr, not block level?
@@ -508,6 +511,9 @@ def isna(self, func, **kwargs):
508511
def where(self, **kwargs):
509512
return self.apply('where', **kwargs)
510513

514+
def eval(self, **kwargs):
515+
return self.apply('eval', **kwargs)
516+
511517
def quantile(self, **kwargs):
512518
return self.reduction('quantile', **kwargs)
513519

pandas/core/ops.py

-3
Original file line numberDiff line numberDiff line change
@@ -1934,9 +1934,6 @@ def _comp_method_FRAME(cls, func, special):
19341934

19351935
@Appender('Wrapper for comparison method {name}'.format(name=op_name))
19361936
def f(self, other):
1937-
1938-
other = _align_method_FRAME(self, other, axis=None)
1939-
19401937
if isinstance(other, ABCDataFrame):
19411938
# Another DataFrame
19421939
if not self._indexed_same(other):

pandas/tests/frame/test_arithmetic.py

+7-12
Original file line numberDiff line numberDiff line change
@@ -48,20 +48,15 @@ def test_mixed_comparison(self):
4848
assert result.all().all()
4949

5050
def test_df_boolean_comparison_error(self):
51-
# GH#4576, GH#22880
52-
# comparing DataFrame against list/tuple with len(obj) matching
53-
# len(df.columns) is supported as of GH#22880
51+
# GH 4576
52+
# boolean comparisons with a tuple/list give unexpected results
5453
df = pd.DataFrame(np.arange(6).reshape((3, 2)))
5554

56-
expected = pd.DataFrame([[False, False],
57-
[True, False],
58-
[False, False]])
59-
60-
result = df == (2, 2)
61-
tm.assert_frame_equal(result, expected)
62-
63-
result = df == [2, 2]
64-
tm.assert_frame_equal(result, expected)
55+
# not shape compatible
56+
with pytest.raises(ValueError):
57+
df == (2, 2)
58+
with pytest.raises(ValueError):
59+
df == [2, 2]
6560

6661
def test_df_float_none_comparison(self):
6762
df = pd.DataFrame(np.random.randn(8, 3), index=range(8),

pandas/tests/frame/test_operators.py

+16-24
Original file line numberDiff line numberDiff line change
@@ -752,9 +752,8 @@ def test_comp(func):
752752
result = func(df1, df2)
753753
tm.assert_numpy_array_equal(result.values,
754754
func(df1.values, df2.values))
755-
756755
with tm.assert_raises_regex(ValueError,
757-
'dim must be <= 2'):
756+
'Wrong number of dimensions'):
758757
func(df1, ndim_5)
759758

760759
result2 = func(self.simple, row)
@@ -805,49 +804,42 @@ def test_boolean_comparison(self):
805804
result = df.values > b
806805
assert_numpy_array_equal(result, expected.values)
807806

808-
msg1d = 'Unable to coerce to Series, length must be 2: given 3'
809-
msg2d = 'Unable to coerce to DataFrame, shape must be'
810-
msg2db = 'operands could not be broadcast together with shapes'
811-
with tm.assert_raises_regex(ValueError, msg1d):
812-
# wrong shape
813-
df > l
807+
result = df > l
808+
assert_frame_equal(result, expected)
814809

815-
with tm.assert_raises_regex(ValueError, msg1d):
816-
# wrong shape
817-
result = df > tup
810+
result = df > tup
811+
assert_frame_equal(result, expected)
818812

819-
# broadcasts like ndarray (GH#23000)
820813
result = df > b_r
821814
assert_frame_equal(result, expected)
822815

823816
result = df.values > b_r
824817
assert_numpy_array_equal(result, expected.values)
825818

826-
with tm.assert_raises_regex(ValueError, msg2d):
819+
with pytest.raises(ValueError):
827820
df > b_c
828821

829-
with tm.assert_raises_regex(ValueError, msg2db):
822+
with pytest.raises(ValueError):
830823
df.values > b_c
831824

832825
# ==
833826
expected = DataFrame([[False, False], [True, False], [False, False]])
834827
result = df == b
835828
assert_frame_equal(result, expected)
836829

837-
with tm.assert_raises_regex(ValueError, msg1d):
838-
result = df == l
830+
result = df == l
831+
assert_frame_equal(result, expected)
839832

840-
with tm.assert_raises_regex(ValueError, msg1d):
841-
result = df == tup
833+
result = df == tup
834+
assert_frame_equal(result, expected)
842835

843-
# broadcasts like ndarray (GH#23000)
844836
result = df == b_r
845837
assert_frame_equal(result, expected)
846838

847839
result = df.values == b_r
848840
assert_numpy_array_equal(result, expected.values)
849841

850-
with tm.assert_raises_regex(ValueError, msg2d):
842+
with pytest.raises(ValueError):
851843
df == b_c
852844

853845
assert df.values.shape != b_c.shape
@@ -858,11 +850,11 @@ def test_boolean_comparison(self):
858850
expected.index = df.index
859851
expected.columns = df.columns
860852

861-
with tm.assert_raises_regex(ValueError, msg1d):
862-
result = df == l
853+
result = df == l
854+
assert_frame_equal(result, expected)
863855

864-
with tm.assert_raises_regex(ValueError, msg1d):
865-
result = df == tup
856+
result = df == tup
857+
assert_frame_equal(result, expected)
866858

867859
def test_combine_generic(self):
868860
df1 = self.frame

0 commit comments

Comments
 (0)