Skip to content

Commit 94d73e4

Browse files
jbrockmendelproost
authored andcommitted
BUG: Fix DataFrame logical ops Series inconsistency (pandas-dev#28741)
1 parent ea392f0 commit 94d73e4

File tree

6 files changed

+49
-25
lines changed

6 files changed

+49
-25
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ Numeric
211211
^^^^^^^
212212
- Bug in :meth:`DataFrame.quantile` with zero-column :class:`DataFrame` incorrectly raising (:issue:`23925`)
213213
- :class:`DataFrame` inequality comparisons with object-dtype and ``complex`` entries failing to raise ``TypeError`` like their :class:`Series` counterparts (:issue:`28079`)
214+
- Bug in :class:`DataFrame` logical operations (`&`, `|`, `^`) not matching :class:`Series` behavior by filling NA values (:issue:`28741`)
214215
-
215216

216217
Conversion

pandas/core/frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -5286,7 +5286,7 @@ def _arith_op(left, right):
52865286
def _combine_match_index(self, other, func):
52875287
# at this point we have `self.index.equals(other.index)`
52885288

5289-
if self._is_mixed_type or other._is_mixed_type:
5289+
if ops.should_series_dispatch(self, other, func):
52905290
# operate column-wise; avoid costly object-casting in `.values`
52915291
new_data = ops.dispatch_to_series(self, other, func)
52925292
else:

pandas/core/ops/__init__.py

+3
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,7 @@ def _arith_method_FRAME(cls, op, special):
692692
default_axis = _get_frame_op_default_axis(op_name)
693693

694694
na_op = define_na_arithmetic_op(op, str_rep, eval_kwargs)
695+
is_logical = str_rep in ["&", "|", "^"]
695696

696697
if op_name in _op_descriptions:
697698
# i.e. include "add" but not "__add__"
@@ -707,11 +708,13 @@ def f(self, other, axis=default_axis, level=None, fill_value=None):
707708
if isinstance(other, ABCDataFrame):
708709
# Another DataFrame
709710
pass_op = op if should_series_dispatch(self, other, op) else na_op
711+
pass_op = pass_op if not is_logical else op
710712
return self._combine_frame(other, pass_op, fill_value, level)
711713
elif isinstance(other, ABCSeries):
712714
# For these values of `axis`, we end up dispatching to Series op,
713715
# so do not want the masked op.
714716
pass_op = op if axis in [0, "columns", None] else na_op
717+
pass_op = pass_op if not is_logical else op
715718
return _combine_series_frame(
716719
self, other, pass_op, fill_value=fill_value, axis=axis, level=level
717720
)

pandas/core/ops/dispatch.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def should_series_dispatch(left, right, op):
5656
Parameters
5757
----------
5858
left : DataFrame
59-
right : DataFrame
59+
right : DataFrame or Series
6060
op : binary operator
6161
6262
Returns
@@ -66,6 +66,16 @@ def should_series_dispatch(left, right, op):
6666
if left._is_mixed_type or right._is_mixed_type:
6767
return True
6868

69+
if op.__name__.strip("_") in ["and", "or", "xor", "rand", "ror", "rxor"]:
70+
# TODO: GH references for what this fixes
71+
# Note: this check must come before the check for nonempty columns.
72+
return True
73+
74+
if right.ndim == 1:
75+
# operating with Series, short-circuit checks that would fail
76+
# with AttributeError.
77+
return False
78+
6979
if not len(left.columns) or not len(right.columns):
7080
# ensure obj.dtypes[0] exists for each obj
7181
return False

pandas/tests/frame/test_operators.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,8 @@ def test_logical_ops_empty_frame(self):
127127
dfa = DataFrame(index=[1], columns=["A"])
128128

129129
result = dfa & dfa
130-
assert_frame_equal(result, dfa)
130+
expected = DataFrame(False, index=[1], columns=["A"])
131+
assert_frame_equal(result, expected)
131132

132133
def test_logical_ops_bool_frame(self):
133134
# GH#5808
@@ -145,7 +146,11 @@ def test_logical_ops_int_frame(self):
145146
df1a_bool = DataFrame(True, index=[1], columns=["A"])
146147

147148
result = df1a_int | df1a_bool
148-
assert_frame_equal(result, df1a_int)
149+
assert_frame_equal(result, df1a_bool)
150+
151+
# Check that this matches Series behavior
152+
res_ser = df1a_int["A"] | df1a_bool["A"]
153+
assert_series_equal(res_ser, df1a_bool["A"])
149154

150155
def test_logical_ops_invalid(self):
151156
# GH#5808

pandas/tests/series/test_operators.py

+26-21
Original file line numberDiff line numberDiff line change
@@ -250,12 +250,20 @@ def test_scalar_na_logical_ops_corners(self):
250250

251251
with pytest.raises(TypeError):
252252
d.__and__(s, axis="columns")
253+
with pytest.raises(TypeError):
254+
d.__and__(s, axis=1)
253255

254256
with pytest.raises(TypeError):
255257
s & d
258+
with pytest.raises(TypeError):
259+
d & s
260+
261+
expected = (s & s).to_frame("A")
262+
result = d.__and__(s, axis="index")
263+
tm.assert_frame_equal(result, expected)
256264

257-
# this is wrong as its not a boolean result
258-
# result = d.__and__(s,axis='index')
265+
result = d.__and__(s, axis=0)
266+
tm.assert_frame_equal(result, expected)
259267

260268
@pytest.mark.parametrize("op", [operator.and_, operator.or_, operator.xor])
261269
def test_logical_ops_with_index(self, op):
@@ -436,20 +444,19 @@ def test_logical_ops_df_compat(self):
436444
assert_series_equal(s2 & s1, exp)
437445

438446
# True | np.nan => True
439-
exp = pd.Series([True, True, True, False], index=list("ABCD"), name="x")
440-
assert_series_equal(s1 | s2, exp)
447+
exp_or1 = pd.Series([True, True, True, False], index=list("ABCD"), name="x")
448+
assert_series_equal(s1 | s2, exp_or1)
441449
# np.nan | True => np.nan, filled with False
442-
exp = pd.Series([True, True, False, False], index=list("ABCD"), name="x")
443-
assert_series_equal(s2 | s1, exp)
450+
exp_or = pd.Series([True, True, False, False], index=list("ABCD"), name="x")
451+
assert_series_equal(s2 | s1, exp_or)
444452

445453
# DataFrame doesn't fill nan with False
446-
exp = pd.DataFrame({"x": [True, False, np.nan, np.nan]}, index=list("ABCD"))
447-
assert_frame_equal(s1.to_frame() & s2.to_frame(), exp)
448-
assert_frame_equal(s2.to_frame() & s1.to_frame(), exp)
454+
assert_frame_equal(s1.to_frame() & s2.to_frame(), exp.to_frame())
455+
assert_frame_equal(s2.to_frame() & s1.to_frame(), exp.to_frame())
449456

450457
exp = pd.DataFrame({"x": [True, True, np.nan, np.nan]}, index=list("ABCD"))
451-
assert_frame_equal(s1.to_frame() | s2.to_frame(), exp)
452-
assert_frame_equal(s2.to_frame() | s1.to_frame(), exp)
458+
assert_frame_equal(s1.to_frame() | s2.to_frame(), exp_or1.to_frame())
459+
assert_frame_equal(s2.to_frame() | s1.to_frame(), exp_or.to_frame())
453460

454461
# different length
455462
s3 = pd.Series([True, False, True], index=list("ABC"), name="x")
@@ -460,19 +467,17 @@ def test_logical_ops_df_compat(self):
460467
assert_series_equal(s4 & s3, exp)
461468

462469
# np.nan | True => np.nan, filled with False
463-
exp = pd.Series([True, True, True, False], index=list("ABCD"), name="x")
464-
assert_series_equal(s3 | s4, exp)
470+
exp_or1 = pd.Series([True, True, True, False], index=list("ABCD"), name="x")
471+
assert_series_equal(s3 | s4, exp_or1)
465472
# True | np.nan => True
466-
exp = pd.Series([True, True, True, True], index=list("ABCD"), name="x")
467-
assert_series_equal(s4 | s3, exp)
473+
exp_or = pd.Series([True, True, True, True], index=list("ABCD"), name="x")
474+
assert_series_equal(s4 | s3, exp_or)
468475

469-
exp = pd.DataFrame({"x": [True, False, True, np.nan]}, index=list("ABCD"))
470-
assert_frame_equal(s3.to_frame() & s4.to_frame(), exp)
471-
assert_frame_equal(s4.to_frame() & s3.to_frame(), exp)
476+
assert_frame_equal(s3.to_frame() & s4.to_frame(), exp.to_frame())
477+
assert_frame_equal(s4.to_frame() & s3.to_frame(), exp.to_frame())
472478

473-
exp = pd.DataFrame({"x": [True, True, True, np.nan]}, index=list("ABCD"))
474-
assert_frame_equal(s3.to_frame() | s4.to_frame(), exp)
475-
assert_frame_equal(s4.to_frame() | s3.to_frame(), exp)
479+
assert_frame_equal(s3.to_frame() | s4.to_frame(), exp_or1.to_frame())
480+
assert_frame_equal(s4.to_frame() | s3.to_frame(), exp_or.to_frame())
476481

477482

478483
class TestSeriesComparisons:

0 commit comments

Comments
 (0)