Skip to content

OPS: Remove mask_cmp_op fallback behavior #28601

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Sep 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ Timezones
Numeric
^^^^^^^
- Bug in :meth:`DataFrame.quantile` with zero-column :class:`DataFrame` incorrectly raising (:issue:`23925`)
-
- Bug in :class:`DataFrame` inequality comparisons with object-dtype and ``complex`` entries failing to raise ``TypeError`` like their :class:`Series` counterparts (:issue:`28079`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

entries -> dtypes (can do in later PR)

-

Conversion
Expand Down
48 changes: 4 additions & 44 deletions pandas/core/ops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
ABCIndexClass,
ABCSeries,
)
from pandas.core.dtypes.missing import isna, notna
from pandas.core.dtypes.missing import isna

from pandas._typing import ArrayLike
from pandas.core.construction import array, extract_array
Expand Down Expand Up @@ -354,38 +354,6 @@ def fill_binop(left, right, fill_value):
return left, right


def mask_cmp_op(x, y, op):
"""
Apply the function `op` to only non-null points in x and y.

Parameters
----------
x : array-like
y : array-like
op : binary operation

Returns
-------
result : ndarray[bool]
"""
xrav = x.ravel()
result = np.empty(x.size, dtype=bool)
if isinstance(y, (np.ndarray, ABCSeries)):
yrav = y.ravel()
mask = notna(xrav) & notna(yrav)
result[mask] = op(np.array(list(xrav[mask])), np.array(list(yrav[mask])))
else:
mask = notna(xrav)
result[mask] = op(np.array(list(xrav[mask])), y)

if op == operator.ne: # pragma: no cover
np.putmask(result, ~mask, True)
else:
np.putmask(result, ~mask, False)
result = result.reshape(x.shape)
return result


# -----------------------------------------------------------------------------
# Dispatch logic

Expand Down Expand Up @@ -905,14 +873,6 @@ def _flex_comp_method_FRAME(cls, op, special):
op_name = _get_op_name(op, special)
default_axis = _get_frame_op_default_axis(op_name)

def na_op(x, y):
try:
with np.errstate(invalid="ignore"):
result = op(x, y)
except TypeError:
result = mask_cmp_op(x, y, op)
return result

doc = _flex_comp_doc_FRAME.format(
op_name=op_name, desc=_op_descriptions[op_name]["desc"]
)
Expand All @@ -926,16 +886,16 @@ def f(self, other, axis=default_axis, level=None):
# Another DataFrame
if not self._indexed_same(other):
self, other = self.align(other, "outer", level=level, copy=False)
new_data = dispatch_to_series(self, other, na_op, str_rep)
new_data = dispatch_to_series(self, other, op, str_rep)
return self._construct_result(new_data)

elif isinstance(other, ABCSeries):
return _combine_series_frame(
self, other, na_op, fill_value=None, axis=axis, level=level
self, other, op, fill_value=None, axis=axis, level=level
)
else:
# in this case we always have `np.ndim(other) == 0`
new_data = dispatch_to_series(self, other, na_op)
new_data = dispatch_to_series(self, other, op)
return self._construct_result(new_data)

f.__name__ = op_name
Expand Down
33 changes: 29 additions & 4 deletions pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,21 +235,46 @@ def _test_seq(df, idx_ser, col_ser):
rs = df.le(df)
assert not rs.loc[0, 0]

def test_bool_flex_frame_complex_dtype(self):
# complex
arr = np.array([np.nan, 1, 6, np.nan])
arr2 = np.array([2j, np.nan, 7, None])
df = pd.DataFrame({"a": arr})
df2 = pd.DataFrame({"a": arr2})
rs = df.gt(df2)
assert not rs.values.any()

msg = "|".join(
[
"'>' not supported between instances of '.*' and 'complex'",
r"unorderable types: .*complex\(\)", # PY35
]
)
with pytest.raises(TypeError, match=msg):
# inequalities are not well-defined for complex numbers
df.gt(df2)
with pytest.raises(TypeError, match=msg):
# regression test that we get the same behavior for Series
df["a"].gt(df2["a"])
with pytest.raises(TypeError, match=msg):
# Check that we match numpy behavior here
df.values > df2.values

rs = df.ne(df2)
assert rs.values.all()

arr3 = np.array([2j, np.nan, None])
df3 = pd.DataFrame({"a": arr3})
rs = df3.gt(2j)
assert not rs.values.any()

with pytest.raises(TypeError, match=msg):
# inequalities are not well-defined for complex numbers
df3.gt(2j)
with pytest.raises(TypeError, match=msg):
# regression test that we get the same behavior for Series
df3["a"].gt(2j)
with pytest.raises(TypeError, match=msg):
# Check that we match numpy behavior here
df3.values > 2j

def test_bool_flex_frame_object_dtype(self):
# corner, dtype=object
df1 = pd.DataFrame({"col": ["foo", np.nan, "bar"]})
df2 = pd.DataFrame({"col": ["foo", datetime.now(), "bar"]})
Expand Down