From 3cdbc2f2a7c8b31dd8cd0df1271044e30d6100ab Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 23 Sep 2019 18:25:26 -0700 Subject: [PATCH 1/4] Remove mask_cmp_op fallback behavior --- pandas/core/ops/__init__.py | 48 +++------------------------ pandas/tests/frame/test_arithmetic.py | 28 +++++++++++++--- 2 files changed, 28 insertions(+), 48 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 0c1e1e90c003b..bf3f07af4972d 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -354,38 +354,6 @@ def fill_binop(left, right, fill_value): return left, right -def mask_cmp_op(x, y, op): - """ - Apply the function `op` to only non-null points in x and y. - - Parameters - ---------- - x : array-like - y : array-like - op : binary operation - - Returns - ------- - result : ndarray[bool] - """ - xrav = x.ravel() - result = np.empty(x.size, dtype=bool) - if isinstance(y, (np.ndarray, ABCSeries)): - yrav = y.ravel() - mask = notna(xrav) & notna(yrav) - result[mask] = op(np.array(list(xrav[mask])), np.array(list(yrav[mask]))) - else: - mask = notna(xrav) - result[mask] = op(np.array(list(xrav[mask])), y) - - if op == operator.ne: # pragma: no cover - np.putmask(result, ~mask, True) - else: - np.putmask(result, ~mask, False) - result = result.reshape(x.shape) - return result - - # ----------------------------------------------------------------------------- # Dispatch logic @@ -904,14 +872,6 @@ def _flex_comp_method_FRAME(cls, op, special): op_name = _get_op_name(op, special) default_axis = _get_frame_op_default_axis(op_name) - def na_op(x, y): - try: - with np.errstate(invalid="ignore"): - result = op(x, y) - except TypeError: - result = mask_cmp_op(x, y, op) - return result - doc = _flex_comp_doc_FRAME.format( op_name=op_name, desc=_op_descriptions[op_name]["desc"] ) @@ -925,16 +885,16 @@ def f(self, other, axis=default_axis, level=None): # Another DataFrame if not self._indexed_same(other): self, other = self.align(other, "outer", level=level, copy=False) - new_data = dispatch_to_series(self, other, na_op, str_rep) - return self._construct_result(other, new_data, na_op) + new_data = dispatch_to_series(self, other, op, str_rep) + return self._construct_result(other, new_data, op) elif isinstance(other, ABCSeries): return _combine_series_frame( - self, other, na_op, fill_value=None, axis=axis, level=level + self, other, op, fill_value=None, axis=axis, level=level ) else: # in this case we always have `np.ndim(other) == 0` - return self._combine_const(other, na_op) + return self._combine_const(other, op) f.__name__ = op_name diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index fc3640503e385..fdaa389edf7e3 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -235,21 +235,41 @@ def _test_seq(df, idx_ser, col_ser): rs = df.le(df) assert not rs.loc[0, 0] + def test_bool_flex_frame_complex_dtype(self): # complex arr = np.array([np.nan, 1, 6, np.nan]) arr2 = np.array([2j, np.nan, 7, None]) df = pd.DataFrame({"a": arr}) df2 = pd.DataFrame({"a": arr2}) - rs = df.gt(df2) - assert not rs.values.any() + + msg = "'>' not supported between instances of '.*' and 'complex'" + with pytest.raises(TypeError, match=msg): + # inequalities are not well-defined for complex numbers + df.gt(df2) + with pytest.raises(TypeError, match=msg): + # regression test that we get the same behavior for Series + df["a"].gt(df2["a"]) + with pytest.raises(TypeError, match=msg): + # Check that we match numpy behavior here + df.values > df2.values + rs = df.ne(df2) assert rs.values.all() arr3 = np.array([2j, np.nan, None]) df3 = pd.DataFrame({"a": arr3}) - rs = df3.gt(2j) - assert not rs.values.any() + with pytest.raises(TypeError, match=msg): + # inequalities are not well-defined for complex numbers + df3.gt(2j) + with pytest.raises(TypeError, match=msg): + # regression test that we get the same behavior for Series + df3["a"].gt(2j) + with pytest.raises(TypeError, match=msg): + # Check that we match numpy behavior here + df3.values > 2j + + def test_bool_flex_frame_object_dtype(self): # corner, dtype=object df1 = pd.DataFrame({"col": ["foo", np.nan, "bar"]}) df2 = pd.DataFrame({"col": ["foo", datetime.now(), "bar"]}) From 35bca22673aff7246c7698d50a19a4ece9ba651a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 24 Sep 2019 15:46:55 -0700 Subject: [PATCH 2/4] py35 compat msg --- pandas/tests/frame/test_arithmetic.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index fdaa389edf7e3..3b46e834933b3 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -242,7 +242,12 @@ def test_bool_flex_frame_complex_dtype(self): df = pd.DataFrame({"a": arr}) df2 = pd.DataFrame({"a": arr2}) - msg = "'>' not supported between instances of '.*' and 'complex'" + msg = "|".join( + [ + "'>' not supported between instances of '.*' and 'complex'", + r"unorderable types: .*complex\(\)", # PY35 + ] + ) with pytest.raises(TypeError, match=msg): # inequalities are not well-defined for complex numbers df.gt(df2) From 9496fe19968203c8d55d1c8a8c99af82bcde46cb Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 24 Sep 2019 15:48:05 -0700 Subject: [PATCH 3/4] fixups --- pandas/core/ops/__init__.py | 2 +- pandas/tests/groupby/test_categorical.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index d64d7acaa77f8..eb901630b753a 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -28,7 +28,7 @@ ABCIndexClass, ABCSeries, ) -from pandas.core.dtypes.missing import isna, notna +from pandas.core.dtypes.missing import isna from pandas._typing import ArrayLike from pandas.core.construction import array, extract_array diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index e09af3fd48ee6..fcc0aa3b1c015 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -782,7 +782,7 @@ def test_categorical_no_compress(): def test_sort(): - # http://stackoverflow.com/questions/23814368/sorting-pandas-categorical-labels-after-groupby # noqa: flake8 + # http://stackoverflow.com/questions/23814368/sorting-pandas-categorical-labels-after-groupby # noqa: E501 # This should result in a properly sorted Series so that the plot # has a sorted x axis # self.cat.groupby(['value_group'])['value_group'].count().plot(kind='bar') From 48978cdc5bc1d370eaff9b30411509b1333a8441 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 25 Sep 2019 11:32:46 -0700 Subject: [PATCH 4/4] whatsnew --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index a3d75d69e1e82..a78bc07ac2715 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -199,7 +199,7 @@ Timezones Numeric ^^^^^^^ - Bug in :meth:`DataFrame.quantile` with zero-column :class:`DataFrame` incorrectly raising (:issue:`23925`) -- +- :class:`DataFrame` inequality comparisons with object-dtype and ``complex`` entries failing to raise ``TypeError`` like their :class:`Series` counterparts (:issue:`28079`) - Conversion