From cb56aa792c60cab614868e7147ac132043d1c207 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 5 Aug 2019 19:20:24 -0700 Subject: [PATCH 1/5] Make CategoricalIndex comparison defer to Categorical comparison --- pandas/core/arrays/categorical.py | 3 +++ pandas/core/indexes/base.py | 8 ++++++-- pandas/core/indexes/category.py | 24 ++---------------------- pandas/tests/indexes/test_category.py | 5 +++++ 4 files changed, 16 insertions(+), 24 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index d22b4bd4d3f2b..984f1835bd078 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -89,6 +89,9 @@ def f(self, other): return NotImplemented other = lib.item_from_zerodim(other) + if is_list_like(other) and len(other) != len(self): + # TODO: Could this fail if the categories are listlike objects? + raise ValueError("Lengths must match.") if not self.ordered: if op in ["__lt__", "__gt__", "__le__", "__ge__"]: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 356ae20b2240a..bd998656914c6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -48,6 +48,7 @@ ) from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.generic import ( + ABCCategorical, ABCDataFrame, ABCDateOffset, ABCDatetimeArray, @@ -99,11 +100,14 @@ def _make_comparison_op(op, cls): def cmp_method(self, other): - if isinstance(other, (np.ndarray, Index, ABCSeries)): + if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)): if other.ndim > 0 and len(self) != len(other): raise ValueError("Lengths must match to compare") - if is_object_dtype(self) and not isinstance(self, ABCMultiIndex): + if is_object_dtype(self) and isinstance(other, ABCCategorical): + left = type(other)(self._values, dtype=other.dtype) + return op(left, other) + elif is_object_dtype(self) and not isinstance(self, ABCMultiIndex): # don't pass MultiIndex with np.errstate(all="ignore"): result = ops._comp_method_OBJECT_ARRAY(op, self.values, other) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 0f6aa711adc90..b1b4966b4d3b2 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -899,31 +899,11 @@ def _make_compare(op): opname = "__{op}__".format(op=op.__name__) def _evaluate_compare(self, other): - - # if we have a Categorical type, then must have the same - # categories - if isinstance(other, CategoricalIndex): - other = other._values - elif isinstance(other, Index): - other = self._create_categorical(other._values, dtype=self.dtype) - - if isinstance(other, (ABCCategorical, np.ndarray, ABCSeries)): - if len(self.values) != len(other): - raise ValueError("Lengths must match to compare") - - if isinstance(other, ABCCategorical): - if not self.values.is_dtype_equal(other): - raise TypeError( - "categorical index comparisons must " - "have the same categories and ordered " - "attributes" - ) - - result = op(self.values, other) + result = op(self.array, other) if isinstance(result, ABCSeries): # Dispatch to pd.Categorical returned NotImplemented # and we got a Series back; down-cast to ndarray - result = result.values + result = result._values return result return compat.set_function_name(_evaluate_compare, opname, cls) diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 280b0a99c7e68..67bf9bd20e716 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -823,6 +823,11 @@ def test_equals_categorical(self): msg = ( "categorical index comparisons must have the same categories" " and ordered attributes" + "|" + "Categoricals can only be compared if 'categories' are the same. " + "Categories are different lengths" + "|" + "Categoricals can only be compared if 'ordered' is the same" ) with pytest.raises(TypeError, match=msg): ci1 == ci2 From c0fd1c79db615188239a50f1f2e4a8d2a20cbff4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 5 Aug 2019 19:51:11 -0700 Subject: [PATCH 2/5] suppress elementwise comparison warning --- pandas/core/indexes/category.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index b1b4966b4d3b2..8bfa7e8d20b4f 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -899,7 +899,8 @@ def _make_compare(op): opname = "__{op}__".format(op=op.__name__) def _evaluate_compare(self, other): - result = op(self.array, other) + with np.errstate(all="ignore"): + result = op(self.array, other) if isinstance(result, ABCSeries): # Dispatch to pd.Categorical returned NotImplemented # and we got a Series back; down-cast to ndarray From 8a43bd9de00ee93df2e1ce71c221a7ddd2981057 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 5 Aug 2019 20:32:53 -0700 Subject: [PATCH 3/5] raise earlier --- pandas/core/ops/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 4ab1941e3493f..b84a7a5815777 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -1038,7 +1038,10 @@ def wrapper(self, other, axis=None): # Defer to DataFrame implementation; fail early return NotImplemented - elif isinstance(other, ABCSeries) and not self._indexed_same(other): + if is_list_like(other) and len(other) != len(self): + raise ValueError("Lengths must match") + + if isinstance(other, ABCSeries) and not self._indexed_same(other): raise ValueError("Can only compare identically-labeled Series objects") elif is_categorical_dtype(self): From a987d8dd958372e3a3a5833be1835d560aef9df4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 6 Aug 2019 07:09:43 -0700 Subject: [PATCH 4/5] raise later to correct error message --- pandas/core/ops/__init__.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index b84a7a5815777..3f9ccbc58b8a0 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -1038,11 +1038,10 @@ def wrapper(self, other, axis=None): # Defer to DataFrame implementation; fail early return NotImplemented - if is_list_like(other) and len(other) != len(self): - raise ValueError("Lengths must match") - if isinstance(other, ABCSeries) and not self._indexed_same(other): raise ValueError("Can only compare identically-labeled Series objects") + elif is_list_like(other) and len(other) != len(self): + raise ValueError("Lengths must match") elif is_categorical_dtype(self): # Dispatch to Categorical implementation; CategoricalIndex From e695f3655d33719616f03bebf1869d9700e24ac9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 6 Aug 2019 10:25:14 -0700 Subject: [PATCH 5/5] kludge --- pandas/core/ops/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 3f9ccbc58b8a0..c78d5c79453ab 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -1040,7 +1040,11 @@ def wrapper(self, other, axis=None): if isinstance(other, ABCSeries) and not self._indexed_same(other): raise ValueError("Can only compare identically-labeled Series objects") - elif is_list_like(other) and len(other) != len(self): + elif ( + is_list_like(other) + and len(other) != len(self) + and not isinstance(other, (set, frozenset)) + ): raise ValueError("Lengths must match") elif is_categorical_dtype(self):