diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index a7091d6ab38fb..2a4c78ad837d1 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -328,13 +328,23 @@ old categories must be included in the new categories and no new categories are Comparisons ----------- -Comparing `Categoricals` with other objects is possible in two cases: +Comparing categorical data with other objects is possible in three cases: - * comparing a categorical Series to another categorical Series, when `categories` and `ordered` is - the same or - * comparing a categorical Series to a scalar. + * comparing equality (``==`` and ``!=``) to a list-like object (list, Series, array, + ...) of the same length as the categorical data or + * all comparisons (``==``, ``!=``, ``>``, ``>=``, ``<``, and ``<=``) of categorical data to + another categorical Series, when ``ordered==True`` and the `categories` are the same or + * all comparisons of categorical data to a scalar. -All other comparisons will raise a TypeError. +All other comparisons, especially "non-equality" comparisons of two categoricals with different +categories or a categorical with any list-like object, will raise a TypeError. + +.. note:: + + Any "non-equality" comparisons of categorical data with a `Series`, `np.array`, `list` or + categorical data with different categories or ordering will raise a `TypeError` because custom + categories ordering could be interpreted in two ways: one taking into account the + ordering and one without. .. ipython:: python @@ -353,6 +363,13 @@ Comparing to a categorical with the same categories and ordering or to a scalar cat > cat_base cat > 2 +Equality comparisons work with any list-like object of the same length and with scalars: + +.. ipython:: python + + cat == cat_base2 + cat == 2 + This doesn't work because the categories are not the same: .. 
ipython:: python @@ -362,13 +379,9 @@ This doesn't work because the categories are not the same: except TypeError as e: print("TypeError: " + str(e)) -.. note:: - - Comparisons with `Series`, `np.array` or a `Categorical` with different categories or ordering - will raise an `TypeError` because custom categories ordering could be interpreted in two ways: - one with taking in account the ordering and one without. If you want to compare a categorical - series with such a type, you need to be explicit and convert the categorical data back to the - original values: +If you want to do a "non-equality" comparison of a categorical series with a list-like object +which is not categorical data, you need to be explicit and convert the categorical data back to +the original values: .. ipython:: python diff --git a/doc/source/whatsnew/v0.15.2.txt b/doc/source/whatsnew/v0.15.2.txt index 11cf2450d2f28..e61ae93ca49c0 100644 --- a/doc/source/whatsnew/v0.15.2.txt +++ b/doc/source/whatsnew/v0.15.2.txt @@ -59,6 +59,8 @@ API changes p = pd.Panel(np.random.rand(2, 5, 4) > 0.1) p.all() +- Allow equality comparisons of Series with a categorical dtype and object dtype; previously these would raise ``TypeError`` (:issue:`8938`) + .. _whatsnew_0152.enhancements: Enhancements diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 5b3e9e8a22b12..ff1051dc00a00 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -64,6 +64,12 @@ def f(self, other): else: return np.repeat(False, len(self)) else: + + # allow categorical vs object dtype array comparisons for equality + # these are only positional comparisons + if op in ['__eq__','__ne__']: + return getattr(np.array(self),op)(np.array(other)) + msg = "Cannot compare a Categorical for op {op} with type {typ}. If you want to \n" \ "compare values, use 'np.asarray(cat) other'." 
raise TypeError(msg.format(op=op,typ=type(other))) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 068cdff7fcf2d..a3154ff9df9a1 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -541,10 +541,13 @@ def _comp_method_SERIES(op, name, str_rep, masker=False): """ def na_op(x, y): - if com.is_categorical_dtype(x) != (not np.isscalar(y) and com.is_categorical_dtype(y)): - msg = "Cannot compare a Categorical for op {op} with type {typ}. If you want to \n" \ - "compare values, use 'series np.asarray(cat)'." - raise TypeError(msg.format(op=op,typ=type(y))) + # dispatch to the categorical if we have a categorical + # in either operand + if com.is_categorical_dtype(x): + return op(x,y) + elif com.is_categorical_dtype(y) and not lib.isscalar(y): + return op(y,x) + if x.dtype == np.object_: if isinstance(y, list): y = lib.list_to_object_array(y) @@ -586,33 +589,33 @@ def wrapper(self, other): msg = "Cannot compare a Categorical for op {op} with Series of dtype {typ}.\n"\ "If you want to compare values, use 'series np.asarray(other)'." 
raise TypeError(msg.format(op=op,typ=self.dtype)) - else: - mask = isnull(self) - values = self.get_values() - other = _index.convert_scalar(values,_values_from_object(other)) + mask = isnull(self) - if issubclass(values.dtype.type, (np.datetime64, np.timedelta64)): - values = values.view('i8') + values = self.get_values() + other = _index.convert_scalar(values,_values_from_object(other)) - # scalars - res = na_op(values, other) - if np.isscalar(res): - raise TypeError('Could not compare %s type with Series' - % type(other)) + if issubclass(values.dtype.type, (np.datetime64, np.timedelta64)): + values = values.view('i8') - # always return a full value series here - res = _values_from_object(res) + # scalars + res = na_op(values, other) + if np.isscalar(res): + raise TypeError('Could not compare %s type with Series' + % type(other)) - res = pd.Series(res, index=self.index, name=self.name, - dtype='bool') + # always return a full value series here + res = _values_from_object(res) - # mask out the invalids - if mask.any(): - res[mask] = masker + res = pd.Series(res, index=self.index, name=self.name, + dtype='bool') + + # mask out the invalids + if mask.any(): + res[mask] = masker - return res + return res return wrapper diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 196ad8b7680b9..4c202a525863d 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -2211,11 +2211,63 @@ def f(): tm.assert_series_equal(res, exp) # And test NaN handling... 
- cat = pd.Series(pd.Categorical(["a","b","c", np.nan])) + cat = Series(Categorical(["a","b","c", np.nan])) exp = Series([True, True, True, False]) res = (cat == cat) tm.assert_series_equal(res, exp) + def test_cat_equality(self): + + # GH 8938 + # allow equality comparisons + a = Series(list('abc'),dtype="category") + b = Series(list('abc'),dtype="object") + c = Series(['a','b','cc'],dtype="object") + d = Series(list('acb'),dtype="object") + e = Categorical(list('abc')) + f = Categorical(list('acb')) + + # vs scalar + self.assertFalse((a=='a').all()) + self.assertTrue(((a!='a') == ~(a=='a')).all()) + + self.assertFalse(('a'==a).all()) + self.assertTrue((a=='a')[0]) + self.assertTrue(('a'==a)[0]) + self.assertFalse(('a'!=a)[0]) + + # vs list-like + self.assertTrue((a==a).all()) + self.assertFalse((a!=a).all()) + + self.assertTrue((a==list(a)).all()) + self.assertTrue((a==b).all()) + self.assertTrue((b==a).all()) + self.assertTrue(((~(a==b))==(a!=b)).all()) + self.assertTrue(((~(b==a))==(b!=a)).all()) + + self.assertFalse((a==c).all()) + self.assertFalse((c==a).all()) + self.assertFalse((a==d).all()) + self.assertFalse((d==a).all()) + + # vs a cat-like + self.assertTrue((a==e).all()) + self.assertTrue((e==a).all()) + self.assertFalse((a==f).all()) + self.assertFalse((f==a).all()) + + self.assertTrue(((~(a==e)==(a!=e)).all())) + self.assertTrue(((~(e==a)==(e!=a)).all())) + self.assertTrue(((~(a==f)==(a!=f)).all())) + self.assertTrue(((~(f==a)==(f!=a)).all())) + + # non-equality is not comparable + self.assertRaises(TypeError, lambda: a < b) + self.assertRaises(TypeError, lambda: b < a) + self.assertRaises(TypeError, lambda: a > b) + self.assertRaises(TypeError, lambda: b > a) + def test_concat(self): cat = pd.Categorical(["a","b"], categories=["a","b"]) vals = [1,2]