Skip to content

Doc for GH 8946 #8952

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 25 additions & 12 deletions doc/source/categorical.rst
Original file line number Diff line number Diff line change
Expand Up @@ -328,13 +328,23 @@ old categories must be included in the new categories and no new categories are
Comparisons
-----------

Comparing `Categoricals` with other objects is possible in two cases:
Comparing categorical data with other objects is possible in three cases:

* comparing a categorical Series to another categorical Series, when `categories` and `ordered` are
the same, or
* comparing a categorical Series to a scalar.
* comparing equality (``==`` and ``!=``) to a list-like object (list, Series, array,
...) of the same length as the categorical data or
* all comparisons (``==``, ``!=``, ``>``, ``>=``, ``<``, and ``<=``) of categorical data to
another categorical Series, when ``ordered==True`` and the `categories` are the same or
* all comparisons of categorical data to a scalar.

All other comparisons will raise a TypeError.
All other comparisons, especially "non-equality" comparisons of two categoricals with different
categories or a categorical with any list-like object, will raise a TypeError.

.. note::

Any "non-equality" comparisons of categorical data with a `Series`, `np.array`, `list` or
categorical data with different categories or ordering will raise a `TypeError` because a custom
category ordering could be interpreted in two ways: one taking the ordering into account and
one ignoring it.

.. ipython:: python

Expand All @@ -353,6 +363,13 @@ Comparing to a categorical with the same categories and ordering or to a scalar
cat > cat_base
cat > 2

Equality comparisons work with any list-like object of the same length and with scalars:

.. ipython:: python

cat == cat_base2
cat == 2

This doesn't work because the categories are not the same:

.. ipython:: python
Expand All @@ -362,13 +379,9 @@ This doesn't work because the categories are not the same:
except TypeError as e:
print("TypeError: " + str(e))

.. note::

Comparisons with a `Series`, `np.array` or a `Categorical` with different categories or ordering
will raise a `TypeError` because a custom category ordering could be interpreted in two ways:
one taking the ordering into account and one ignoring it. If you want to compare a categorical
series with such a type, you need to be explicit and convert the categorical data back to the
original values:
If you want to do a "non-equality" comparison of a categorical series with a list-like object
which is not categorical data, you need to be explicit and convert the categorical data back to
the original values:

.. ipython:: python

Expand Down
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.15.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ API changes
p = pd.Panel(np.random.rand(2, 5, 4) > 0.1)
p.all()

- Allow equality comparisons of Series with a categorical dtype and object dtype; previously these would raise ``TypeError`` (:issue:`8938`)

.. _whatsnew_0152.enhancements:

Enhancements
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,12 @@ def f(self, other):
else:
return np.repeat(False, len(self))
else:

# allow categorical vs object dtype array comparisons for equality
# these are only positional comparisons
if op in ['__eq__','__ne__']:
return getattr(np.array(self),op)(np.array(other))

msg = "Cannot compare a Categorical for op {op} with type {typ}. If you want to \n" \
"compare values, use 'np.asarray(cat) <op> other'."
raise TypeError(msg.format(op=op,typ=type(other)))
Expand Down
49 changes: 26 additions & 23 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,10 +541,13 @@ def _comp_method_SERIES(op, name, str_rep, masker=False):
"""
def na_op(x, y):

if com.is_categorical_dtype(x) != (not np.isscalar(y) and com.is_categorical_dtype(y)):
msg = "Cannot compare a Categorical for op {op} with type {typ}. If you want to \n" \
"compare values, use 'series <op> np.asarray(cat)'."
raise TypeError(msg.format(op=op,typ=type(y)))
# dispatch to the categorical if we have a categorical
# in either operand
if com.is_categorical_dtype(x):
return op(x,y)
elif com.is_categorical_dtype(y) and not lib.isscalar(y):
return op(y,x)

if x.dtype == np.object_:
if isinstance(y, list):
y = lib.list_to_object_array(y)
Expand Down Expand Up @@ -586,33 +589,33 @@ def wrapper(self, other):
msg = "Cannot compare a Categorical for op {op} with Series of dtype {typ}.\n"\
"If you want to compare values, use 'series <op> np.asarray(other)'."
raise TypeError(msg.format(op=op,typ=self.dtype))
else:

mask = isnull(self)

values = self.get_values()
other = _index.convert_scalar(values,_values_from_object(other))
mask = isnull(self)

if issubclass(values.dtype.type, (np.datetime64, np.timedelta64)):
values = values.view('i8')
values = self.get_values()
other = _index.convert_scalar(values,_values_from_object(other))

# scalars
res = na_op(values, other)
if np.isscalar(res):
raise TypeError('Could not compare %s type with Series'
% type(other))
if issubclass(values.dtype.type, (np.datetime64, np.timedelta64)):
values = values.view('i8')

# always return a full value series here
res = _values_from_object(res)
# scalars
res = na_op(values, other)
if np.isscalar(res):
raise TypeError('Could not compare %s type with Series'
% type(other))

res = pd.Series(res, index=self.index, name=self.name,
dtype='bool')
# always return a full value series here
res = _values_from_object(res)

# mask out the invalids
if mask.any():
res[mask] = masker
res = pd.Series(res, index=self.index, name=self.name,
dtype='bool')

# mask out the invalids
if mask.any():
res[mask] = masker

return res
return res
return wrapper


Expand Down
54 changes: 53 additions & 1 deletion pandas/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2211,11 +2211,63 @@ def f():
tm.assert_series_equal(res, exp)

# And test NaN handling...
cat = pd.Series(pd.Categorical(["a","b","c", np.nan]))
cat = Series(Categorical(["a","b","c", np.nan]))
exp = Series([True, True, True, False])
res = (cat == cat)
tm.assert_series_equal(res, exp)

def test_cat_equality(self):
    """Check ``==`` / ``!=`` between categorical data and other objects.

    Covers GH 8938: equality comparisons of a categorical Series against
    scalars, list-likes, object-dtype Series and Categoricals are asserted
    to work element-wise, while ordering comparisons against an
    object-dtype Series are asserted to raise ``TypeError``.
    """
    # Fixtures: one categorical Series plus object-dtype and Categorical
    # counterparts that either match it or differ from it.
    cat_ser = Series(list('abc'), dtype="category")
    obj_same = Series(list('abc'), dtype="object")
    obj_other_value = Series(['a', 'b', 'cc'], dtype="object")
    obj_swapped = Series(list('acb'), dtype="object")
    cat_same = Categorical(list('abc'))
    cat_swapped = Categorical(list('acb'))

    # Scalar operand, on either side of the operator.
    self.assertFalse((cat_ser == 'a').all())
    self.assertTrue(((cat_ser != 'a') == ~(cat_ser == 'a')).all())

    self.assertFalse(('a' == cat_ser).all())
    self.assertTrue((cat_ser == 'a')[0])
    self.assertTrue(('a' == cat_ser)[0])
    self.assertFalse(('a' != cat_ser)[0])

    # List-like operand: the Series itself, a plain list, object Series.
    self.assertTrue((cat_ser == cat_ser).all())
    self.assertFalse((cat_ser != cat_ser).all())

    self.assertTrue((cat_ser == list(cat_ser)).all())
    self.assertTrue((cat_ser == obj_same).all())
    self.assertTrue((obj_same == cat_ser).all())
    # ``!=`` must be the element-wise negation of ``==``.
    self.assertTrue(((~(cat_ser == obj_same)) == (cat_ser != obj_same)).all())
    self.assertTrue(((~(obj_same == cat_ser)) == (obj_same != cat_ser)).all())

    self.assertFalse((cat_ser == obj_other_value).all())
    self.assertFalse((obj_other_value == cat_ser).all())
    self.assertFalse((cat_ser == obj_swapped).all())
    self.assertFalse((obj_swapped == cat_ser).all())

    # Categorical operand, on either side of the operator.
    self.assertTrue((cat_ser == cat_same).all())
    self.assertTrue((cat_same == cat_ser).all())
    self.assertFalse((cat_ser == cat_swapped).all())
    self.assertFalse((cat_swapped == cat_ser).all())

    self.assertTrue(((~(cat_ser == cat_same) == (cat_ser != cat_same)).all()))
    self.assertTrue(((~(cat_same == cat_ser) == (cat_same != cat_ser)).all()))
    self.assertTrue(((~(cat_ser == cat_swapped) == (cat_ser != cat_swapped)).all()))
    self.assertTrue(((~(cat_swapped == cat_ser) == (cat_swapped != cat_ser)).all()))

    # Ordering comparisons with an object-dtype Series are rejected.
    self.assertRaises(TypeError, lambda: cat_ser < obj_same)
    self.assertRaises(TypeError, lambda: obj_same < cat_ser)
    self.assertRaises(TypeError, lambda: cat_ser > obj_same)
    self.assertRaises(TypeError, lambda: obj_same > cat_ser)

def test_concat(self):
cat = pd.Categorical(["a","b"], categories=["a","b"])
vals = [1,2]
Expand Down