Skip to content

Commit 880dcb7

Browse files
committed
fixup! DOC: Update docs to use CategoricalDtype
1 parent e8ad6ad commit 880dcb7

File tree

6 files changed

+40
-22
lines changed

6 files changed

+40
-22
lines changed

doc/source/categorical.rst

+17-5
Original file line numberDiff line numberDiff line change
@@ -96,12 +96,17 @@ By passing a :class:`pandas.Categorical` object to a `Series` or assigning it to
9696
df["B"] = raw_cat
9797
df
9898
99-
You can also specify differently ordered categories or make the resulting data
100-
ordered by passing a :class:`CategoricalDtype`:
99+
Anywhere above we passed a keyword ``dtype='category'``, we used the default behavior:
100+
101+
1. categories are inferred from the data
102+
2. categories are unordered.
103+
104+
To control those behaviors, instead of passing ``'category'``, use an instance
105+
of :class:`CategoricalDtype`.
101106

102107
.. ipython:: python
103108
104-
s = pd.Series(["a","b","c","a"])
109+
s = pd.Series(["a", "b", "c", "a"])
105110
cat_type = pd.CategoricalDtype(categories=["b", "c", "d"], ordered=False)
106111
s_cat = s.astype(cat_type)
107112
s_cat
@@ -145,7 +150,7 @@ constructor to save the factorize step during normal constructor mode:
145150
CategoricalDtype
146151
----------------
147152

148-
.. versionadded:: 0.21.0
153+
.. versionchanged:: 0.21.0
149154

150155
A categorical's type is fully described by 1.) its categories (an iterable with
151156
unique values and no missing values), and 2.) its orderedness (a boolean).
@@ -184,12 +189,19 @@ order of the ``categories`` is not considered
184189
# Unequal, since the second CategoricalDtype is ordered
185190
c1 == pd.CategoricalDtype(['a', 'b', 'c'], ordered=True)
186191
187-
Finally, all instances of ``CategoricalDtype`` compare equal to the string ``'category'``
192+
All instances of ``CategoricalDtype`` compare equal to the string ``'category'``
188193

189194
.. ipython:: python
190195
191196
c1 == 'category'
192197
198+
199+
.. warning::
200+
201+
Since ``dtype='category'`` is essentially ``CategoricalDtype(None, False)``,
202+
and since all instances of ``CategoricalDtype`` compare equal to ``'category'``,
203+
all instances of ``CategoricalDtype`` compare equal to a ``CategoricalDtype(None)``.
204+
193205
Description
194206
-----------
195207

doc/source/whatsnew/v0.21.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,8 @@ string data to a ``Categorical``:
128128
The ``.dtype`` property of a ``Categorical``, ``CategoricalIndex`` or a
129129
``Series`` with categorical type will now return an instance of ``CategoricalDtype``.
130130

131+
See :ref:`CategoricalDtype <categorical.categoricaldtype>` for more.
132+
131133
.. _whatsnew_0210.enhancements.other:
132134

133135
Other Enhancements

pandas/core/dtypes/common.py

+15
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,21 @@ def is_dtype_equal(source, target):
692692
return False
693693

694694

695+
def _is_dtype_union_equal(source, target):
696+
"""
697+
Check whether two arrays have compatible dtypes to do a union.
698+
numpy types are checked with ``is_dtype_equal``. Extension types are
699+
checked separately.
700+
"""
701+
source = _get_dtype(source)
702+
target = _get_dtype(target)
703+
if source == 'category' and target == 'category':
704+
# two categorical dtypes are compatible only if their orderedness matches
705+
return source.ordered is target.ordered
706+
else:
707+
return is_dtype_equal(source, target)
708+
709+
695710
def is_any_int_dtype(arr_or_dtype):
696711
"""
697712
DEPRECATED: This function will be removed in a future version.

pandas/core/indexes/base.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
is_integer,
2828
is_float,
2929
is_dtype_equal,
30+
_is_dtype_union_equal,
3031
is_object_dtype,
3132
is_categorical_dtype,
3233
is_interval_dtype,
@@ -2177,7 +2178,11 @@ def union(self, other):
21772178
if len(self) == 0:
21782179
return other._get_consensus_name(self)
21792180

2180-
if not is_dtype_equal(self.dtype, other.dtype):
2181+
# TODO: _is_dtype_union_equal is a hack around:
2182+
# 1. buggy Multiset joins
2183+
# 2. CategoricalIndex lacking setops
2184+
# I'd like to fix those before merging CategoricalDtype
2185+
if not _is_dtype_union_equal(self.dtype, other.dtype):
21812186
this = self.astype('O')
21822187
other = other.astype('O')
21832188
return this.union(other)

pandas/core/indexes/category.py

-14
Original file line numberDiff line numberDiff line change
@@ -687,20 +687,6 @@ def _evaluate_compare(self, other):
687687
cls.__le__ = _make_compare('__le__')
688688
cls.__ge__ = _make_compare('__ge__')
689689

690-
def union(self, other):
691-
"""
692-
Set union of a CategoricalIndex with some iterable
693-
"""
694-
from pandas.api.types import union_categoricals
695-
696-
if isinstance(other, CategoricalIndex):
697-
categories = union_categoricals([self, other]).categories
698-
left = self.set_categories(categories)
699-
right = other.set_categories(categories)
700-
else:
701-
left, right = self, other
702-
return super(CategoricalIndex, left).union(right)
703-
704690
def _delegate_method(self, name, *args, **kwargs):
705691
""" method delegation to the ._values """
706692
method = getattr(self._values, name)

pandas/tests/test_categorical.py

-2
Original file line numberDiff line numberDiff line change
@@ -142,8 +142,6 @@ def test_constructor_tuples_datetimes(self):
142142
(Timestamp('2010-01-02'),)], tupleize_cols=False)
143143
tm.assert_index_equal(result.categories, expected)
144144

145-
>>>>>>> REF/ENH/API: Add parametrized CategoricalDtype
146-
147145
def test_constructor_unsortable(self):
148146

149147
# it works!

0 commit comments

Comments
 (0)