Skip to content

Commit ef20980

Browse files
chris-b1 authored and jreback committed
BUG: union_categorical with Series and cat idx
closes pandas-dev#14173 Author: Chris <[email protected]> Closes pandas-dev#14199 from chris-b1/concat-cat-types and squashes the following commits: b04ea4d [Chris] BUG: union_categorical with Series and cat idx
1 parent 289cd6d commit ef20980

File tree

4 files changed

+74
-9
lines changed

4 files changed

+74
-9
lines changed

doc/source/categorical.rst

+34
Original file line numberDiff line numberDiff line change
@@ -695,6 +695,40 @@ The below raises ``TypeError`` because the categories are ordered and not identi
695695
Out[3]:
696696
TypeError: to union ordered Categoricals, all categories must be the same
697697
698+
``union_categoricals`` also works with a ``CategoricalIndex``, or ``Series`` containing
699+
categorical data, but note that the resulting array will always be a plain ``Categorical``.
700+
701+
.. ipython:: python
702+
703+
a = pd.Series(["b", "c"], dtype='category')
704+
b = pd.Series(["a", "b"], dtype='category')
705+
union_categoricals([a, b])
706+
707+
.. note::
708+
709+
``union_categoricals`` may recode the integer codes for categories
710+
when combining categoricals. This is likely what you want,
711+
but if you are relying on the exact numbering of the categories, be
712+
aware.
713+
714+
.. ipython:: python
715+
716+
c1 = pd.Categorical(["b", "c"])
717+
c2 = pd.Categorical(["a", "b"])
718+
719+
c1
720+
# "b" is coded to 0
721+
c1.codes
722+
723+
c2
724+
# "b" is coded to 1
725+
c2.codes
726+
727+
c = union_categoricals([c1, c2])
728+
c
729+
# "b" is coded to 0 throughout, same as c1, different from c2
730+
c.codes
731+
698732
.. _categorical.concat:
699733

700734
Concatenation

doc/source/whatsnew/v0.19.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ Individual columns can be parsed as a ``Categorical`` using a dict specification
287287
Categorical Concatenation
288288
^^^^^^^^^^^^^^^^^^^^^^^^^
289289

290-
- A function :func:`union_categoricals` has been added for combining categoricals, see :ref:`Unioning Categoricals<categorical.union>` (:issue:`13361`, :issue:`:13763`, issue:`13846`)
290+
- A function :func:`union_categoricals` has been added for combining categoricals, see :ref:`Unioning Categoricals<categorical.union>` (:issue:`13361`, :issue:`13763`, :issue:`13846`, :issue:`14173`)
291291

292292
.. ipython:: python
293293

pandas/tools/tests/test_concat.py

+26-5
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from pandas import (DataFrame, concat,
1010
read_csv, isnull, Series, date_range,
1111
Index, Panel, MultiIndex, Timestamp,
12-
DatetimeIndex, Categorical)
12+
DatetimeIndex, Categorical, CategoricalIndex)
1313
from pandas.types.concat import union_categoricals
1414
from pandas.util import testing as tm
1515
from pandas.util.testing import (assert_frame_equal,
@@ -1539,10 +1539,12 @@ def test_union_categorical(self):
15391539
]
15401540

15411541
for a, b, combined in data:
1542-
result = union_categoricals([Categorical(a), Categorical(b)])
1543-
expected = Categorical(combined)
1544-
tm.assert_categorical_equal(result, expected,
1545-
check_category_order=True)
1542+
for box in [Categorical, CategoricalIndex, Series]:
1543+
result = union_categoricals([box(Categorical(a)),
1544+
box(Categorical(b))])
1545+
expected = Categorical(combined)
1546+
tm.assert_categorical_equal(result, expected,
1547+
check_category_order=True)
15461548

15471549
# new categories ordered by appearance
15481550
s = Categorical(['x', 'y', 'z'])
@@ -1771,6 +1773,25 @@ def test_union_categoricals_sort_false(self):
17711773
categories=['b', 'a', 'c'], ordered=True)
17721774
tm.assert_categorical_equal(result, expected)
17731775

1776+
def test_union_categorical_unwrap(self):
1777+
# GH 14173
1778+
c1 = Categorical(['a', 'b'])
1779+
c2 = pd.Series(['b', 'c'], dtype='category')
1780+
result = union_categoricals([c1, c2])
1781+
expected = Categorical(['a', 'b', 'b', 'c'])
1782+
tm.assert_categorical_equal(result, expected)
1783+
1784+
c2 = CategoricalIndex(c2)
1785+
result = union_categoricals([c1, c2])
1786+
tm.assert_categorical_equal(result, expected)
1787+
1788+
c1 = Series(c1)
1789+
result = union_categoricals([c1, c2])
1790+
tm.assert_categorical_equal(result, expected)
1791+
1792+
with tm.assertRaises(TypeError):
1793+
union_categoricals([c1, ['a', 'b', 'c']])
1794+
17741795
def test_concat_bug_1719(self):
17751796
ts1 = tm.makeTimeSeries()
17761797
ts2 = tm.makeTimeSeries()[::2]

pandas/types/concat.py

+13-3
Original file line numberDiff line numberDiff line change
@@ -210,14 +210,15 @@ def _concat_asobject(to_concat):
210210

211211
def union_categoricals(to_union, sort_categories=False):
212212
"""
213-
Combine list-like of Categoricals, unioning categories. All
213+
Combine list-like of Categorical-like, unioning categories. All
214214
categories must have the same dtype.
215215
216216
.. versionadded:: 0.19.0
217217
218218
Parameters
219219
----------
220-
to_union : list-like of Categoricals
220+
to_union : list-like of Categorical, CategoricalIndex,
221+
or Series with dtype='category'
221222
sort_categories : boolean, default False
222223
If true, resulting categories will be lexsorted, otherwise
223224
they will be ordered as they appear in the data.
@@ -236,11 +237,20 @@ def union_categoricals(to_union, sort_categories=False):
236237
ValueError
237238
Empty list of categoricals passed
238239
"""
239-
from pandas import Index, Categorical
240+
from pandas import Index, Categorical, CategoricalIndex, Series
240241

241242
if len(to_union) == 0:
242243
raise ValueError('No Categoricals to union')
243244

245+
def _maybe_unwrap(x):
246+
if isinstance(x, (CategoricalIndex, Series)):
247+
return x.values
248+
elif isinstance(x, Categorical):
249+
return x
250+
else:
251+
raise TypeError("all components to combine must be Categorical")
252+
253+
to_union = [_maybe_unwrap(x) for x in to_union]
244254
first = to_union[0]
245255

246256
if not all(is_dtype_equal(other.categories.dtype, first.categories.dtype)

0 commit comments

Comments
 (0)