Skip to content

Commit 14fee4f

Browse files
Justin Solinskyjreback
Justin Solinsky
authored and committed
ENH union_categoricals supports ignore_order GH13410
xref pandas-dev#13410 (ignore_order portion) Author: Justin Solinsky <[email protected]> Closes pandas-dev#15219 from js3711/GH13410-ENHunion_categoricals and squashes the following commits: e9d00de [Justin Solinsky] GH15219 Documentation fixes based on feedback d278d62 [Justin Solinsky] ENH union_categoricals supports ignore_order GH13410 9b827ef [Justin Solinsky] ENH union_categoricals supports ignore_order GH13410
1 parent 486e384 commit 14fee4f

File tree

4 files changed

+79
-4
lines changed

4 files changed

+79
-4
lines changed

doc/source/categorical.rst

+11
Original file line numberDiff line numberDiff line change
@@ -693,6 +693,17 @@ The below raises ``TypeError`` because the categories are ordered and not identi
693693
Out[3]:
694694
TypeError: to union ordered Categoricals, all categories must be the same
695695
696+
.. versionadded:: 0.20.0
697+
698+
Ordered categoricals with different categories or orderings can be combined by
699+
using the ``ignore_order=True`` argument.
700+
701+
.. ipython:: python
702+
703+
a = pd.Categorical(["a", "b", "c"], ordered=True)
704+
b = pd.Categorical(["c", "b", "a"], ordered=True)
705+
union_categoricals([a, b], ignore_order=True)
706+
696707
``union_categoricals`` also works with a ``CategoricalIndex``, or ``Series`` containing
697708
categorical data, but note that the resulting array will always be a plain ``Categorical``
698709

doc/source/whatsnew/v0.20.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -156,9 +156,11 @@ Other enhancements
156156
- ``DataFrame.to_excel()`` has a new ``freeze_panes`` parameter to turn on Freeze Panes when exporting to Excel (:issue:`15160`)
157157
- HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1. (:issue:`15403`)
158158
- ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`)
159+
- ``pd.types.concat.union_categoricals`` gained the ``ignore_order`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs <categorical.union>` for more information.
159160

160161
.. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations
161162

163+
162164
.. _whatsnew_0200.api_breaking:
163165

164166

pandas/tests/tools/test_concat.py

+54
Original file line numberDiff line numberDiff line change
@@ -1662,6 +1662,60 @@ def test_union_categoricals_ordered(self):
16621662
with tm.assertRaisesRegexp(TypeError, msg):
16631663
union_categoricals([c1, c2])
16641664

1665+
def test_union_categoricals_ignore_order(self):
1666+
# GH 15219
1667+
c1 = Categorical([1, 2, 3], ordered=True)
1668+
c2 = Categorical([1, 2, 3], ordered=False)
1669+
1670+
res = union_categoricals([c1, c2], ignore_order=True)
1671+
exp = Categorical([1, 2, 3, 1, 2, 3])
1672+
tm.assert_categorical_equal(res, exp)
1673+
1674+
msg = 'Categorical.ordered must be the same'
1675+
with tm.assertRaisesRegexp(TypeError, msg):
1676+
union_categoricals([c1, c2], ignore_order=False)
1677+
1678+
res = union_categoricals([c1, c1], ignore_order=True)
1679+
exp = Categorical([1, 2, 3, 1, 2, 3])
1680+
tm.assert_categorical_equal(res, exp)
1681+
1682+
res = union_categoricals([c1, c1], ignore_order=False)
1683+
exp = Categorical([1, 2, 3, 1, 2, 3],
1684+
categories=[1, 2, 3], ordered=True)
1685+
tm.assert_categorical_equal(res, exp)
1686+
1687+
c1 = Categorical([1, 2, 3, np.nan], ordered=True)
1688+
c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True)
1689+
1690+
res = union_categoricals([c1, c2], ignore_order=True)
1691+
exp = Categorical([1, 2, 3, np.nan, 3, 2])
1692+
tm.assert_categorical_equal(res, exp)
1693+
1694+
c1 = Categorical([1, 2, 3], ordered=True)
1695+
c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True)
1696+
1697+
res = union_categoricals([c1, c2], ignore_order=True)
1698+
exp = Categorical([1, 2, 3, 1, 2, 3])
1699+
tm.assert_categorical_equal(res, exp)
1700+
1701+
res = union_categoricals([c2, c1], ignore_order=True,
1702+
sort_categories=True)
1703+
exp = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3])
1704+
tm.assert_categorical_equal(res, exp)
1705+
1706+
c1 = Categorical([1, 2, 3], ordered=True)
1707+
c2 = Categorical([4, 5, 6], ordered=True)
1708+
result = union_categoricals([c1, c2], ignore_order=True)
1709+
expected = Categorical([1, 2, 3, 4, 5, 6])
1710+
tm.assert_categorical_equal(result, expected)
1711+
1712+
msg = "to union ordered Categoricals, all categories must be the same"
1713+
with tm.assertRaisesRegexp(TypeError, msg):
1714+
union_categoricals([c1, c2], ignore_order=False)
1715+
1716+
with tm.assertRaisesRegexp(TypeError, msg):
1717+
union_categoricals([c1, c2])
1718+
16651719
def test_union_categoricals_sort(self):
16661720
# GH 13846
16671721
c1 = Categorical(['x', 'y', 'z'])

pandas/types/concat.py

+12-4
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ def _concat_asobject(to_concat):
208208
return _concat_asobject(to_concat)
209209

210210

211-
def union_categoricals(to_union, sort_categories=False):
211+
def union_categoricals(to_union, sort_categories=False, ignore_order=False):
212212
"""
213213
Combine list-like of Categorical-like, unioning categories. All
214214
categories must have the same dtype.
@@ -222,6 +222,11 @@ def union_categoricals(to_union, sort_categories=False):
222222
sort_categories : boolean, default False
223223
If true, resulting categories will be lexsorted, otherwise
224224
they will be ordered as they appear in the data.
225+
ignore_order : boolean, default False
226+
If true, the ordered attribute of the Categoricals will be ignored.
227+
Results in an unordered categorical.
228+
229+
.. versionadded:: 0.20.0
225230
226231
Returns
227232
-------
@@ -235,7 +240,7 @@ def union_categoricals(to_union, sort_categories=False):
235240
- all inputs are ordered and their categories are not identical (unless ignore_order=True)
236241
- sort_categories=True and Categoricals are ordered (unless ignore_order=True)
237242
ValueError
238-
Emmpty list of categoricals passed
243+
Empty list of categoricals passed
239244
"""
240245
from pandas import Index, Categorical, CategoricalIndex, Series
241246

@@ -264,15 +269,15 @@ def _maybe_unwrap(x):
264269
ordered = first.ordered
265270
new_codes = np.concatenate([c.codes for c in to_union])
266271

267-
if sort_categories and ordered:
272+
if sort_categories and not ignore_order and ordered:
268273
raise TypeError("Cannot use sort_categories=True with "
269274
"ordered Categoricals")
270275

271276
if sort_categories and not categories.is_monotonic_increasing:
272277
categories = categories.sort_values()
273278
indexer = categories.get_indexer(first.categories)
274279
new_codes = take_1d(indexer, new_codes, fill_value=-1)
275-
elif all(not c.ordered for c in to_union):
280+
elif ignore_order or all(not c.ordered for c in to_union):
276281
# different categories - union and recode
277282
cats = first.categories.append([c.categories for c in to_union[1:]])
278283
categories = Index(cats.unique())
@@ -297,6 +302,9 @@ def _maybe_unwrap(x):
297302
else:
298303
raise TypeError('Categorical.ordered must be the same')
299304

305+
if ignore_order:
306+
ordered = False
307+
300308
return Categorical(new_codes, categories=categories, ordered=ordered,
301309
fastpath=True)
302310

0 commit comments

Comments
 (0)