Skip to content

Commit ecb2ae9

Browse files
committed
more tests; handle sort with ordered
1 parent eea1777 commit ecb2ae9

File tree

2 files changed

+64
-2
lines changed

2 files changed

+64
-2
lines changed

pandas/tools/tests/test_concat.py

+58-1
Original file line numberDiff line numberDiff line change
@@ -990,7 +990,7 @@ def test_union_categoricals_ordered(self):
990990
union_categoricals([c1, c2])
991991

992992
def test_union_categoricals_sort(self):
993-
# GH 13763
993+
# GH 13846
994994
c1 = Categorical(['x', 'y', 'z'])
995995
c2 = Categorical(['a', 'b', 'c'])
996996
result = union_categoricals([c1, c2], sort_categories=True)
@@ -1033,6 +1033,63 @@ def test_union_categoricals_sort(self):
10331033
expected = Categorical([])
10341034
tm.assert_categorical_equal(result, expected)
10351035

1036+
c1 = Categorical(['b', 'a'], categories=['b', 'a', 'c'], ordered=True)
1037+
c2 = Categorical(['a', 'c'], categories=['b', 'a', 'c'], ordered=True)
1038+
with tm.assertRaises(TypeError):
1039+
union_categoricals([c1, c2], sort_categories=True)
1040+
1041+
def test_union_categoricals_sort_false(self):
1042+
# GH 13846
1043+
c1 = Categorical(['x', 'y', 'z'])
1044+
c2 = Categorical(['a', 'b', 'c'])
1045+
result = union_categoricals([c1, c2], sort_categories=False)
1046+
expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'],
1047+
categories=['x', 'y', 'z', 'a', 'b', 'c'])
1048+
tm.assert_categorical_equal(result, expected)
1049+
1050+
# fastpath
1051+
c1 = Categorical(['a', 'b'], categories=['b', 'a', 'c'])
1052+
c2 = Categorical(['b', 'c'], categories=['b', 'a', 'c'])
1053+
result = union_categoricals([c1, c2], sort_categories=False)
1054+
expected = Categorical(['a', 'b', 'b', 'c'],
1055+
categories=['b', 'a', 'c'])
1056+
tm.assert_categorical_equal(result, expected)
1057+
1058+
# fastpath - skip resort
1059+
c1 = Categorical(['a', 'b'], categories=['a', 'b', 'c'])
1060+
c2 = Categorical(['b', 'c'], categories=['a', 'b', 'c'])
1061+
result = union_categoricals([c1, c2], sort_categories=False)
1062+
expected = Categorical(['a', 'b', 'b', 'c'],
1063+
categories=['a', 'b', 'c'])
1064+
tm.assert_categorical_equal(result, expected)
1065+
1066+
c1 = Categorical(['x', np.nan])
1067+
c2 = Categorical([np.nan, 'b'])
1068+
result = union_categoricals([c1, c2], sort_categories=False)
1069+
expected = Categorical(['x', np.nan, np.nan, 'b'],
1070+
categories=['x', 'b'])
1071+
tm.assert_categorical_equal(result, expected)
1072+
1073+
c1 = Categorical([np.nan])
1074+
c2 = Categorical([np.nan])
1075+
result = union_categoricals([c1, c2], sort_categories=False)
1076+
expected = Categorical([np.nan, np.nan], categories=[])
1077+
tm.assert_categorical_equal(result, expected)
1078+
1079+
c1 = Categorical([])
1080+
c2 = Categorical([])
1081+
result = union_categoricals([c1, c2], sort_categories=False)
1082+
expected = Categorical([])
1083+
tm.assert_categorical_equal(result, expected)
1084+
1085+
c1 = Categorical(['b', 'a'], categories=['b', 'a', 'c'], ordered=True)
1086+
c2 = Categorical(['a', 'c'], categories=['b', 'a', 'c'], ordered=True)
1087+
result = union_categoricals([c1, c2], sort_categories=False)
1088+
expected = Categorical(['b', 'a', 'a', 'c'],
1089+
categories=['b', 'a', 'c'], ordered=True)
1090+
tm.assert_categorical_equal(result, expected)
1091+
1092+
10361093
def test_concat_bug_1719(self):
10371094
ts1 = tm.makeTimeSeries()
10381095
ts2 = tm.makeTimeSeries()[::2]

pandas/types/concat.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ def union_categoricals(to_union, sort_categories=False):
223223
to_union : list-like of Categoricals
224224
sort_categories : boolean, default False
225225
If true, resulting categories will be lexsorted, otherwise
226-
they will be ordered as they appear in the data
226+
they will be ordered as they appear in the data.
227227
228228
Returns
229229
-------
@@ -235,6 +235,7 @@ def union_categoricals(to_union, sort_categories=False):
235235
- all inputs do not have the same dtype
236236
- all inputs do not have the same ordered property
237237
- all inputs are ordered and their categories are not identical
238+
- sort_categories=True and Categoricals are ordered
238239
ValueError
239240
Empty list of categoricals passed
240241
"""
@@ -256,6 +257,10 @@ def union_categoricals(to_union, sort_categories=False):
256257
ordered = first.ordered
257258
new_codes = np.concatenate([c.codes for c in to_union])
258259

260+
if sort_categories and ordered:
261+
raise TypeError("Cannot use sort_categories=True with "
262+
"ordered Categoricals")
263+
259264
if sort_categories and not categories.is_monotonic_increasing:
260265
categories = categories.sort_values()
261266
indexer = first.categories.get_indexer(categories)

0 commit comments

Comments
 (0)