Skip to content

Commit 4499cda

Browse files
committed
move tests, address feedback
1 parent 77e7963 commit 4499cda

File tree

3 files changed

+39
-42
lines changed

3 files changed

+39
-42
lines changed

pandas/tests/test_categorical.py

-32
Original file line numberDiff line numberDiff line change
@@ -3943,38 +3943,6 @@ def f():
39433943
'category', categories=list('cab'))})
39443944
tm.assert_frame_equal(result, expected)
39453945

3946-
def test_union(self):
3947-
from pandas.types.concat import union_categoricals
3948-
3949-
s = Categorical(list('abc'))
3950-
s2 = Categorical(list('abd'))
3951-
result = union_categoricals([s, s2])
3952-
expected = Categorical(list('abcabd'))
3953-
tm.assert_categorical_equal(result, expected, ignore_order=True)
3954-
3955-
s = Categorical([0,1,2])
3956-
s2 = Categorical([2,3,4])
3957-
result = union_categoricals([s, s2])
3958-
expected = Categorical([0,1,2,2,3,4])
3959-
tm.assert_categorical_equal(result, expected, ignore_order=True)
3960-
3961-
s = Categorical([0,1.2,2])
3962-
s2 = Categorical([2,3.4,4])
3963-
result = union_categoricals([s, s2])
3964-
expected = Categorical([0,1.2,2,2,3.4,4])
3965-
tm.assert_categorical_equal(result, expected, ignore_order=True)
3966-
3967-
# can't be ordered
3968-
s = Categorical([0,1.2,2], ordered=True)
3969-
with tm.assertRaises(TypeError):
3970-
union_categoricals([s, s2])
3971-
3972-
# must exactly match types
3973-
s = Categorical([0,1.2,2])
3974-
s2 = Categorical([2,3,4])
3975-
with tm.assertRaises(TypeError):
3976-
union_categoricals([s, s2])
3977-
39783946
def test_categorical_index_preserver(self):
39793947

39803948
a = Series(np.arange(6, dtype='int64'))

pandas/tools/tests/test_concat.py

+33-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
from pandas import (DataFrame, concat,
1010
read_csv, isnull, Series, date_range,
1111
Index, Panel, MultiIndex, Timestamp,
12-
DatetimeIndex)
12+
DatetimeIndex, Categorical)
13+
from pandas.types.concat import union_categoricals
1314
from pandas.util import testing as tm
1415
from pandas.util.testing import (assert_frame_equal,
1516
makeCustomDataframe as mkdf,
@@ -919,6 +920,37 @@ def test_concat_keys_with_none(self):
919920
keys=['b', 'c', 'd', 'e'])
920921
tm.assert_frame_equal(result, expected)
921922

923+
def test_union_categorical(self):
924+
# GH 13361
925+
s = Categorical(list('abc'))
926+
s2 = Categorical(list('abd'))
927+
result = union_categoricals([s, s2])
928+
expected = Categorical(list('abcabd'))
929+
tm.assert_categorical_equal(result, expected, ignore_order=True)
930+
931+
s = Categorical([0, 1, 2])
932+
s2 = Categorical([2, 3, 4])
933+
result = union_categoricals([s, s2])
934+
expected = Categorical([0, 1, 2, 2, 3, 4])
935+
tm.assert_categorical_equal(result, expected, ignore_order=True)
936+
937+
s = Categorical([0, 1.2, 2])
938+
s2 = Categorical([2, 3.4, 4])
939+
result = union_categoricals([s, s2])
940+
expected = Categorical([0, 1.2, 2, 2, 3.4, 4])
941+
tm.assert_categorical_equal(result, expected, ignore_order=True)
942+
943+
# can't be ordered
944+
s = Categorical([0, 1.2, 2], ordered=True)
945+
with tm.assertRaises(TypeError):
946+
union_categoricals([s, s2])
947+
948+
# must exactly match types
949+
s = Categorical([0, 1.2, 2])
950+
s2 = Categorical([2, 3, 4])
951+
with tm.assertRaises(TypeError):
952+
union_categoricals([s, s2])
953+
922954
def test_concat_bug_1719(self):
923955
ts1 = tm.makeTimeSeries()
924956
ts2 = tm.makeTimeSeries()[::2]

pandas/types/concat.py

+6-9
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ def union_categoricals(to_union):
216216
A single array, categories will be ordered as they
217217
appear in the list
218218
"""
219-
from pandas import Index, Categorical
219+
from pandas import Index, Categorical, unique
220220

221221
if any(c.ordered for c in to_union):
222222
raise TypeError("Can only combine unordered Categoricals")
@@ -226,19 +226,16 @@ def union_categoricals(to_union):
226226
for c in to_union):
227227
raise TypeError("dtype of categories must be the same")
228228

229-
for i, c in enumerate(to_union):
230-
if i == 0:
231-
cats = c.categories.tolist()
232-
else:
233-
cats = cats + c.categories.difference(Index(cats)).tolist()
229+
unique_cats = unique(np.concatenate([c.categories for c in to_union]))
230+
categories = Index(unique_cats)
234231

235-
cats = Index(cats)
236232
new_codes = []
237233
for c in to_union:
238-
indexer = cats.get_indexer(c.categories)
234+
indexer = categories.get_indexer(c.categories)
239235
new_codes.append(indexer.take(c.codes))
240236
codes = np.concatenate(new_codes)
241-
return Categorical.from_codes(codes, cats)
237+
return Categorical(codes, categories=categories, ordered=False,
238+
fastpath=True)
242239

243240

244241
def _concat_datetime(to_concat, axis=0, typs=None):

0 commit comments

Comments
 (0)