Skip to content

Commit 17209f9

Browse files
committed
Doc updates; use Index.append
1 parent 4499cda commit 17209f9

File tree

4 files changed

+73
-29
lines changed

4 files changed

+73
-29
lines changed

doc/source/categorical.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -668,7 +668,7 @@ will be the union of the categories being combined.
668668
.. note::
669669

670670
`union_categoricals` only works with unordered categoricals
671-
and will raise if any are orderd.
671+
and will raise if any are ordered.
672672

673673

674674
Getting Data In/Out

pandas/tools/tests/test_concat.py

+31-17
Original file line numberDiff line numberDiff line change
@@ -922,26 +922,40 @@ def test_concat_keys_with_none(self):
922922

923923
def test_union_categorical(self):
924924
# GH 13361
925-
s = Categorical(list('abc'))
926-
s2 = Categorical(list('abd'))
927-
result = union_categoricals([s, s2])
928-
expected = Categorical(list('abcabd'))
929-
tm.assert_categorical_equal(result, expected, ignore_order=True)
930-
931-
s = Categorical([0, 1, 2])
932-
s2 = Categorical([2, 3, 4])
933-
result = union_categoricals([s, s2])
934-
expected = Categorical([0, 1, 2, 2, 3, 4])
935-
tm.assert_categorical_equal(result, expected, ignore_order=True)
936-
937-
s = Categorical([0, 1.2, 2])
938-
s2 = Categorical([2, 3.4, 4])
939-
result = union_categoricals([s, s2])
940-
expected = Categorical([0, 1.2, 2, 2, 3.4, 4])
941-
tm.assert_categorical_equal(result, expected, ignore_order=True)
925+
data = [
926+
(list('abc'), list('abd'), list('abcabd')),
927+
([0, 1, 2], [2, 3, 4], [0, 1, 2, 2, 3, 4]),
928+
([0, 1.2, 2], [2, 3.4, 4], [0, 1.2, 2, 2, 3.4, 4]),
929+
930+
(pd.date_range('2014-01-01', '2014-01-05'),
931+
pd.date_range('2014-01-06', '2014-01-07'),
932+
pd.date_range('2014-01-01', '2014-01-07')),
933+
934+
(pd.date_range('2014-01-01', '2014-01-05', tz='US/Central'),
935+
pd.date_range('2014-01-06', '2014-01-07', tz='US/Central'),
936+
pd.date_range('2014-01-01', '2014-01-07', tz='US/Central')),
937+
938+
(pd.period_range('2014-01-01', '2014-01-05'),
939+
pd.period_range('2014-01-06', '2014-01-07'),
940+
pd.period_range('2014-01-01', '2014-01-07')),
941+
]
942+
943+
for a, b, combined in data:
944+
result = union_categoricals([Categorical(a), Categorical(b)])
945+
expected = Categorical(combined)
946+
tm.assert_categorical_equal(result, expected,
947+
check_category_order=True)
948+
949+
# new categories ordered by appearance
950+
s = Categorical(['x', 'y', 'z'])
951+
s2 = Categorical(['a', 'b', 'c'])
952+
result = union_categoricals([s, s2]).categories
953+
expected = Index(['x', 'y', 'z', 'a', 'b', 'c'])
954+
tm.assert_index_equal(result, expected)
942955

943956
# can't be ordered
944957
s = Categorical([0, 1.2, 2], ordered=True)
958+
s2 = Categorical([0, 1.2, 2], ordered=True)
945959
with tm.assertRaises(TypeError):
946960
union_categoricals([s, s2])
947961

pandas/types/concat.py

+13-4
Original file line numberDiff line numberDiff line change
@@ -206,27 +206,36 @@ def union_categoricals(to_union):
206206
Combine list-like of Categoricals, unioning categories. All
207207
must have the same dtype, and none can be ordered.
208208
209+
.. versionadded:: 0.18.2
210+
209211
Parameters
210212
----------
211-
to_union : list like of Categorical
213+
to_union : list-like of Categoricals
212214
213215
Returns
214216
-------
215217
Categorical
216218
A single array, categories will be ordered as they
217219
appear in the list
220+
221+
Raises
222+
------
223+
TypeError
224+
If any of the categoricals are ordered or they do not all
225+
have the same dtype
218226
"""
219-
from pandas import Index, Categorical, unique
227+
from pandas import Index, Categorical
220228

221229
if any(c.ordered for c in to_union):
222230
raise TypeError("Can only combine unordered Categoricals")
223231

224232
first = to_union[0]
225-
if not all(com.is_dtype_equal(c.categories, first.categories)
233+
if not all(com.is_dtype_equal(c.categories.dtype, first.categories.dtype)
226234
for c in to_union):
227235
raise TypeError("dtype of categories must be the same")
228236

229-
unique_cats = unique(np.concatenate([c.categories for c in to_union]))
237+
cats = first.categories
238+
unique_cats = cats.append([c.categories for c in to_union[1:]]).unique()
230239
categories = Index(unique_cats)
231240

232241
new_codes = []

pandas/util/testing.py

+28-7
Original file line numberDiff line numberDiff line change
@@ -963,19 +963,40 @@ def assertNotIsInstance(obj, cls, msg=''):
963963

964964

965965
def assert_categorical_equal(left, right, check_dtype=True,
966-
obj='Categorical', ignore_order=False):
966+
obj='Categorical', check_category_order=True):
967+
"""Test that categoricals are equivalent
968+
969+
Parameters
970+
----------
971+
left, right : Categorical
972+
Categoricals to compare
973+
check_dtype : bool, default True
974+
Check that the integer dtypes of the codes are the same
975+
obj : str, default 'Categorical'
976+
Specify object name being compared, internally used to show appropriate
977+
assertion message
978+
check_category_order : bool, default True
979+
Whether the order of the categories should be compared, which
980+
implies identical integer codes. If False, only the resulting
981+
values are compared. The ordered attribute is
982+
checked regardless.
983+
"""
967984
assertIsInstance(left, pd.Categorical, '[Categorical] ')
968985
assertIsInstance(right, pd.Categorical, '[Categorical] ')
969986

970-
if ignore_order:
971-
assert_index_equal(left.categories.sort_values(),
972-
right.categories.sort_values(),
987+
if check_category_order:
988+
assert_index_equal(left.categories, right.categories,
973989
obj='{0}.categories'.format(obj))
990+
assert_numpy_array_equal(left.codes, right.codes,
991+
check_dtype=check_dtype,
992+
obj='{0}.codes'.format(obj))
974993
else:
975-
assert_index_equal(left.categories, right.categories,
994+
assert_index_equal(left.categories.sort_values(),
995+
right.categories.sort_values(),
976996
obj='{0}.categories'.format(obj))
977-
assert_numpy_array_equal(left.codes, right.codes, check_dtype=check_dtype,
978-
obj='{0}.codes'.format(obj))
997+
assert_index_equal(left.categories.take(left.codes),
998+
right.categories.take(right.codes),
999+
obj='{0}.values'.format(obj))
9791000

9801001
assert_attr_equal('ordered', left, right, obj=obj)
9811002

0 commit comments

Comments
 (0)