diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index ddff78c9d511f..292d5f608d4cb 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -242,6 +242,74 @@ def union_categoricals(to_union, sort_categories=False, ignore_order=False): - sort_categories=True and Categoricals are ordered ValueError Empty list of categoricals passed + + Notes + ----- + + To learn more about categories, see `link + <https://pandas.pydata.org/pandas-docs/stable/user_guide/categorical.html#unioning>`__ + + Examples + -------- + + >>> from pandas.api.types import union_categoricals + + If you want to combine categoricals that do not necessarily have + the same categories, `union_categoricals` will combine a list-like + of categoricals. The new categories will be the union of the + categories being combined. + + >>> a = pd.Categorical(["b", "c"]) + >>> b = pd.Categorical(["a", "b"]) + >>> union_categoricals([a, b]) + [b, c, a, b] + Categories (3, object): [b, c, a] + + By default, the resulting categories will be ordered as they appear + in the `categories` of the data. If you want the categories to be + lexsorted, use the `sort_categories=True` argument. + + >>> union_categoricals([a, b], sort_categories=True) + [b, c, a, b] + Categories (3, object): [a, b, c] + + `union_categoricals` also works with the case of combining two + categoricals of the same categories and order information (i.e. the + case where you could also use `append`). + + >>> a = pd.Categorical(["a", "b"], ordered=True) + >>> b = pd.Categorical(["a", "b", "a"], ordered=True) + >>> union_categoricals([a, b]) + [a, b, a, b, a] + Categories (2, object): [a < b] + + Raises `TypeError` because the categories are ordered and not identical. 
+ + >>> a = pd.Categorical(["a", "b"], ordered=True) + >>> b = pd.Categorical(["a", "b", "c"], ordered=True) + >>> union_categoricals([a, b]) + TypeError: to union ordered Categoricals, all categories must be the same + + New in version 0.20.0 + + Ordered categoricals with different categories or orderings can be + combined by using the `ignore_order=True` argument. + + >>> a = pd.Categorical(["a", "b", "c"], ordered=True) + >>> b = pd.Categorical(["c", "b", "a"], ordered=True) + >>> union_categoricals([a, b], ignore_order=True) + [a, b, c, c, b, a] + Categories (3, object): [a, b, c] + + `union_categoricals` also works with a `CategoricalIndex`, or `Series` + containing categorical data, but note that the resulting array will + always be a plain `Categorical`. + + >>> a = pd.Series(["b", "c"], dtype='category') + >>> b = pd.Series(["a", "b"], dtype='category') + >>> union_categoricals([a, b]) + [b, c, a, b] + Categories (3, object): [b, c, a] """ from pandas import Index, Categorical, CategoricalIndex, Series