Skip to content

REF: matching-dtype case first in concat_compat #33530

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
9 commits merged on Apr 17, 2020 (source and target branch names were not captured in this extract)
29 changes: 15 additions & 14 deletions pandas/core/dtypes/concat.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -105,7 +105,15 @@ def is_nonempty(x) -> bool:
_contains_datetime = any(typ.startswith("datetime") for typ in typs)
_contains_period = any(typ.startswith("period") for typ in typs)

if "category" in typs:
all_empty = not len(non_empties)
single_dtype = len({x.dtype for x in to_concat}) == 1
any_ea = any(is_extension_array_dtype(x.dtype) for x in to_concat)

if any_ea and single_dtype and axis == 0:
cls = type(to_concat[0])
return cls._concat_same_type(to_concat)

elif "category" in typs:
# this must be prior to concat_datetime,
# to support Categorical + datetime-like
return concat_categorical(to_concat, axis=axis)
Expand All @@ -117,18 +125,11 @@ def is_nonempty(x) -> bool:
elif "sparse" in typs:
return _concat_sparse(to_concat, axis=axis, typs=typs)

all_empty = not len(non_empties)
single_dtype = len({x.dtype for x in to_concat}) == 1
any_ea = any(is_extension_array_dtype(x.dtype) for x in to_concat)

if any_ea and axis == 1:
elif any_ea and axis == 1:
to_concat = [np.atleast_2d(x.astype("object")) for x in to_concat]
return np.concatenate(to_concat, axis=axis)

elif any_ea and single_dtype and axis == 0:
cls = type(to_concat[0])
return cls._concat_same_type(to_concat)

if all_empty:
elif all_empty:
# we have all empties, but may need to coerce the result dtype to
# object if we have non-numeric type operands (numpy would otherwise
# cast this to float)
Expand Down Expand Up @@ -292,15 +293,15 @@ def union_categoricals(
[b, c, a, b]
Categories (3, object): [b, c, a]
"""
from pandas import Index, Categorical
from pandas import Categorical
from pandas.core.arrays.categorical import recode_for_categories

if len(to_union) == 0:
raise ValueError("No Categoricals to union")

def _maybe_unwrap(x):
if isinstance(x, (ABCCategoricalIndex, ABCSeries)):
return x.values
return x._values
elif isinstance(x, Categorical):
return x
else:
Expand Down Expand Up @@ -343,7 +344,7 @@ def _maybe_unwrap(x):
elif ignore_order or all(not c.ordered for c in to_union):
# different categories - union and recode
cats = first.categories.append([c.categories for c in to_union[1:]])
categories = Index(cats.unique())
categories = cats.unique()
if sort_categories:
categories = categories.sort_values()

Expand Down