Skip to content

Commit facda93

Browse files
jbrockmendel authored and feefladder committed
PERF: DataFrame(dict_of_categoricals) (pandas-dev#43237)
1 parent b0a1b53 commit facda93

File tree

1 file changed

+11
-3
lines changed

1 file changed

+11
-3
lines changed

pandas/core/internals/managers.py

+11 -3
Original file line number | Diff line number | Diff line change
@@ -1859,12 +1859,20 @@ def construction_error(
18591859
# -----------------------------------------------------------------------
18601860

18611861

1862-
def _grouping_func(tup: tuple[int, ArrayLike]) -> tuple[bool, DtypeObj]:
1862+
def _grouping_func(tup: tuple[int, ArrayLike]) -> tuple[int, bool, DtypeObj]:
18631863
# compat for numpy<1.21, in which comparing a np.dtype with an ExtensionDtype
18641864
# raises instead of returning False. Once earlier numpy versions are dropped,
18651865
# this can be simplified to `return tup[1].dtype`
18661866
dtype = tup[1].dtype
1867-
return isinstance(dtype, np.dtype), dtype
1867+
1868+
if is_1d_only_ea_dtype(dtype):
1869+
# We know these won't be consolidated, so don't need to group these.
1870+
# This avoids expensive comparisons of CategoricalDtype objects
1871+
sep = id(dtype)
1872+
else:
1873+
sep = 0
1874+
1875+
return sep, isinstance(dtype, np.dtype), dtype
18681876

18691877

18701878
def _form_blocks(arrays: list[ArrayLike], consolidate: bool) -> list[Block]:
@@ -1878,7 +1886,7 @@ def _form_blocks(arrays: list[ArrayLike], consolidate: bool) -> list[Block]:
18781886
grouper = itertools.groupby(tuples, _grouping_func)
18791887

18801888
nbs = []
1881-
for (_, dtype), tup_block in grouper:
1889+
for (_, _, dtype), tup_block in grouper:
18821890
block_type = get_block_type(None, dtype)
18831891

18841892
if isinstance(dtype, np.dtype):

0 commit comments

Comments
 (0)