diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py index 7fbe249788a98..5db01989cbb6a 100644 --- a/asv_bench/benchmarks/frame_ctor.py +++ b/asv_bench/benchmarks/frame_ctor.py @@ -2,6 +2,7 @@ import pandas as pd from pandas import ( + Categorical, DataFrame, MultiIndex, Series, @@ -31,6 +32,9 @@ def setup(self): self.dict_list = frame.to_dict(orient="records") self.data2 = {i: {j: float(j) for j in range(100)} for i in range(2000)} + # arrays which we won't consolidate + self.dict_of_categoricals = {i: Categorical(np.arange(N)) for i in range(K)} + def time_list_of_dict(self): DataFrame(self.dict_list) @@ -50,6 +54,10 @@ def time_nested_dict_int64(self): # nested dict, integer indexes, regression described in #621 DataFrame(self.data2) + def time_dict_of_categoricals(self): + # dict of arrays that we won't consolidate + DataFrame(self.dict_of_categoricals) + class FromSeries: def setup(self): diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index a02426648cef0..86834a8dccf40 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -129,6 +129,7 @@ def arrays_to_mgr( else: index = ensure_index(index) + arrays = [extract_array(x, extract_numpy=True) for x in arrays] # Reached via DataFrame._from_arrays; we do validation here for arr in arrays: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index a41745b8f6820..8f5d6d6bb0ef4 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1809,21 +1809,21 @@ def create_block_manager_from_blocks( def create_block_manager_from_column_arrays( - arrays, + arrays: list[ArrayLike], axes: list[Index], consolidate: bool = True, ) -> BlockManager: # Assertions disabled for performance (caller is responsible for verifying) # assert isinstance(axes, list) # assert all(isinstance(x, Index) for x in axes) + # assert all(isinstance(x, (np.ndarray, 
ExtensionArray)) for x in arrays) + # assert all(type(x) is not PandasArray for x in arrays) # assert all(x.ndim == 1 for x in arrays) # assert all(len(x) == len(axes[1]) for x in arrays) # assert len(arrays) == len(axes[0]) # These last three are sufficient to allow us to safely pass # verify_integrity=False below. - arrays = [extract_array(x, extract_numpy=True) for x in arrays] - try: blocks = _form_blocks(arrays, consolidate) mgr = BlockManager(blocks, axes, verify_integrity=False)