Skip to content

Commit 8b90070

Browse files
authored
PERF: avoid extraneous extract_array (#43147)
1 parent a8d992a commit 8b90070

File tree

3 files changed

+12
-3
lines changed

3 files changed

+12
-3
lines changed

asv_bench/benchmarks/frame_ctor.py

+8
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import pandas as pd
44
from pandas import (
5+
Categorical,
56
DataFrame,
67
MultiIndex,
78
Series,
@@ -31,6 +32,9 @@ def setup(self):
3132
self.dict_list = frame.to_dict(orient="records")
3233
self.data2 = {i: {j: float(j) for j in range(100)} for i in range(2000)}
3334

35+
# arrays which we won't consolidate
36+
self.dict_of_categoricals = {i: Categorical(np.arange(N)) for i in range(K)}
37+
3438
def time_list_of_dict(self):
3539
DataFrame(self.dict_list)
3640

@@ -50,6 +54,10 @@ def time_nested_dict_int64(self):
5054
# nested dict, integer indexes, regression described in #621
5155
DataFrame(self.data2)
5256

57+
def time_dict_of_categoricals(self):
58+
# dict of arrays that we won't consolidate
59+
DataFrame(self.dict_of_categoricals)
60+
5361

5462
class FromSeries:
5563
def setup(self):

pandas/core/internals/construction.py

+1
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ def arrays_to_mgr(
129129

130130
else:
131131
index = ensure_index(index)
132+
arrays = [extract_array(x, extract_numpy=True) for x in arrays]
132133

133134
# Reached via DataFrame._from_arrays; we do validation here
134135
for arr in arrays:

pandas/core/internals/managers.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1809,21 +1809,21 @@ def create_block_manager_from_blocks(
18091809

18101810

18111811
def create_block_manager_from_column_arrays(
1812-
arrays,
1812+
arrays: list[ArrayLike],
18131813
axes: list[Index],
18141814
consolidate: bool = True,
18151815
) -> BlockManager:
18161816
# Assertions disabled for performance (caller is responsible for verifying)
18171817
# assert isinstance(axes, list)
18181818
# assert all(isinstance(x, Index) for x in axes)
1819+
# assert all(isinstance(x, (np.ndarray, ExtensionArray)) for x in arrays)
1820+
# assert all(type(x) is not PandasArray for x in arrays)
18191821
# assert all(x.ndim == 1 for x in arrays)
18201822
# assert all(len(x) == len(axes[1]) for x in arrays)
18211823
# assert len(arrays) == len(axes[0])
18221824
# These last three are sufficient to allow us to safely pass
18231825
# verify_integrity=False below.
18241826

1825-
arrays = [extract_array(x, extract_numpy=True) for x in arrays]
1826-
18271827
try:
18281828
blocks = _form_blocks(arrays, consolidate)
18291829
mgr = BlockManager(blocks, axes, verify_integrity=False)

0 commit comments

Comments
 (0)