Skip to content

Commit 7a9bb25

Browse files
jbrockmendel authored and feefladder committed
REF: names no longer needed in _form_blocks (pandas-dev#43114)
1 parent d834295 commit 7a9bb25

File tree

4 files changed

+141
-43
lines changed

4 files changed

+141
-43
lines changed

pandas/core/internals/__init__.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
from pandas.core.internals.api import make_block # pseudo-public version
1+
from pandas.core.internals.api import (
2+
create_block_manager_from_arrays,
3+
make_block,
4+
)
25
from pandas.core.internals.array_manager import (
36
ArrayManager,
47
SingleArrayManager,
@@ -18,7 +21,6 @@
1821
from pandas.core.internals.managers import (
1922
BlockManager,
2023
SingleBlockManager,
21-
create_block_manager_from_arrays,
2224
create_block_manager_from_blocks,
2325
)
2426

pandas/core/internals/api.py

+124-1
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,16 @@
88
"""
99
from __future__ import annotations
1010

11+
from collections import defaultdict
12+
from typing import DefaultDict
13+
1114
import numpy as np
1215

1316
from pandas._libs.internals import BlockPlacement
14-
from pandas._typing import Dtype
17+
from pandas._typing import (
18+
ArrayLike,
19+
Dtype,
20+
)
1521

1622
from pandas.core.dtypes.common import (
1723
is_datetime64tz_dtype,
@@ -20,14 +26,24 @@
2026

2127
from pandas.core.arrays import DatetimeArray
2228
from pandas.core.construction import extract_array
29+
from pandas.core.indexes.api import Index
2330
from pandas.core.internals.blocks import (
2431
Block,
32+
CategoricalBlock,
2533
DatetimeTZBlock,
34+
ExtensionBlock,
2635
check_ndim,
2736
ensure_block_shape,
2837
extract_pandas_array,
2938
get_block_type,
3039
maybe_coerce_values,
40+
new_block,
41+
)
42+
from pandas.core.internals.managers import (
43+
BlockManager,
44+
construction_error,
45+
multi_blockify,
46+
simple_blockify,
3147
)
3248

3349

@@ -86,3 +102,110 @@ def maybe_infer_ndim(values, placement: BlockPlacement, ndim: int | None) -> int
86102
else:
87103
ndim = values.ndim
88104
return ndim
105+
106+
107+
def create_block_manager_from_arrays(
    arrays,
    names: Index,
    axes: list[Index],
    consolidate: bool = True,
) -> BlockManager:
    """
    Build a BlockManager from a collection of arrays.

    Parameters
    ----------
    arrays : iterable
        Each element is unwrapped with
        ``extract_array(x, extract_numpy=True)`` before blocks are formed.
    names : Index
        Forwarded to ``_form_blocks``, which matches it against ``axes[0]``.
    axes : list[Index]
        Axes handed to the ``BlockManager`` constructor.
    consolidate : bool, default True
        Forwarded to ``_form_blocks``; when True the manager is additionally
        consolidated in place before being returned.

    Returns
    -------
    BlockManager

    Raises
    ------
    Exception
        Whatever ``construction_error`` produces, when forming the blocks or
        constructing the manager raises ``ValueError``.
    """
    # The types of ``names`` and ``axes`` (Index, list of Index) are
    # deliberately not asserted here, for performance.
    unwrapped = [extract_array(arr, extract_numpy=True) for arr in arrays]

    try:
        manager = BlockManager(
            _form_blocks(unwrapped, names, axes, consolidate), axes
        )
    except ValueError as err:
        raise construction_error(len(unwrapped), unwrapped[0].shape, axes, err)

    if not consolidate:
        return manager
    manager._consolidate_inplace()
    return manager
128+
129+
130+
def _form_blocks(
    arrays: list[ArrayLike], names: Index, axes: list[Index], consolidate: bool
) -> list[Block]:
    """
    Bucket arrays by block type and build the Blocks for each bucket.

    Parameters
    ----------
    arrays : list[ArrayLike]
        Arrays to place; ``arrays[k]`` is looked up via the position of each
        ``axes[0]`` label within ``names``.
    names : Index
        Labels used to locate each array.
    axes : list[Index]
        ``axes[0]`` determines block placements; the lengths of ``axes[1:]``
        give the trailing shape of the all-NaN filler block.
    consolidate : bool
        Forwarded to ``multi_blockify`` / ``simple_blockify``.

    Returns
    -------
    list[Block]
        Blocks in this order: NumericBlock, DatetimeLikeBlock,
        DatetimeTZBlock, ObjectBlock, CategoricalBlock, ExtensionBlock
        buckets, then (if any ``axes[0]`` label has no match in ``names``)
        a single object-dtype block filled with NaN.
    """
    by_kind: DefaultDict[str, list] = defaultdict(list)
    missing_locs = []

    # For every position in axes[0], find which entry of ``arrays`` belongs
    # there; -1 marks a label that ``names`` does not contain.
    if names.equals(axes[0]):
        indexer = np.arange(len(names))
    else:
        # names.intersection(axes[0]) being unique is assumed rather than
        # asserted, for performance.
        indexer = names.get_indexer_for(axes[0])

    for loc, src in enumerate(indexer):
        if src != -1:
            arr = arrays[src]
            by_kind[get_block_type(arr).__name__].append((loc, arr))
        else:
            missing_locs.append(loc)

    result: list[Block] = []

    numeric_items = by_kind["NumericBlock"]
    if numeric_items:
        result.extend(multi_blockify(numeric_items, consolidate=consolidate))

    dtlike_items = by_kind["DatetimeLikeBlock"]
    if dtlike_items:
        result.extend(multi_blockify(dtlike_items, consolidate=consolidate))

    # tz-aware datetimes: one 2D-shaped block per array.
    tz_blocks = [
        DatetimeTZBlock(
            ensure_block_shape(extract_array(arr), 2),
            placement=BlockPlacement(loc),
            ndim=2,
        )
        for loc, arr in by_kind["DatetimeTZBlock"]
    ]
    result.extend(tz_blocks)

    object_items = by_kind["ObjectBlock"]
    if object_items:
        result.extend(
            simple_blockify(object_items, np.object_, consolidate=consolidate)
        )

    # Categorical and other extension arrays: one block per array.
    categorical_blocks = [
        CategoricalBlock(arr, placement=BlockPlacement(loc), ndim=2)
        for loc, arr in by_kind["CategoricalBlock"]
    ]
    result.extend(categorical_blocks)

    extension_blocks = [
        ExtensionBlock(arr, placement=BlockPlacement(loc), ndim=2)
        for loc, arr in by_kind["ExtensionBlock"]
    ]
    result.extend(extension_blocks)

    if missing_locs:
        # Labels without a supplied array share one object-dtype NaN block.
        na_shape = (len(missing_locs), *(len(ax) for ax in axes[1:]))
        filler = np.empty(na_shape, dtype=object)
        filler.fill(np.nan)
        result.append(new_block(filler, placement=missing_locs, ndim=2))

    return result

pandas/core/internals/construction.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,8 @@
8585
from pandas.core.internals.managers import (
8686
BlockManager,
8787
SingleBlockManager,
88-
create_block_manager_from_arrays,
8988
create_block_manager_from_blocks,
89+
create_block_manager_from_column_arrays,
9090
)
9191

9292
if TYPE_CHECKING:
@@ -131,8 +131,8 @@ def arrays_to_mgr(
131131
axes = [columns, index]
132132

133133
if typ == "block":
134-
return create_block_manager_from_arrays(
135-
arrays, columns, axes, consolidate=consolidate
134+
return create_block_manager_from_column_arrays(
135+
arrays, axes, consolidate=consolidate
136136
)
137137
elif typ == "array":
138138
if len(columns) != len(arrays):

pandas/core/internals/managers.py

+10-37
Original file line numberDiff line numberDiff line change
@@ -1808,21 +1808,19 @@ def create_block_manager_from_blocks(
18081808
return mgr
18091809

18101810

1811-
def create_block_manager_from_arrays(
1811+
def create_block_manager_from_column_arrays(
18121812
arrays,
1813-
names: Index,
18141813
axes: list[Index],
18151814
consolidate: bool = True,
18161815
) -> BlockManager:
18171816
# Assertions disabled for performance
1818-
# assert isinstance(names, Index)
18191817
# assert isinstance(axes, list)
18201818
# assert all(isinstance(x, Index) for x in axes)
18211819

18221820
arrays = [extract_array(x, extract_numpy=True) for x in arrays]
18231821

18241822
try:
1825-
blocks = _form_blocks(arrays, names, axes, consolidate)
1823+
blocks = _form_blocks(arrays, consolidate)
18261824
mgr = BlockManager(blocks, axes)
18271825
except ValueError as e:
18281826
raise construction_error(len(arrays), arrays[0].shape, axes, e)
@@ -1860,26 +1858,11 @@ def construction_error(
18601858
# -----------------------------------------------------------------------
18611859

18621860

1863-
def _form_blocks(
1864-
arrays: list[ArrayLike], names: Index, axes: list[Index], consolidate: bool
1865-
) -> list[Block]:
1866-
# put "leftover" items in float bucket, where else?
1867-
# generalize?
1868-
items_dict: DefaultDict[str, list] = defaultdict(list)
1869-
extra_locs = []
1861+
def _form_blocks(arrays: list[ArrayLike], consolidate: bool) -> list[Block]:
18701862

1871-
names_idx = names
1872-
if names_idx.equals(axes[0]):
1873-
names_indexer = np.arange(len(names_idx))
1874-
else:
1875-
# Assertion disabled for performance
1876-
# assert names_idx.intersection(axes[0]).is_unique
1877-
names_indexer = names_idx.get_indexer_for(axes[0])
1863+
items_dict: DefaultDict[str, list] = defaultdict(list)
18781864

1879-
for i, name_idx in enumerate(names_indexer):
1880-
if name_idx == -1:
1881-
extra_locs.append(i)
1882-
continue
1865+
for i, name_idx in enumerate(range(len(arrays))):
18831866

18841867
v = arrays[name_idx]
18851868

@@ -1888,13 +1871,13 @@ def _form_blocks(
18881871

18891872
blocks: list[Block] = []
18901873
if len(items_dict["NumericBlock"]):
1891-
numeric_blocks = _multi_blockify(
1874+
numeric_blocks = multi_blockify(
18921875
items_dict["NumericBlock"], consolidate=consolidate
18931876
)
18941877
blocks.extend(numeric_blocks)
18951878

18961879
if len(items_dict["DatetimeLikeBlock"]):
1897-
dtlike_blocks = _multi_blockify(
1880+
dtlike_blocks = multi_blockify(
18981881
items_dict["DatetimeLikeBlock"], consolidate=consolidate
18991882
)
19001883
blocks.extend(dtlike_blocks)
@@ -1911,7 +1894,7 @@ def _form_blocks(
19111894
blocks.extend(dttz_blocks)
19121895

19131896
if len(items_dict["ObjectBlock"]) > 0:
1914-
object_blocks = _simple_blockify(
1897+
object_blocks = simple_blockify(
19151898
items_dict["ObjectBlock"], np.object_, consolidate=consolidate
19161899
)
19171900
blocks.extend(object_blocks)
@@ -1931,20 +1914,10 @@ def _form_blocks(
19311914

19321915
blocks.extend(external_blocks)
19331916

1934-
if len(extra_locs):
1935-
shape = (len(extra_locs),) + tuple(len(x) for x in axes[1:])
1936-
1937-
# empty items -> dtype object
1938-
block_values = np.empty(shape, dtype=object)
1939-
block_values.fill(np.nan)
1940-
1941-
na_block = new_block(block_values, placement=extra_locs, ndim=2)
1942-
blocks.append(na_block)
1943-
19441917
return blocks
19451918

19461919

1947-
def _simple_blockify(tuples, dtype, consolidate: bool) -> list[Block]:
1920+
def simple_blockify(tuples, dtype, consolidate: bool) -> list[Block]:
19481921
"""
19491922
return a single array of a block that has a single dtype; if dtype is
19501923
not None, coerce to this dtype
@@ -1962,7 +1935,7 @@ def _simple_blockify(tuples, dtype, consolidate: bool) -> list[Block]:
19621935
return [block]
19631936

19641937

1965-
def _multi_blockify(tuples, dtype: DtypeObj | None = None, consolidate: bool = True):
1938+
def multi_blockify(tuples, dtype: DtypeObj | None = None, consolidate: bool = True):
19661939
"""return an array of blocks that potentially have different dtypes"""
19671940

19681941
if not consolidate:

0 commit comments

Comments
 (0)