Skip to content

Commit 6a9cc5c

Browse files
authored
REF: share code ArrayManager/BlockManager (#40338)
1 parent d7c8ae7 commit 6a9cc5c

File tree

4 files changed

+53
-71
lines changed

4 files changed

+53
-71
lines changed

pandas/core/internals/array_manager.py

+4-33
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,12 @@
2222
)
2323
from pandas._typing import (
2424
ArrayLike,
25-
DtypeObj,
2625
Hashable,
2726
)
2827
from pandas.util._validators import validate_bool_kwarg
2928

3029
from pandas.core.dtypes.cast import (
3130
astype_array_safe,
32-
find_common_type,
3331
infer_dtype_from_scalar,
3432
soft_convert_objects,
3533
)
@@ -84,6 +82,7 @@
8482
from pandas.core.internals.base import (
8583
DataManager,
8684
SingleDataManager,
85+
interleaved_dtype,
8786
)
8887
from pandas.core.internals.blocks import (
8988
ensure_block_shape,
@@ -158,18 +157,13 @@ def axes(self) -> List[Index]: # type: ignore[override]
158157
"""Axes is BlockManager-compatible order (columns, rows)"""
159158
return [self._axes[1], self._axes[0]]
160159

161-
@property
162-
def shape(self) -> Tuple[int, ...]:
163-
# this still gives the BlockManager-compatible transposed shape
164-
return tuple(len(ax) for ax in self.axes)
165-
166160
@property
167161
def shape_proper(self) -> Tuple[int, ...]:
168162
# this returns (n_rows, n_columns)
169163
return tuple(len(ax) for ax in self._axes)
170164

171165
@staticmethod
172-
def _normalize_axis(axis):
166+
def _normalize_axis(axis: int) -> int:
173167
# switch axis
174168
axis = 1 if axis == 0 else 0
175169
return axis
@@ -527,9 +521,6 @@ def quantile(
527521
axes = [qs, self._axes[1]]
528522
return type(self)(new_arrs, axes)
529523

530-
def isna(self, func) -> ArrayManager:
531-
return self.apply("apply", func=func)
532-
533524
def where(self, other, cond, align: bool, errors: str, axis: int) -> ArrayManager:
534525
if align:
535526
align_keys = ["other", "cond"]
@@ -768,7 +759,7 @@ def as_array(
768759
copy = copy or na_value is not lib.no_default
769760

770761
if not dtype:
771-
dtype = _interleaved_dtype(self.arrays)
762+
dtype = interleaved_dtype([arr.dtype for arr in self.arrays])
772763

773764
if isinstance(dtype, SparseDtype):
774765
dtype = dtype.subtype
@@ -820,7 +811,7 @@ def fast_xs(self, loc: int) -> ArrayLike:
820811
-------
821812
np.ndarray or ExtensionArray
822813
"""
823-
dtype = _interleaved_dtype(self.arrays)
814+
dtype = interleaved_dtype([arr.dtype for arr in self.arrays])
824815

825816
values = [arr[loc] for arr in self.arrays]
826817
if isinstance(dtype, ExtensionDtype):
@@ -1134,26 +1125,6 @@ def unstack(self, unstacker, fill_value) -> ArrayManager:
11341125
# TODO
11351126
# equals
11361127
# to_dict
1137-
# quantile
1138-
1139-
1140-
def _interleaved_dtype(blocks) -> Optional[DtypeObj]:
1141-
"""
1142-
Find the common dtype for `blocks`.
1143-
1144-
Parameters
1145-
----------
1146-
blocks : List[Block]
1147-
1148-
Returns
1149-
-------
1150-
dtype : np.dtype, ExtensionDtype, or None
1151-
None is returned when `blocks` is empty.
1152-
"""
1153-
if not len(blocks):
1154-
return None
1155-
1156-
return find_common_type([b.dtype for b in blocks])
11571128

11581129

11591130
class SingleArrayManager(ArrayManager, SingleDataManager):

pandas/core/internals/base.py

+42
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,18 @@
44
"""
55
from typing import (
66
List,
7+
Optional,
78
TypeVar,
89
)
910

11+
from pandas._typing import (
12+
DtypeObj,
13+
Shape,
14+
)
1015
from pandas.errors import AbstractMethodError
1116

17+
from pandas.core.dtypes.cast import find_common_type
18+
1219
from pandas.core.base import PandasObject
1320
from pandas.core.indexes.api import (
1421
Index,
@@ -35,6 +42,10 @@ def __len__(self) -> int:
3542
def ndim(self) -> int:
3643
return len(self.axes)
3744

45+
@property
46+
def shape(self) -> Shape:
47+
return tuple(len(ax) for ax in self.axes)
48+
3849
def reindex_indexer(
3950
self: T,
4051
new_axis,
@@ -99,6 +110,37 @@ def equals(self, other: object) -> bool:
99110

100111
return self._equal_values(other)
101112

113+
def apply(
114+
self: T,
115+
f,
116+
align_keys: Optional[List[str]] = None,
117+
ignore_failures: bool = False,
118+
**kwargs,
119+
) -> T:
120+
raise AbstractMethodError(self)
121+
122+
def isna(self: T, func) -> T:
123+
return self.apply("apply", func=func)
124+
102125

103126
class SingleDataManager(DataManager):
104127
ndim = 1
128+
129+
130+
def interleaved_dtype(dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
131+
"""
132+
Find the common dtype for `dtypes`.
133+
134+
Parameters
135+
----------
136+
dtypes : List[DtypeObj]
137+
138+
Returns
139+
-------
140+
dtype : np.dtype, ExtensionDtype, or None
141+
None is returned when `dtypes` is empty.
142+
"""
143+
if not len(dtypes):
144+
return None
145+
146+
return find_common_type(dtypes)

pandas/core/internals/blocks.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,7 @@ def _split(self) -> List[Block]:
478478
for i, ref_loc in enumerate(self.mgr_locs):
479479
vals = self.values[slice(i, i + 1)]
480480

481-
nb = self.make_block(vals, [ref_loc])
481+
nb = self.make_block(vals, BlockPlacement(ref_loc))
482482
new_blocks.append(nb)
483483
return new_blocks
484484

pandas/core/internals/managers.py

+6-37
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,7 @@
3232
from pandas.errors import PerformanceWarning
3333
from pandas.util._validators import validate_bool_kwarg
3434

35-
from pandas.core.dtypes.cast import (
36-
find_common_type,
37-
infer_dtype_from_scalar,
38-
)
35+
from pandas.core.dtypes.cast import infer_dtype_from_scalar
3936
from pandas.core.dtypes.common import (
4037
DT64NS_DTYPE,
4138
is_dtype_equal,
@@ -64,6 +61,7 @@
6461
from pandas.core.internals.base import (
6562
DataManager,
6663
SingleDataManager,
64+
interleaved_dtype,
6765
)
6866
from pandas.core.internals.blocks import (
6967
Block,
@@ -251,11 +249,7 @@ def __nonzero__(self) -> bool:
251249
# Python3 compat
252250
__bool__ = __nonzero__
253251

254-
@property
255-
def shape(self) -> Shape:
256-
return tuple(len(ax) for ax in self.axes)
257-
258-
def _normalize_axis(self, axis):
252+
def _normalize_axis(self, axis: int) -> int:
259253
# switch axis to follow BlockManager logic
260254
if self.ndim == 2:
261255
axis = 1 if axis == 0 else 0
@@ -370,9 +364,6 @@ def _post_setstate(self) -> None:
370364
self._known_consolidated = False
371365
self._rebuild_blknos_and_blklocs()
372366

373-
def __len__(self) -> int:
374-
return len(self.items)
375-
376367
def __repr__(self) -> str:
377368
output = type(self).__name__
378369
for i, ax in enumerate(self.axes):
@@ -584,9 +575,6 @@ def quantile(
584575

585576
return type(self)(blocks, new_axes)
586577

587-
def isna(self, func) -> BlockManager:
588-
return self.apply("apply", func=func)
589-
590578
def where(self, other, cond, align: bool, errors: str, axis: int) -> BlockManager:
591579
axis = self._normalize_axis(axis)
592580
if align:
@@ -933,7 +921,7 @@ def _interleave(
933921
Items must be contained in the blocks
934922
"""
935923
if not dtype:
936-
dtype = _interleaved_dtype(self.blocks)
924+
dtype = interleaved_dtype([blk.dtype for blk in self.blocks])
937925

938926
# TODO: https://github.com/pandas-dev/pandas/issues/22791
939927
# Give EAs some input on what happens here. Sparse needs this.
@@ -1011,7 +999,7 @@ def fast_xs(self, loc: int) -> ArrayLike:
1011999
if len(self.blocks) == 1:
10121000
return self.blocks[0].iget((slice(None), loc))
10131001

1014-
dtype = _interleaved_dtype(self.blocks)
1002+
dtype = interleaved_dtype([blk.dtype for blk in self.blocks])
10151003

10161004
n = len(self)
10171005
if is_extension_array_dtype(dtype):
@@ -1361,7 +1349,7 @@ def reindex_indexer(
13611349
new_blocks = [
13621350
blk.take_nd(
13631351
indexer,
1364-
axis=axis,
1352+
axis=1,
13651353
fill_value=(
13661354
fill_value if fill_value is not None else blk.fill_value
13671355
),
@@ -1960,25 +1948,6 @@ def _stack_arrays(tuples, dtype: np.dtype):
19601948
return stacked, placement
19611949

19621950

1963-
def _interleaved_dtype(blocks: Sequence[Block]) -> Optional[DtypeObj]:
1964-
"""
1965-
Find the common dtype for `blocks`.
1966-
1967-
Parameters
1968-
----------
1969-
blocks : List[Block]
1970-
1971-
Returns
1972-
-------
1973-
dtype : np.dtype, ExtensionDtype, or None
1974-
None is returned when `blocks` is empty.
1975-
"""
1976-
if not len(blocks):
1977-
return None
1978-
1979-
return find_common_type([b.dtype for b in blocks])
1980-
1981-
19821951
def _consolidate(blocks: Tuple[Block, ...]) -> List[Block]:
19831952
"""
19841953
Merge blocks having same dtype, exclude non-consolidating blocks

0 commit comments

Comments
 (0)