1
1
from __future__ import annotations
2
2
3
- import itertools
4
3
from typing import (
5
4
TYPE_CHECKING ,
6
5
Sequence ,
20
19
from pandas .util ._decorators import cache_readonly
21
20
from pandas .util ._exceptions import find_stack_level
22
21
23
- from pandas .core .dtypes .astype import astype_array
24
22
from pandas .core .dtypes .cast import (
25
23
ensure_dtype_can_hold_na ,
26
24
find_common_type ,
38
36
isna_all ,
39
37
)
40
38
41
- from pandas .core .arrays import ExtensionArray
42
39
from pandas .core .arrays .sparse import SparseDtype
43
40
from pandas .core .construction import ensure_wrapped_if_datetimelike
44
- from pandas .core .internals .array_manager import (
45
- ArrayManager ,
46
- NullArrayProxy ,
47
- )
41
+ from pandas .core .internals .array_manager import ArrayManager
48
42
from pandas .core .internals .blocks import (
49
43
ensure_block_shape ,
50
44
new_block_2d ,
59
53
ArrayLike ,
60
54
AxisInt ,
61
55
DtypeObj ,
62
- Manager ,
56
+ Manager2D ,
63
57
Shape ,
64
58
)
65
59
71
65
72
66
73
67
def _concatenate_array_managers (
74
- mgrs : list [Manager ], axes : list [Index ], concat_axis : AxisInt
75
- ) -> Manager :
68
+ mgrs : list [ArrayManager ], axes : list [Index ], concat_axis : AxisInt
69
+ ) -> Manager2D :
76
70
"""
77
71
Concatenate array managers into one.
78
72
@@ -87,80 +81,16 @@ def _concatenate_array_managers(
87
81
ArrayManager
88
82
"""
89
83
if concat_axis == 1 :
90
- # concatting along the rows -> concat the reindexed arrays
91
- # TODO(ArrayManager) doesn't yet preserve the correct dtype
92
- arrays = [
93
- concat_arrays ([mgrs [i ].arrays [j ] for i in range (len (mgrs ))])
94
- for j in range (len (mgrs [0 ].arrays ))
95
- ]
84
+ return mgrs [0 ].concat_vertical (mgrs , axes )
96
85
else :
97
86
# concatting along the columns -> combine reindexed arrays in a single manager
98
87
assert concat_axis == 0
99
- arrays = list (itertools .chain .from_iterable ([mgr .arrays for mgr in mgrs ]))
100
-
101
- new_mgr = ArrayManager (arrays , [axes [1 ], axes [0 ]], verify_integrity = False )
102
- return new_mgr
103
-
104
-
105
- def concat_arrays (to_concat : list ) -> ArrayLike :
106
- """
107
- Alternative for concat_compat but specialized for use in the ArrayManager.
108
-
109
- Differences: only deals with 1D arrays (no axis keyword), assumes
110
- ensure_wrapped_if_datetimelike and does not skip empty arrays to determine
111
- the dtype.
112
- In addition ensures that all NullArrayProxies get replaced with actual
113
- arrays.
114
-
115
- Parameters
116
- ----------
117
- to_concat : list of arrays
118
-
119
- Returns
120
- -------
121
- np.ndarray or ExtensionArray
122
- """
123
- # ignore the all-NA proxies to determine the resulting dtype
124
- to_concat_no_proxy = [x for x in to_concat if not isinstance (x , NullArrayProxy )]
125
-
126
- dtypes = {x .dtype for x in to_concat_no_proxy }
127
- single_dtype = len (dtypes ) == 1
128
-
129
- if single_dtype :
130
- target_dtype = to_concat_no_proxy [0 ].dtype
131
- elif all (x .kind in "iub" and isinstance (x , np .dtype ) for x in dtypes ):
132
- # GH#42092
133
- target_dtype = np .find_common_type (list (dtypes ), [])
134
- else :
135
- target_dtype = find_common_type ([arr .dtype for arr in to_concat_no_proxy ])
136
-
137
- to_concat = [
138
- arr .to_array (target_dtype )
139
- if isinstance (arr , NullArrayProxy )
140
- else astype_array (arr , target_dtype , copy = False )
141
- for arr in to_concat
142
- ]
143
-
144
- if isinstance (to_concat [0 ], ExtensionArray ):
145
- cls = type (to_concat [0 ])
146
- return cls ._concat_same_type (to_concat )
147
-
148
- result = np .concatenate (to_concat )
149
-
150
- # TODO decide on exact behaviour (we shouldn't do this only for empty result)
151
- # see https://github.com/pandas-dev/pandas/issues/39817
152
- if len (result ) == 0 :
153
- # all empties -> check for bool to not coerce to float
154
- kinds = {obj .dtype .kind for obj in to_concat_no_proxy }
155
- if len (kinds ) != 1 :
156
- if "b" in kinds :
157
- result = result .astype (object )
158
- return result
88
+ return mgrs [0 ].concat_horizontal (mgrs , axes )
159
89
160
90
161
91
def concatenate_managers (
162
92
mgrs_indexers , axes : list [Index ], concat_axis : AxisInt , copy : bool
163
- ) -> Manager :
93
+ ) -> Manager2D :
164
94
"""
165
95
Concatenate block managers into one.
166
96
@@ -196,7 +126,7 @@ def concatenate_managers(
196
126
197
127
if concat_axis == 0 :
198
128
mgrs = _maybe_reindex_columns_na_proxy (axes , mgrs_indexers , needs_copy )
199
- return _concat_managers_axis0 (mgrs , axes )
129
+ return mgrs [ 0 ]. concat_horizontal (mgrs , axes )
200
130
201
131
if len (mgrs_indexers ) > 0 and mgrs_indexers [0 ][0 ].nblocks > 0 :
202
132
first_dtype = mgrs_indexers [0 ][0 ].blocks [0 ].dtype
@@ -266,29 +196,6 @@ def concatenate_managers(
266
196
return BlockManager (tuple (blocks ), axes )
267
197
268
198
269
- def _concat_managers_axis0 (mgrs : list [BlockManager ], axes : list [Index ]) -> BlockManager :
270
- """
271
- concat_managers specialized to concat_axis=0, with reindexing already
272
- having been done in _maybe_reindex_columns_na_proxy.
273
- """
274
-
275
- offset = 0
276
- blocks : list [Block ] = []
277
- for i , mgr in enumerate (mgrs ):
278
- for blk in mgr .blocks :
279
- # We need to do getitem_block here otherwise we would be altering
280
- # blk.mgr_locs in place, which would render it invalid. This is only
281
- # relevant in the copy=False case.
282
- nb = blk .getitem_block (slice (None ))
283
- nb ._mgr_locs = nb ._mgr_locs .add (offset )
284
- blocks .append (nb )
285
-
286
- offset += len (mgr .items )
287
-
288
- result = BlockManager (tuple (blocks ), axes )
289
- return result
290
-
291
-
292
199
def _maybe_reindex_columns_na_proxy (
293
200
axes : list [Index ],
294
201
mgrs_indexers : list [tuple [BlockManager , dict [int , np .ndarray ]]],
0 commit comments