|
11 | 11 |
|
12 | 12 | from pandas._libs import (
|
13 | 13 | NaT,
|
| 14 | + algos as libalgos, |
14 | 15 | internals as libinternals,
|
15 | 16 | lib,
|
16 | 17 | )
|
|
59 | 60 | AxisInt,
|
60 | 61 | DtypeObj,
|
61 | 62 | Manager,
|
| 63 | + Shape, |
62 | 64 | )
|
63 | 65 |
|
64 | 66 | from pandas import Index
|
@@ -202,6 +204,21 @@ def concatenate_managers(
|
202 | 204 | if concat_axis == 0:
|
203 | 205 | return _concat_managers_axis0(mgrs_indexers, axes, copy)
|
204 | 206 |
|
| 207 | + if len(mgrs_indexers) > 0 and mgrs_indexers[0][0].nblocks > 0: |
| 208 | + first_dtype = mgrs_indexers[0][0].blocks[0].dtype |
| 209 | + if first_dtype in [np.float64, np.float32]: |
| 210 | + # TODO: support more dtypes here. This will be simpler once |
| 211 | + # JoinUnit.is_na behavior is deprecated. |
| 212 | + if ( |
| 213 | + all(_is_homogeneous_mgr(mgr, first_dtype) for mgr, _ in mgrs_indexers) |
| 214 | + and len(mgrs_indexers) > 1 |
| 215 | + ): |
| 216 | + # Fastpath! |
| 217 | + # Length restriction is just to avoid having to worry about 'copy' |
| 218 | + shape = tuple(len(x) for x in axes) |
| 219 | + nb = _concat_homogeneous_fastpath(mgrs_indexers, shape, first_dtype) |
| 220 | + return BlockManager((nb,), axes) |
| 221 | + |
205 | 222 | mgrs_indexers = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers)
|
206 | 223 |
|
207 | 224 | concat_plan = _get_combined_plan([mgr for mgr, _ in mgrs_indexers])
|
@@ -322,6 +339,57 @@ def _maybe_reindex_columns_na_proxy(
|
322 | 339 | return new_mgrs_indexers
|
323 | 340 |
|
324 | 341 |
|
| 342 | +def _is_homogeneous_mgr(mgr: BlockManager, first_dtype: DtypeObj) -> bool: |
| 343 | + """ |
| 344 | + Check if this Manager can be treated as a single ndarray. |
| 345 | + """ |
| 346 | + if mgr.nblocks != 1: |
| 347 | + return False |
| 348 | + blk = mgr.blocks[0] |
| 349 | + if not (blk.mgr_locs.is_slice_like and blk.mgr_locs.as_slice.step == 1): |
| 350 | + return False |
| 351 | + |
| 352 | + return blk.dtype == first_dtype |
| 353 | + |
| 354 | + |
| 355 | +def _concat_homogeneous_fastpath( |
| 356 | + mgrs_indexers, shape: Shape, first_dtype: np.dtype |
| 357 | +) -> Block: |
| 358 | + """ |
| 359 | + With single-Block managers with homogeneous dtypes (that can already hold nan), |
| 360 | + we avoid [...] |
| 361 | + """ |
| 362 | + # assumes |
| 363 | + # all(_is_homogeneous_mgr(mgr, first_dtype) for mgr, _ in in mgrs_indexers) |
| 364 | + arr = np.empty(shape, dtype=first_dtype) |
| 365 | + |
| 366 | + if first_dtype == np.float64: |
| 367 | + take_func = libalgos.take_2d_axis0_float64_float64 |
| 368 | + else: |
| 369 | + take_func = libalgos.take_2d_axis0_float32_float32 |
| 370 | + |
| 371 | + start = 0 |
| 372 | + for mgr, indexers in mgrs_indexers: |
| 373 | + mgr_len = mgr.shape[1] |
| 374 | + end = start + mgr_len |
| 375 | + |
| 376 | + if 0 in indexers: |
| 377 | + take_func( |
| 378 | + mgr.blocks[0].values, |
| 379 | + indexers[0], |
| 380 | + arr[:, start:end], |
| 381 | + ) |
| 382 | + else: |
| 383 | + # No reindexing necessary, we can copy values directly |
| 384 | + arr[:, start:end] = mgr.blocks[0].values |
| 385 | + |
| 386 | + start += mgr_len |
| 387 | + |
| 388 | + bp = libinternals.BlockPlacement(slice(shape[0])) |
| 389 | + nb = new_block_2d(arr, bp) |
| 390 | + return nb |
| 391 | + |
| 392 | + |
325 | 393 | def _get_combined_plan(
|
326 | 394 | mgrs: list[BlockManager],
|
327 | 395 | ) -> list[tuple[BlockPlacement, list[JoinUnit]]]:
|
|
0 commit comments