92
92
)
93
93
94
94
if TYPE_CHECKING :
95
+ from collections .abc import Generator
96
+
95
97
from pandas ._typing import (
96
98
ArrayLike ,
97
99
AxisInt ,
@@ -645,8 +647,7 @@ def get_bool_data(self) -> Self:
645
647
new_blocks .append (blk )
646
648
647
649
elif blk .is_object :
648
- nbs = blk ._split ()
649
- new_blocks .extend (nb for nb in nbs if nb .is_bool )
650
+ new_blocks .extend (nb for nb in blk ._split () if nb .is_bool )
650
651
651
652
return self ._combine (new_blocks )
652
653
@@ -1525,7 +1526,9 @@ def _insert_update_mgr_locs(self, loc) -> None:
1525
1526
When inserting a new Block at location 'loc', we increment
1526
1527
all of the mgr_locs of blocks above that by one.
1527
1528
"""
1528
- for blkno , count in _fast_count_smallints (self .blknos [loc :]):
1529
+ # Faster version of set(arr) for sequences of small numbers
1530
+ blknos = np .bincount (self .blknos [loc :]).nonzero ()[0 ]
1531
+ for blkno in blknos :
1529
1532
# .620 this way, .326 of which is in increment_above
1530
1533
blk = self .blocks [blkno ]
1531
1534
blk ._mgr_locs = blk ._mgr_locs .increment_above (loc )
@@ -1597,7 +1600,7 @@ def grouped_reduce(self, func: Callable) -> Self:
1597
1600
nrows = 0
1598
1601
else :
1599
1602
nrows = result_blocks [0 ].values .shape [- 1 ]
1600
- index = Index ( range ( nrows ) )
1603
+ index = default_index ( nrows )
1601
1604
1602
1605
return type (self ).from_blocks (result_blocks , [self .axes [0 ], index ])
1603
1606
@@ -1735,21 +1738,18 @@ def unstack(self, unstacker, fill_value) -> BlockManager:
1735
1738
bm = BlockManager (new_blocks , [new_columns , new_index ], verify_integrity = False )
1736
1739
return bm
1737
1740
1738
- def to_dict (self ) -> dict [ str , Self ]:
1741
+ def to_iter_dict (self ) -> Generator [ tuple [ str , Self ], None , None ]:
1739
1742
"""
1740
- Return a dict of str(dtype) -> BlockManager
1743
+ Yield (str(dtype), BlockManager) tuples, one per distinct dtype string
1741
1744
1742
1745
Returns
1743
1746
-------
1744
- values : a dict of dtype -> BlockManager
1747
+ values : a generator of (str(dtype), BlockManager) tuples
1745
1748
"""
1746
-
1747
- bd : dict [str , list [Block ]] = {}
1748
- for b in self .blocks :
1749
- bd .setdefault (str (b .dtype ), []).append (b )
1750
-
1751
- # TODO(EA2D): the combine will be unnecessary with 2D EAs
1752
- return {dtype : self ._combine (blocks ) for dtype , blocks in bd .items ()}
1749
+ key = lambda block : str (block .dtype )
1750
+ for dtype , blocks in itertools .groupby (sorted (self .blocks , key = key ), key = key ):
1751
+ # TODO(EA2D): the combine will be unnecessary with 2D EAs
1752
+ yield dtype , self ._combine (list (blocks ))
1753
1753
1754
1754
def as_array (
1755
1755
self ,
@@ -2330,7 +2330,7 @@ def _grouping_func(tup: tuple[int, ArrayLike]) -> tuple[int, DtypeObj]:
2330
2330
2331
2331
2332
2332
def _form_blocks (arrays : list [ArrayLike ], consolidate : bool , refs : list ) -> list [Block ]:
2333
- tuples = list ( enumerate (arrays ) )
2333
+ tuples = enumerate (arrays )
2334
2334
2335
2335
if not consolidate :
2336
2336
return _tuples_to_blocks_no_consolidate (tuples , refs )
@@ -2351,7 +2351,7 @@ def _form_blocks(arrays: list[ArrayLike], consolidate: bool, refs: list) -> list
2351
2351
if issubclass (dtype .type , (str , bytes )):
2352
2352
dtype = np .dtype (object )
2353
2353
2354
- values , placement = _stack_arrays (list ( tup_block ) , dtype )
2354
+ values , placement = _stack_arrays (tup_block , dtype )
2355
2355
if is_dtlike :
2356
2356
values = ensure_wrapped_if_datetimelike (values )
2357
2357
blk = block_type (values , placement = BlockPlacement (placement ), ndim = 2 )
@@ -2450,15 +2450,6 @@ def _merge_blocks(
2450
2450
return blocks , False
2451
2451
2452
2452
2453
- def _fast_count_smallints (arr : npt .NDArray [np .intp ]):
2454
- """Faster version of set(arr) for sequences of small numbers."""
2455
- counts = np .bincount (arr )
2456
- nz = counts .nonzero ()[0 ]
2457
- # Note: list(zip(...) outperforms list(np.c_[nz, counts[nz]]) here,
2458
- # in one benchmark by a factor of 11
2459
- return zip (nz , counts [nz ])
2460
-
2461
-
2462
2453
def _preprocess_slice_or_indexer (
2463
2454
slice_or_indexer : slice | np .ndarray , length : int , allow_fill : bool
2464
2455
):
0 commit comments