1
1
from __future__ import annotations
2
2
3
+ from functools import wraps
3
4
import re
4
5
from typing import (
5
6
TYPE_CHECKING ,
30
31
ArrayLike ,
31
32
Dtype ,
32
33
DtypeObj ,
34
+ F ,
33
35
Shape ,
34
36
final ,
35
37
)
121
123
_dtype_obj = np .dtype ("object" )
122
124
123
125
126
+ def maybe_split (meth : F ) -> F :
127
+ """
128
+ If we have a multi-column block, split and operate block-wise. Otherwise
129
+ use the original method.
130
+ """
131
+
132
+ @wraps (meth )
133
+ def newfunc (self , * args , ** kwargs ) -> List [Block ]:
134
+
135
+ if self .ndim == 1 or self .shape [0 ] == 1 :
136
+ return meth (self , * args , ** kwargs )
137
+ else :
138
+ # Split and operate column-by-column
139
+ return self .split_and_operate (meth , * args , ** kwargs )
140
+
141
+ return cast (F , newfunc )
142
+
143
+
124
144
class Block (PandasObject ):
125
145
"""
126
146
Canonical n-dimensional unit of homogeneous dtype contained in a pandas
@@ -464,17 +484,16 @@ def fillna(
464
484
# we can't process the value, but nothing to do
465
485
return [self ] if inplace else [self .copy ()]
466
486
467
- # operate column-by-column
468
- def f (mask , val , idx ):
469
- block = self .coerce_to_target_dtype (value )
470
-
471
- # slice out our block
472
- if idx is not None :
473
- # i.e. self.ndim == 2
474
- block = block .getitem_block (slice (idx , idx + 1 ))
475
- return block .fillna (value , limit = limit , inplace = inplace , downcast = None )
487
+ elif self .ndim == 1 or self .shape [0 ] == 1 :
488
+ blk = self .coerce_to_target_dtype (value )
489
+ # bc we have already cast, inplace=True may avoid an extra copy
490
+ return blk .fillna (value , limit = limit , inplace = True , downcast = None )
476
491
477
- return self .split_and_operate (None , f , inplace )
492
+ else :
493
+ # operate column-by-column
494
+ return self .split_and_operate (
495
+ type (self ).fillna , value , limit = limit , inplace = inplace , downcast = None
496
+ )
478
497
479
498
@final
480
499
def _split (self ) -> List [Block ]:
@@ -492,75 +511,27 @@ def _split(self) -> List[Block]:
492
511
return new_blocks
493
512
494
513
@final
495
- def split_and_operate (
496
- self , mask , f , inplace : bool , ignore_failures : bool = False
497
- ) -> List [Block ]:
514
+ def split_and_operate (self , func , * args , ** kwargs ) -> List [Block ]:
498
515
"""
499
- split the block per-column, and apply the callable f
500
- per-column, return a new block for each. Handle
501
- masking which will not change a block unless needed.
516
+ Split the block and apply func column-by-column.
502
517
503
518
Parameters
504
519
----------
505
- mask : 2-d boolean mask
506
- f : callable accepting (1d-mask, 1d values, indexer)
507
- inplace : bool
508
- ignore_failures : bool, default False
520
+ func : Block method
521
+ *args
522
+ **kwargs
509
523
510
524
Returns
511
525
-------
512
- list of blocks
526
+ List[Block]
513
527
"""
514
- if mask is None :
515
- mask = np .broadcast_to (True , shape = self .shape )
516
-
517
- new_values = self .values
518
-
519
- def make_a_block (nv , ref_loc ):
520
- if isinstance (nv , list ):
521
- assert len (nv ) == 1 , nv
522
- assert isinstance (nv [0 ], Block )
523
- block = nv [0 ]
524
- else :
525
- # Put back the dimension that was taken from it and make
526
- # a block out of the result.
527
- nv = ensure_block_shape (nv , ndim = self .ndim )
528
- block = self .make_block (values = nv , placement = ref_loc )
529
- return block
530
-
531
- # ndim == 1
532
- if self .ndim == 1 :
533
- if mask .any ():
534
- nv = f (mask , new_values , None )
535
- else :
536
- nv = new_values if inplace else new_values .copy ()
537
- block = make_a_block (nv , self ._mgr_locs )
538
- return [block ]
539
-
540
- # ndim > 1
541
- new_blocks = []
542
- for i , ref_loc in enumerate (self ._mgr_locs ):
543
- m = mask [i ]
544
- v = new_values [i ]
545
-
546
- # need a new block
547
- if m .any () or m .size == 0 :
548
- # Apply our function; we may ignore_failures if this is a
549
- # reduction that is dropping nuisance columns GH#37827
550
- try :
551
- nv = f (m , v , i )
552
- except TypeError :
553
- if ignore_failures :
554
- continue
555
- else :
556
- raise
557
- else :
558
- nv = v if inplace else v .copy ()
559
-
560
- block = make_a_block (nv , [ref_loc ])
561
- new_blocks .append (block )
528
+ assert self .ndim == 2 and self .shape [0 ] != 1
562
529
563
- return new_blocks
530
+ res_blocks = []
531
+ for nb in self ._split ():
532
+ rbs = func (nb , * args , ** kwargs )
533
+ res_blocks .extend (rbs )
534
+ return res_blocks
564
535
565
536
def _maybe_downcast (self , blocks : List [Block ], downcast = None ) -> List [Block ]:
566
537
@@ -600,13 +571,17 @@ def downcast(self, dtypes=None) -> List[Block]:
600
571
elif dtypes != "infer" :
601
572
raise AssertionError ("dtypes as dict is not supported yet" )
602
573
603
- # operate column-by-column
604
- # this is expensive as it splits the blocks items-by-item
605
- def f (mask , val , idx ):
606
- val = maybe_downcast_to_dtype (val , dtype = "infer" )
607
- return val
574
+ return self ._downcast_2d ()
608
575
609
- return self .split_and_operate (None , f , False )
576
+ @maybe_split
577
+ def _downcast_2d (self ) -> List [Block ]:
578
+ """
579
+ downcast specialized to 2D case post-validation.
580
+
581
+ Refactored to allow use of maybe_split.
582
+ """
583
+ new_values = maybe_downcast_to_dtype (self .values , dtype = "infer" )
584
+ return [self .make_block (new_values )]
610
585
611
586
@final
612
587
def astype (self , dtype , copy : bool = False , errors : str = "raise" ):
@@ -735,18 +710,13 @@ def replace(
735
710
# bc _can_hold_element is incorrect.
736
711
return [self ] if inplace else [self .copy ()]
737
712
738
- if not self ._can_hold_element (value ):
739
- if self .ndim == 2 and self .shape [0 ] > 1 :
740
- # split so that we only upcast where necessary
741
- nbs = self ._split ()
742
- res_blocks = extend_blocks (
743
- [
744
- blk .replace (to_replace , value , inplace = inplace , regex = regex )
745
- for blk in nbs
746
- ]
747
- )
748
- return res_blocks
713
+ elif self ._can_hold_element (value ):
714
+ blk = self if inplace else self .copy ()
715
+ putmask_inplace (blk .values , mask , value )
716
+ blocks = blk .convert (numeric = False , copy = False )
717
+ return blocks
749
718
719
+ elif self .ndim == 1 or self .shape [0 ] == 1 :
750
720
blk = self .coerce_to_target_dtype (value )
751
721
return blk .replace (
752
722
to_replace = to_replace ,
@@ -755,10 +725,11 @@ def replace(
755
725
regex = regex ,
756
726
)
757
727
758
- blk = self if inplace else self .copy ()
759
- putmask_inplace (blk .values , mask , value )
760
- blocks = blk .convert (numeric = False , copy = False )
761
- return blocks
728
+ else :
729
+ # split so that we only upcast where necessary
730
+ return self .split_and_operate (
731
+ type (self ).replace , to_replace , value , inplace = inplace , regex = regex
732
+ )
762
733
763
734
@final
764
735
def _replace_regex (
@@ -2048,6 +2019,8 @@ class ObjectBlock(Block):
2048
2019
is_object = True
2049
2020
_can_hold_na = True
2050
2021
2022
+ values : np .ndarray
2023
+
2051
2024
@property
2052
2025
def is_bool (self ):
2053
2026
"""
@@ -2056,26 +2029,15 @@ def is_bool(self):
2056
2029
"""
2057
2030
return lib .is_bool_array (self .values .ravel ("K" ))
2058
2031
2032
+ @maybe_split
2059
2033
def reduce (self , func , ignore_failures : bool = False ) -> List [Block ]:
2060
2034
"""
2061
2035
For object-dtype, we operate column-wise.
2062
2036
"""
2063
2037
assert self .ndim == 2
2064
2038
2065
- values = self .values
2066
- if len (values ) > 1 :
2067
- # split_and_operate expects func with signature (mask, values, inplace)
2068
- def mask_func (mask , values , inplace ):
2069
- if values .ndim == 1 :
2070
- values = values .reshape (1 , - 1 )
2071
- return func (values )
2072
-
2073
- return self .split_and_operate (
2074
- None , mask_func , False , ignore_failures = ignore_failures
2075
- )
2076
-
2077
2039
try :
2078
- res = func (values )
2040
+ res = func (self . values )
2079
2041
except TypeError :
2080
2042
if not ignore_failures :
2081
2043
raise
@@ -2086,6 +2048,7 @@ def mask_func(mask, values, inplace):
2086
2048
res = res .reshape (1 , - 1 )
2087
2049
return [self .make_block_same_class (res )]
2088
2050
2051
+ @maybe_split
2089
2052
def convert (
2090
2053
self ,
2091
2054
copy : bool = True ,
@@ -2097,30 +2060,15 @@ def convert(
2097
2060
attempt to cast any object types to better types return a copy of
2098
2061
the block (if copy = True) by definition we ARE an ObjectBlock!!!!!
2099
2062
"""
2100
-
2101
- # operate column-by-column
2102
- def f (mask , val , idx ):
2103
- shape = val .shape
2104
- values = soft_convert_objects (
2105
- val .ravel (),
2106
- datetime = datetime ,
2107
- numeric = numeric ,
2108
- timedelta = timedelta ,
2109
- copy = copy ,
2110
- )
2111
- if isinstance (values , np .ndarray ):
2112
- # TODO(EA2D): allow EA once reshape is supported
2113
- values = values .reshape (shape )
2114
-
2115
- return values
2116
-
2117
- if self .ndim == 2 :
2118
- blocks = self .split_and_operate (None , f , False )
2119
- else :
2120
- values = f (None , self .values .ravel (), None )
2121
- blocks = [self .make_block (values )]
2122
-
2123
- return blocks
2063
+ res_values = soft_convert_objects (
2064
+ self .values .ravel (),
2065
+ datetime = datetime ,
2066
+ numeric = numeric ,
2067
+ timedelta = timedelta ,
2068
+ copy = copy ,
2069
+ )
2070
+ res_values = ensure_block_shape (res_values , self .ndim )
2071
+ return [self .make_block (res_values )]
2124
2072
2125
2073
def _maybe_downcast (self , blocks : List [Block ], downcast = None ) -> List [Block ]:
2126
2074
0 commit comments