57
57
import pandas .core .algorithms as algos
58
58
from pandas .core .arrays ._mixins import NDArrayBackedExtensionArray
59
59
from pandas .core .arrays .sparse import SparseDtype
60
+ import pandas .core .common as com
60
61
from pandas .core .construction import (
61
62
ensure_wrapped_if_datetimelike ,
62
63
extract_array ,
@@ -148,6 +149,7 @@ class BaseBlockManager(DataManager):
148
149
blocks : tuple [Block , ...]
149
150
axes : list [Index ]
150
151
refs : list [weakref .ref | None ] | None
152
+ parent : object
151
153
152
154
@property
153
155
def ndim (self ) -> int :
@@ -165,6 +167,7 @@ def from_blocks(
165
167
blocks : list [Block ],
166
168
axes : list [Index ],
167
169
refs : list [weakref .ref | None ] | None = None ,
170
+ parent : object = None ,
168
171
) -> T :
169
172
raise NotImplementedError
170
173
@@ -264,6 +267,8 @@ def _clear_reference_block(self, blkno: int) -> None:
264
267
"""
265
268
if self .refs is not None :
266
269
self .refs [blkno ] = None
270
+ if com .all_none (* self .refs ):
271
+ self .parent = None
267
272
268
273
def get_dtypes (self ):
269
274
dtypes = np .array ([blk .dtype for blk in self .blocks ])
@@ -605,7 +610,9 @@ def _combine(
605
610
axes [- 1 ] = index
606
611
axes [0 ] = self .items .take (indexer )
607
612
608
- return type (self ).from_blocks (new_blocks , axes , new_refs )
613
+ return type (self ).from_blocks (
614
+ new_blocks , axes , new_refs , parent = None if copy else self
615
+ )
609
616
610
617
@property
611
618
def nblocks (self ) -> int :
@@ -648,11 +655,14 @@ def copy_func(ax):
648
655
new_refs : list [weakref .ref | None ] | None
649
656
if deep :
650
657
new_refs = None
658
+ parent = None
651
659
else :
652
660
new_refs = [weakref .ref (blk ) for blk in self .blocks ]
661
+ parent = self
653
662
654
663
res .axes = new_axes
655
664
res .refs = new_refs
665
+ res .parent = parent
656
666
657
667
if self .ndim > 1 :
658
668
# Avoid needing to re-compute these
@@ -744,6 +754,7 @@ def reindex_indexer(
744
754
only_slice = only_slice ,
745
755
use_na_proxy = use_na_proxy ,
746
756
)
757
+ parent = None if com .all_none (* new_refs ) else self
747
758
else :
748
759
new_blocks = [
749
760
blk .take_nd (
@@ -756,11 +767,12 @@ def reindex_indexer(
756
767
for blk in self .blocks
757
768
]
758
769
new_refs = None
770
+ parent = None
759
771
760
772
new_axes = list (self .axes )
761
773
new_axes [axis ] = new_axis
762
774
763
- new_mgr = type (self ).from_blocks (new_blocks , new_axes , new_refs )
775
+ new_mgr = type (self ).from_blocks (new_blocks , new_axes , new_refs , parent = parent )
764
776
if axis == 1 :
765
777
# We can avoid the need to rebuild these
766
778
new_mgr ._blknos = self .blknos .copy ()
@@ -995,6 +1007,7 @@ def __init__(
995
1007
blocks : Sequence [Block ],
996
1008
axes : Sequence [Index ],
997
1009
refs : list [weakref .ref | None ] | None = None ,
1010
+ parent : object = None ,
998
1011
verify_integrity : bool = True ,
999
1012
) -> None :
1000
1013
@@ -1059,11 +1072,13 @@ def from_blocks(
1059
1072
blocks : list [Block ],
1060
1073
axes : list [Index ],
1061
1074
refs : list [weakref .ref | None ] | None = None ,
1075
+ parent : object = None ,
1062
1076
) -> BlockManager :
1063
1077
"""
1064
1078
Constructor for BlockManager and SingleBlockManager with same signature.
1065
1079
"""
1066
- return cls (blocks , axes , refs , verify_integrity = False )
1080
+ parent = parent if _using_copy_on_write () else None
1081
+ return cls (blocks , axes , refs , parent , verify_integrity = False )
1067
1082
1068
1083
# ----------------------------------------------------------------
1069
1084
# Indexing
@@ -1085,7 +1100,7 @@ def fast_xs(self, loc: int) -> SingleBlockManager:
1085
1100
block = new_block (result , placement = slice (0 , len (result )), ndim = 1 )
1086
1101
# in the case of a single block, the new block is a view
1087
1102
ref = weakref .ref (self .blocks [0 ])
1088
- return SingleBlockManager (block , self .axes [0 ], [ref ])
1103
+ return SingleBlockManager (block , self .axes [0 ], [ref ], parent = self )
1089
1104
1090
1105
dtype = interleaved_dtype ([blk .dtype for blk in self .blocks ])
1091
1106
@@ -1119,7 +1134,7 @@ def fast_xs(self, loc: int) -> SingleBlockManager:
1119
1134
block = new_block (result , placement = slice (0 , len (result )), ndim = 1 )
1120
1135
return SingleBlockManager (block , self .axes [0 ])
1121
1136
1122
- def iget (self , i : int ) -> SingleBlockManager :
1137
+ def iget (self , i : int , track_ref : bool = True ) -> SingleBlockManager :
1123
1138
"""
1124
1139
Return the data as a SingleBlockManager.
1125
1140
"""
@@ -1129,7 +1144,9 @@ def iget(self, i: int) -> SingleBlockManager:
1129
1144
# shortcut for select a single-dim from a 2-dim BM
1130
1145
bp = BlockPlacement (slice (0 , len (values )))
1131
1146
nb = type (block )(values , placement = bp , ndim = 1 )
1132
- return SingleBlockManager (nb , self .axes [1 ], [weakref .ref (block )])
1147
+ ref = weakref .ref (block ) if track_ref else None
1148
+ parent = self if track_ref else None
1149
+ return SingleBlockManager (nb , self .axes [1 ], [ref ], parent )
1133
1150
1134
1151
def iget_values (self , i : int ) -> ArrayLike :
1135
1152
"""
@@ -1371,7 +1388,9 @@ def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None
1371
1388
self .blocks = tuple (blocks )
1372
1389
self ._clear_reference_block (blkno )
1373
1390
1374
- col_mgr = self .iget (loc )
1391
+ # this manager is only created temporarily to mutate the values in place
1392
+ # so don't track references, otherwise the `setitem` would perform CoW again
1393
+ col_mgr = self .iget (loc , track_ref = False )
1375
1394
new_mgr = col_mgr .setitem ((idx ,), value )
1376
1395
self .iset (loc , new_mgr ._block .values , inplace = True )
1377
1396
@@ -1469,7 +1488,9 @@ def idelete(self, indexer) -> BlockManager:
1469
1488
nbs , new_refs = self ._slice_take_blocks_ax0 (taker , only_slice = True )
1470
1489
new_columns = self .items [~ is_deleted ]
1471
1490
axes = [new_columns , self .axes [1 ]]
1472
- return type (self )(tuple (nbs ), axes , new_refs , verify_integrity = False )
1491
+ # TODO this might not be needed (can a delete ever be done in chained manner?)
1492
+ parent = None if com .all_none (* new_refs ) else self
1493
+ return type (self )(tuple (nbs ), axes , new_refs , parent , verify_integrity = False )
1473
1494
1474
1495
# ----------------------------------------------------------------
1475
1496
# Block-wise Operation
@@ -1875,6 +1896,7 @@ def __init__(
1875
1896
block : Block ,
1876
1897
axis : Index ,
1877
1898
refs : list [weakref .ref | None ] | None = None ,
1899
+ parent : object = None ,
1878
1900
verify_integrity : bool = False ,
1879
1901
fastpath = lib .no_default ,
1880
1902
) -> None :
@@ -1893,13 +1915,15 @@ def __init__(
1893
1915
self .axes = [axis ]
1894
1916
self .blocks = (block ,)
1895
1917
self .refs = refs
1918
+ self .parent = parent if _using_copy_on_write () else None
1896
1919
1897
1920
@classmethod
1898
1921
def from_blocks (
1899
1922
cls ,
1900
1923
blocks : list [Block ],
1901
1924
axes : list [Index ],
1902
1925
refs : list [weakref .ref | None ] | None = None ,
1926
+ parent : object = None ,
1903
1927
) -> SingleBlockManager :
1904
1928
"""
1905
1929
Constructor for BlockManager and SingleBlockManager with same signature.
@@ -1908,7 +1932,7 @@ def from_blocks(
1908
1932
assert len (axes ) == 1
1909
1933
if refs is not None :
1910
1934
assert len (refs ) == 1
1911
- return cls (blocks [0 ], axes [0 ], refs , verify_integrity = False )
1935
+ return cls (blocks [0 ], axes [0 ], refs , parent , verify_integrity = False )
1912
1936
1913
1937
@classmethod
1914
1938
def from_array (cls , array : ArrayLike , index : Index ) -> SingleBlockManager :
@@ -1928,7 +1952,10 @@ def to_2d_mgr(self, columns: Index) -> BlockManager:
1928
1952
new_blk = type (blk )(arr , placement = bp , ndim = 2 )
1929
1953
axes = [columns , self .axes [0 ]]
1930
1954
refs : list [weakref .ref | None ] = [weakref .ref (blk )]
1931
- return BlockManager ([new_blk ], axes = axes , refs = refs , verify_integrity = False )
1955
+ parent = self if _using_copy_on_write () else None
1956
+ return BlockManager (
1957
+ [new_blk ], axes = axes , refs = refs , parent = parent , verify_integrity = False
1958
+ )
1932
1959
1933
1960
def _has_no_reference (self , i : int = 0 ) -> bool :
1934
1961
"""
@@ -2010,7 +2037,7 @@ def getitem_mgr(self, indexer: slice | npt.NDArray[np.bool_]) -> SingleBlockMana
2010
2037
new_idx = self .index [indexer ]
2011
2038
# TODO(CoW) in theory only need to track reference if new_array is a view
2012
2039
ref = weakref .ref (blk )
2013
- return type (self )(block , new_idx , [ref ])
2040
+ return type (self )(block , new_idx , [ref ], parent = self )
2014
2041
2015
2042
def get_slice (self , slobj : slice , axis : AxisInt = 0 ) -> SingleBlockManager :
2016
2043
# Assertion disabled for performance
@@ -2023,7 +2050,9 @@ def get_slice(self, slobj: slice, axis: AxisInt = 0) -> SingleBlockManager:
2023
2050
bp = BlockPlacement (slice (0 , len (array )))
2024
2051
block = type (blk )(array , placement = bp , ndim = 1 )
2025
2052
new_index = self .index ._getitem_slice (slobj )
2026
- return type (self )(block , new_index , [weakref .ref (blk )])
2053
+ # TODO this method is only used in groupby SeriesSplitter at the moment,
2054
+ # so passing refs / parent is not yet covered by the tests
2055
+ return type (self )(block , new_index , [weakref .ref (blk )], parent = self )
2027
2056
2028
2057
@property
2029
2058
def index (self ) -> Index :
@@ -2070,6 +2099,7 @@ def setitem_inplace(self, indexer, value) -> None:
2070
2099
if _using_copy_on_write () and not self ._has_no_reference (0 ):
2071
2100
self .blocks = (self ._block .copy (),)
2072
2101
self .refs = None
2102
+ self .parent = None
2073
2103
self ._cache .clear ()
2074
2104
2075
2105
super ().setitem_inplace (indexer , value )
@@ -2086,6 +2116,7 @@ def idelete(self, indexer) -> SingleBlockManager:
2086
2116
self ._cache .clear ()
2087
2117
# clear reference since delete always results in a new array
2088
2118
self .refs = None
2119
+ self .parent = None
2089
2120
return self
2090
2121
2091
2122
def fast_xs (self , loc ):
0 commit comments