55
55
import pandas .core .algorithms as algos
56
56
from pandas .core .arrays ._mixins import NDArrayBackedExtensionArray
57
57
from pandas .core .arrays .sparse import SparseDtype
58
+ import pandas .core .common as com
58
59
from pandas .core .construction import (
59
60
ensure_wrapped_if_datetimelike ,
60
61
extract_array ,
@@ -146,6 +147,7 @@ class BaseBlockManager(DataManager):
146
147
blocks : tuple [Block , ...]
147
148
axes : list [Index ]
148
149
refs : list [weakref .ref | None ] | None
150
+ parent : object
149
151
150
152
@property
151
153
def ndim (self ) -> int :
@@ -163,6 +165,7 @@ def from_blocks(
163
165
blocks : list [Block ],
164
166
axes : list [Index ],
165
167
refs : list [weakref .ref | None ] | None = None ,
168
+ parent : object = None ,
166
169
) -> T :
167
170
raise NotImplementedError
168
171
@@ -262,6 +265,8 @@ def _clear_reference_block(self, blkno: int) -> None:
262
265
"""
263
266
if self .refs is not None :
264
267
self .refs [blkno ] = None
268
+ if com .all_none (* self .refs ):
269
+ self .parent = None
265
270
266
271
def get_dtypes (self ):
267
272
dtypes = np .array ([blk .dtype for blk in self .blocks ])
@@ -602,7 +607,9 @@ def _combine(
602
607
axes [- 1 ] = index
603
608
axes [0 ] = self .items .take (indexer )
604
609
605
- return type (self ).from_blocks (new_blocks , axes , new_refs )
610
+ return type (self ).from_blocks (
611
+ new_blocks , axes , new_refs , parent = None if copy else self
612
+ )
606
613
607
614
@property
608
615
def nblocks (self ) -> int :
@@ -645,11 +652,14 @@ def copy_func(ax):
645
652
new_refs : list [weakref .ref | None ] | None
646
653
if deep :
647
654
new_refs = None
655
+ parent = None
648
656
else :
649
657
new_refs = [weakref .ref (blk ) for blk in self .blocks ]
658
+ parent = self
650
659
651
660
res .axes = new_axes
652
661
res .refs = new_refs
662
+ res .parent = parent
653
663
654
664
if self .ndim > 1 :
655
665
# Avoid needing to re-compute these
@@ -738,6 +748,7 @@ def reindex_indexer(
738
748
only_slice = only_slice ,
739
749
use_na_proxy = use_na_proxy ,
740
750
)
751
+ parent = None if com .all_none (* new_refs ) else self
741
752
else :
742
753
new_blocks = [
743
754
blk .take_nd (
@@ -750,11 +761,12 @@ def reindex_indexer(
750
761
for blk in self .blocks
751
762
]
752
763
new_refs = None
764
+ parent = None
753
765
754
766
new_axes = list (self .axes )
755
767
new_axes [axis ] = new_axis
756
768
757
- new_mgr = type (self ).from_blocks (new_blocks , new_axes , new_refs )
769
+ new_mgr = type (self ).from_blocks (new_blocks , new_axes , new_refs , parent = parent )
758
770
if axis == 1 :
759
771
# We can avoid the need to rebuild these
760
772
new_mgr ._blknos = self .blknos .copy ()
@@ -989,6 +1001,7 @@ def __init__(
989
1001
blocks : Sequence [Block ],
990
1002
axes : Sequence [Index ],
991
1003
refs : list [weakref .ref | None ] | None = None ,
1004
+ parent : object = None ,
992
1005
verify_integrity : bool = True ,
993
1006
) -> None :
994
1007
@@ -1053,11 +1066,13 @@ def from_blocks(
1053
1066
blocks : list [Block ],
1054
1067
axes : list [Index ],
1055
1068
refs : list [weakref .ref | None ] | None = None ,
1069
+ parent : object = None ,
1056
1070
) -> BlockManager :
1057
1071
"""
1058
1072
Constructor for BlockManager and SingleBlockManager with same signature.
1059
1073
"""
1060
- return cls (blocks , axes , refs , verify_integrity = False )
1074
+ parent = parent if _using_copy_on_write () else None
1075
+ return cls (blocks , axes , refs , parent , verify_integrity = False )
1061
1076
1062
1077
# ----------------------------------------------------------------
1063
1078
# Indexing
@@ -1079,7 +1094,7 @@ def fast_xs(self, loc: int) -> SingleBlockManager:
1079
1094
block = new_block (result , placement = slice (0 , len (result )), ndim = 1 )
1080
1095
# in the case of a single block, the new block is a view
1081
1096
ref = weakref .ref (self .blocks [0 ])
1082
- return SingleBlockManager (block , self .axes [0 ], [ref ])
1097
+ return SingleBlockManager (block , self .axes [0 ], [ref ], parent = self )
1083
1098
1084
1099
dtype = interleaved_dtype ([blk .dtype for blk in self .blocks ])
1085
1100
@@ -1113,7 +1128,7 @@ def fast_xs(self, loc: int) -> SingleBlockManager:
1113
1128
block = new_block (result , placement = slice (0 , len (result )), ndim = 1 )
1114
1129
return SingleBlockManager (block , self .axes [0 ])
1115
1130
1116
- def iget (self , i : int ) -> SingleBlockManager :
1131
+ def iget (self , i : int , track_ref : bool = True ) -> SingleBlockManager :
1117
1132
"""
1118
1133
Return the data as a SingleBlockManager.
1119
1134
"""
@@ -1123,7 +1138,9 @@ def iget(self, i: int) -> SingleBlockManager:
1123
1138
# shortcut for select a single-dim from a 2-dim BM
1124
1139
bp = BlockPlacement (slice (0 , len (values )))
1125
1140
nb = type (block )(values , placement = bp , ndim = 1 )
1126
- return SingleBlockManager (nb , self .axes [1 ], [weakref .ref (block )])
1141
+ ref = weakref .ref (block ) if track_ref else None
1142
+ parent = self if track_ref else None
1143
+ return SingleBlockManager (nb , self .axes [1 ], [ref ], parent )
1127
1144
1128
1145
def iget_values (self , i : int ) -> ArrayLike :
1129
1146
"""
@@ -1365,7 +1382,9 @@ def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None
1365
1382
self .blocks = tuple (blocks )
1366
1383
self ._clear_reference_block (blkno )
1367
1384
1368
- col_mgr = self .iget (loc )
1385
+ # this manager is only created temporarily to mutate the values in place
1386
+ # so don't track references, otherwise the `setitem` would trigger another copy-on-write (CoW) copy
1387
+ col_mgr = self .iget (loc , track_ref = False )
1369
1388
new_mgr = col_mgr .setitem ((idx ,), value )
1370
1389
self .iset (loc , new_mgr ._block .values , inplace = True )
1371
1390
@@ -1463,7 +1482,9 @@ def idelete(self, indexer) -> BlockManager:
1463
1482
nbs , new_refs = self ._slice_take_blocks_ax0 (taker , only_slice = True )
1464
1483
new_columns = self .items [~ is_deleted ]
1465
1484
axes = [new_columns , self .axes [1 ]]
1466
- return type (self )(tuple (nbs ), axes , new_refs , verify_integrity = False )
1485
+ # TODO this might not be needed (can a delete ever be done in a chained manner?)
1486
+ parent = None if com .all_none (* new_refs ) else self
1487
+ return type (self )(tuple (nbs ), axes , new_refs , parent , verify_integrity = False )
1467
1488
1468
1489
# ----------------------------------------------------------------
1469
1490
# Block-wise Operation
@@ -1869,6 +1890,7 @@ def __init__(
1869
1890
block : Block ,
1870
1891
axis : Index ,
1871
1892
refs : list [weakref .ref | None ] | None = None ,
1893
+ parent : object = None ,
1872
1894
verify_integrity : bool = False ,
1873
1895
fastpath = lib .no_default ,
1874
1896
) -> None :
@@ -1887,13 +1909,15 @@ def __init__(
1887
1909
self .axes = [axis ]
1888
1910
self .blocks = (block ,)
1889
1911
self .refs = refs
1912
+ self .parent = parent if _using_copy_on_write () else None
1890
1913
1891
1914
@classmethod
1892
1915
def from_blocks (
1893
1916
cls ,
1894
1917
blocks : list [Block ],
1895
1918
axes : list [Index ],
1896
1919
refs : list [weakref .ref | None ] | None = None ,
1920
+ parent : object = None ,
1897
1921
) -> SingleBlockManager :
1898
1922
"""
1899
1923
Constructor for BlockManager and SingleBlockManager with same signature.
@@ -1902,7 +1926,7 @@ def from_blocks(
1902
1926
assert len (axes ) == 1
1903
1927
if refs is not None :
1904
1928
assert len (refs ) == 1
1905
- return cls (blocks [0 ], axes [0 ], refs , verify_integrity = False )
1929
+ return cls (blocks [0 ], axes [0 ], refs , parent , verify_integrity = False )
1906
1930
1907
1931
@classmethod
1908
1932
def from_array (cls , array : ArrayLike , index : Index ) -> SingleBlockManager :
@@ -1922,7 +1946,10 @@ def to_2d_mgr(self, columns: Index) -> BlockManager:
1922
1946
new_blk = type (blk )(arr , placement = bp , ndim = 2 )
1923
1947
axes = [columns , self .axes [0 ]]
1924
1948
refs : list [weakref .ref | None ] = [weakref .ref (blk )]
1925
- return BlockManager ([new_blk ], axes = axes , refs = refs , verify_integrity = False )
1949
+ parent = self if _using_copy_on_write () else None
1950
+ return BlockManager (
1951
+ [new_blk ], axes = axes , refs = refs , parent = parent , verify_integrity = False
1952
+ )
1926
1953
1927
1954
def _has_no_reference (self , i : int = 0 ) -> bool :
1928
1955
"""
@@ -2004,7 +2031,7 @@ def getitem_mgr(self, indexer: slice | npt.NDArray[np.bool_]) -> SingleBlockMana
2004
2031
new_idx = self .index [indexer ]
2005
2032
# TODO(CoW) in theory only need to track reference if new_array is a view
2006
2033
ref = weakref .ref (blk )
2007
- return type (self )(block , new_idx , [ref ])
2034
+ return type (self )(block , new_idx , [ref ], parent = self )
2008
2035
2009
2036
def get_slice (self , slobj : slice , axis : int = 0 ) -> SingleBlockManager :
2010
2037
# Assertion disabled for performance
@@ -2017,7 +2044,9 @@ def get_slice(self, slobj: slice, axis: int = 0) -> SingleBlockManager:
2017
2044
bp = BlockPlacement (slice (0 , len (array )))
2018
2045
block = type (blk )(array , placement = bp , ndim = 1 )
2019
2046
new_index = self .index ._getitem_slice (slobj )
2020
- return type (self )(block , new_index , [weakref .ref (blk )])
2047
+ # TODO this method is only used in groupby SeriesSplitter at the moment,
2048
+ # so passing refs / parent is not yet covered by the tests
2049
+ return type (self )(block , new_index , [weakref .ref (blk )], parent = self )
2021
2050
2022
2051
@property
2023
2052
def index (self ) -> Index :
@@ -2064,6 +2093,7 @@ def setitem_inplace(self, indexer, value) -> None:
2064
2093
if _using_copy_on_write () and not self ._has_no_reference (0 ):
2065
2094
self .blocks = (self ._block .copy (),)
2066
2095
self .refs = None
2096
+ self .parent = None
2067
2097
self ._cache .clear ()
2068
2098
2069
2099
super ().setitem_inplace (indexer , value )
@@ -2080,6 +2110,7 @@ def idelete(self, indexer) -> SingleBlockManager:
2080
2110
self ._cache .clear ()
2081
2111
# clear reference since delete always results in a new array
2082
2112
self .refs = None
2113
+ self .parent = None
2083
2114
return self
2084
2115
2085
2116
def fast_xs (self , loc ):
0 commit comments