@@ -580,9 +580,16 @@ cdef class SharedBlock:
580
580
"""
581
581
cdef:
582
582
public BlockPlacement _mgr_locs
583
+ public BlockValuesRefs refs
583
584
readonly int ndim
584
585
585
- def __cinit__ (self , values , placement: BlockPlacement , ndim: int ):
586
+ def __cinit__ (
587
+ self ,
588
+ values ,
589
+ placement: BlockPlacement ,
590
+ ndim: int ,
591
+ refs: BlockValuesRefs | None = None ,
592
+ ):
586
593
"""
587
594
Parameters
588
595
----------
@@ -591,9 +598,22 @@ cdef class SharedBlock:
591
598
placement : BlockPlacement
592
599
ndim : int
593
600
1 for SingleBlockManager/Series, 2 for BlockManager/DataFrame
601
+ refs : BlockValuesRefs, optional
602
+ Ref tracking object or None if block does not have any refs.
594
603
"""
595
604
self ._mgr_locs = placement
596
605
self .ndim = ndim
606
+ if refs is None :
607
+ # if no refs are passed, that means we are creating a Block from
608
+ # new values that it uniquely owns -> start a new BlockValuesRefs
609
+ # object that only references this block
610
+ self .refs = BlockValuesRefs(self )
611
+ else :
612
+ # if refs are passed, this is the BlockValuesRefs object that is shared
613
+ # with the parent blocks which share the values, and a reference to this
614
+ # new block is added
615
+ refs.add_reference(self )
616
+ self .refs = refs
597
617
598
618
cpdef __reduce__(self ):
599
619
args = (self .values, self .mgr_locs.indexer, self .ndim)
@@ -619,9 +639,15 @@ cdef class NumpyBlock(SharedBlock):
619
639
cdef:
620
640
public ndarray values
621
641
622
- def __cinit__ (self , ndarray values , BlockPlacement placement , int ndim ):
642
+ def __cinit__ (
643
+ self ,
644
+ ndarray values ,
645
+ BlockPlacement placement ,
646
+ int ndim ,
647
+ refs: BlockValuesRefs | None = None ,
648
+ ):
623
649
# set values here; the (implicit) call to SharedBlock.__cinit__ will
624
- # set placement and ndim
650
+ # set placement, ndim and refs
625
651
self .values = values
626
652
627
653
cpdef NumpyBlock getitem_block_index(self , slice slicer):
@@ -631,7 +657,7 @@ cdef class NumpyBlock(SharedBlock):
631
657
Assumes self.ndim == 2
632
658
"""
633
659
new_values = self .values[..., slicer]
634
- return type (self )(new_values, self ._mgr_locs, ndim = self .ndim)
660
+ return type (self )(new_values, self ._mgr_locs, ndim = self .ndim, refs = self .refs )
635
661
636
662
637
663
cdef class NDArrayBackedBlock(SharedBlock):
@@ -641,9 +667,15 @@ cdef class NDArrayBackedBlock(SharedBlock):
641
667
cdef public:
642
668
NDArrayBacked values
643
669
644
- def __cinit__ (self , NDArrayBacked values , BlockPlacement placement , int ndim ):
670
+ def __cinit__ (
671
+ self ,
672
+ NDArrayBacked values ,
673
+ BlockPlacement placement ,
674
+ int ndim ,
675
+ refs: BlockValuesRefs | None = None ,
676
+ ):
645
677
# set values here; the (implicit) call to SharedBlock.__cinit__ will
646
- # set placement and ndim
678
+ # set placement, ndim and refs
647
679
self .values = values
648
680
649
681
cpdef NDArrayBackedBlock getitem_block_index(self , slice slicer):
@@ -653,16 +685,22 @@ cdef class NDArrayBackedBlock(SharedBlock):
653
685
Assumes self.ndim == 2
654
686
"""
655
687
new_values = self .values[..., slicer]
656
- return type (self )(new_values, self ._mgr_locs, ndim = self .ndim)
688
+ return type (self )(new_values, self ._mgr_locs, ndim = self .ndim, refs = self .refs )
657
689
658
690
659
691
cdef class Block(SharedBlock):
660
692
cdef:
661
693
public object values
662
694
663
- def __cinit__ (self , object values , BlockPlacement placement , int ndim ):
695
+ def __cinit__ (
696
+ self ,
697
+ object values ,
698
+ BlockPlacement placement ,
699
+ int ndim ,
700
+ refs: BlockValuesRefs | None = None ,
701
+ ):
664
702
# set values here; the (implicit) call to SharedBlock.__cinit__ will
665
- # set placement and ndim
703
+ # set placement, ndim and refs
666
704
self .values = values
667
705
668
706
@@ -673,15 +711,11 @@ cdef class BlockManager:
673
711
public list axes
674
712
public bint _known_consolidated, _is_consolidated
675
713
public ndarray _blknos, _blklocs
676
- public list refs
677
- public object parent
678
714
679
715
def __cinit__ (
680
716
self ,
681
717
blocks = None ,
682
718
axes = None ,
683
- refs = None ,
684
- parent = None ,
685
719
verify_integrity = True ,
686
720
):
687
721
# None as defaults for unpickling GH#42345
@@ -695,8 +729,6 @@ cdef class BlockManager:
695
729
696
730
self .blocks = blocks
697
731
self .axes = axes.copy() # copy to make sure we are not remotely-mutable
698
- self .refs = refs
699
- self .parent = parent
700
732
701
733
# Populate known_consolidate, blknos, and blklocs lazily
702
734
self ._known_consolidated = False
@@ -805,16 +837,12 @@ cdef class BlockManager:
805
837
ndarray blknos, blklocs
806
838
807
839
nbs = []
808
- nrefs = []
809
840
for blk in self .blocks:
810
841
nb = blk.getitem_block_index(slobj)
811
842
nbs.append(nb)
812
- nrefs.append(weakref.ref(blk))
813
843
814
844
new_axes = [self .axes[0 ], self .axes[1 ]._getitem_slice(slobj)]
815
- mgr = type (self )(
816
- tuple (nbs), new_axes, nrefs, parent = self , verify_integrity = False
817
- )
845
+ mgr = type (self )(tuple (nbs), new_axes, verify_integrity = False )
818
846
819
847
# We can avoid having to rebuild blklocs/blknos
820
848
blklocs = self ._blklocs
@@ -827,7 +855,7 @@ cdef class BlockManager:
827
855
def get_slice (self , slobj: slice , axis: int = 0 ) -> BlockManager:
828
856
829
857
if axis == 0:
830
- new_blocks , new_refs = self ._slice_take_blocks_ax0(slobj)
858
+ new_blocks = self ._slice_take_blocks_ax0(slobj)
831
859
elif axis == 1:
832
860
return self._get_index_slice(slobj )
833
861
else:
@@ -836,6 +864,40 @@ cdef class BlockManager:
836
864
new_axes = list (self .axes)
837
865
new_axes[axis] = new_axes[axis]._getitem_slice(slobj )
838
866
839
- return type(self )(
840
- tuple(new_blocks ), new_axes , new_refs , parent = self , verify_integrity = False
841
- )
867
+ return type(self )(tuple(new_blocks ), new_axes , verify_integrity = False )
868
+
869
+
870
cdef class BlockValuesRefs:
    """Registry of the blocks that share one set of values.

    The blocks are stored in a ``weakref.WeakSet``, so being registered here
    does not by itself keep a block alive.
    """
    cdef:
        # weakref.WeakSet holding every block registered with this tracker
        public object referenced_blocks

    def __cinit__(self, blk: SharedBlock) -> None:
        # Start the collection with the block that creates this tracker.
        tracked = weakref.WeakSet()
        tracked.add(blk)
        self.referenced_blocks = tracked

    def add_reference(self, blk: SharedBlock) -> None:
        """Register one more block with this tracker.

        Parameters
        ----------
        blk : SharedBlock
            The block to add to the collection of referencing blocks.
        """
        self.referenced_blocks.add(blk)

    def has_reference(self) -> bool:
        """Report whether any block other than the owner is still registered.

        Only entries that are still alive are counted, and a block's own
        entry is not treated as a reference.

        Returns
        -------
        bool
        """
        # A single entry is just the block pointing at itself.
        live_count = len(self.referenced_blocks)
        return live_count > 1
0 commit comments