diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 88780bac06637..f735ce682fc83 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -944,12 +944,17 @@ def is_in_obj(gpr) -> bool:
         if not hasattr(gpr, "name"):
             return False
         if using_copy_on_write():
-            # For the CoW case, we need an equality check as the identity check
-            # no longer works (each Series from column access is a new object)
+            # For the CoW case, we check the references to determine if the
+            # series is part of the object
             try:
-                return gpr.equals(obj[gpr.name])
-            except (AttributeError, KeyError, IndexError, InvalidIndexError):
+                obj_gpr_column = obj[gpr.name]
+            except (KeyError, IndexError, InvalidIndexError):
                 return False
+            if isinstance(gpr, Series) and isinstance(obj_gpr_column, Series):
+                return gpr._mgr.references_same_values(  # type: ignore[union-attr]
+                    obj_gpr_column._mgr, 0  # type: ignore[arg-type]
+                )
+            return False
         try:
             return gpr is obj[gpr.name]
         except (KeyError, IndexError, InvalidIndexError):
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 664a122015ba5..ae0819a78c6e8 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -11,6 +11,7 @@
     cast,
 )
 import warnings
+import weakref
 
 import numpy as np
 
@@ -258,6 +259,14 @@ def add_references(self, mgr: BaseBlockManager) -> None:
                 # "Block"; expected "SharedBlock"
                 blk.refs.add_reference(blk)  # type: ignore[arg-type]
 
+    def references_same_values(self, mgr: BaseBlockManager, blkno: int) -> bool:
+        """
+        Checks if two blocks from two different block managers reference the
+        same underlying values.
+        """
+        ref = weakref.ref(self.blocks[blkno])
+        return ref in mgr.blocks[blkno].refs.referenced_blocks
+
     def get_dtypes(self):
         dtypes = np.array([blk.dtype for blk in self.blocks])
         return dtypes.take(self.blknos)