From 88cb0114bd97beed813dfbc873c6dbb5157f8fde Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 8 May 2023 21:54:25 +0200 Subject: [PATCH] CLN: Cleanup after CoW setitem PRs --- doc/source/whatsnew/v2.1.0.rst | 8 +++++--- pandas/core/frame.py | 21 ++++++++------------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 258c14cec7925..67f1f2fd86b3a 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -14,10 +14,12 @@ including other versions of pandas. Enhancements ~~~~~~~~~~~~ -.. _whatsnew_210.enhancements.enhancement1: +.. _whatsnew_210.enhancements.cow: -enhancement1 -^^^^^^^^^^^^ +Copy-on-Write improvements +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Setting a :class:`Series` into a :class:`DataFrame` now creates a lazy instead of a deep copy (:issue:`53142`) .. _whatsnew_210.enhancements.enhancement2: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 56c58bc9347e0..572b3f5b4ecc8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3913,7 +3913,6 @@ def isetitem(self, loc, value) -> None: In cases where ``frame.columns`` is unique, this is equivalent to ``frame[frame.columns[i]] = value``. """ - using_cow = using_copy_on_write() if isinstance(value, DataFrame): if is_integer(loc): loc = [loc] @@ -3925,13 +3924,11 @@ def isetitem(self, loc, value) -> None: ) for i, idx in enumerate(loc): - arraylike, refs = self._sanitize_column( - value.iloc[:, i], using_cow=using_cow - ) + arraylike, refs = self._sanitize_column(value.iloc[:, i]) self._iset_item_mgr(idx, arraylike, inplace=False, refs=refs) return - arraylike, refs = self._sanitize_column(value, using_cow=using_cow) + arraylike, refs = self._sanitize_column(value) self._iset_item_mgr(loc, arraylike, inplace=False, refs=refs) def __setitem__(self, key, value): @@ -4164,7 +4161,7 @@ def _set_item(self, key, value) -> None: Series/TimeSeries will be conformed to the DataFrames index to ensure homogeneity. """ - value, refs = self._sanitize_column(value, using_cow=using_copy_on_write()) + value, refs = self._sanitize_column(value) if ( key in self.columns @@ -4806,7 +4803,7 @@ def insert( elif isinstance(value, DataFrame): value = value.iloc[:, 0] - value, refs = self._sanitize_column(value, using_cow=using_copy_on_write()) + value, refs = self._sanitize_column(value) self._mgr.insert(loc, column, value, refs=refs) def assign(self, **kwargs) -> DataFrame: @@ -4877,9 +4874,7 @@ def assign(self, **kwargs) -> DataFrame: data[k] = com.apply_if_callable(v, data) return data - def _sanitize_column( - self, value, using_cow: bool = False - ) -> tuple[ArrayLike, BlockValuesRefs | None]: + def _sanitize_column(self, value) -> tuple[ArrayLike, BlockValuesRefs | None]: """ Ensures new columns (which go into the BlockManager as new blocks) are always copied (or a reference is being tracked to them under CoW) @@ -4900,7 +4895,7 @@ def _sanitize_column( if is_dict_like(value): if not isinstance(value, Series): value = Series(value) - return _reindex_for_setitem(value, self.index, using_cow=using_cow) + return _reindex_for_setitem(value, self.index) if is_list_like(value): com.require_length_match(value, self.index) @@ -11916,12 +11911,12 @@ def _from_nested_dict(data) -> collections.defaultdict: def _reindex_for_setitem( - value: DataFrame | Series, index: Index, using_cow: bool = False + value: DataFrame | Series, index: Index ) -> tuple[ArrayLike, BlockValuesRefs | None]: # reindex if necessary if value.index.equals(index) or not len(index): - if using_cow and isinstance(value, Series): + if using_copy_on_write() and isinstance(value, Series): return value._values, value._references return value._values.copy(), None